# Inserting data into the database and estimating execution time

In [1]:
import os
import time
from pathlib import Path

import xarray as xr

from onehealth_db import postgresql_database as db

### Set up necessary variables

In [2]:
# PostgreSQL database URL. Set the ONEHEALTH_DB_URL environment variable to
# target another server or to supply real credentials without committing them
# to the notebook; the fallback is the local development database.
db_url = os.environ.get(
    "ONEHEALTH_DB_URL",
    "postgresql+psycopg2://postgres:postgres@localhost:5432/postgres",
)
# Initialize the database. NOTE: as the recorded output of this cell shows,
# replace=True drops all existing tables before recreating them, so running
# this cell discards any previously inserted data.
engine = db.initialize_database(db_url, replace=True)

PostGIS extension installed.
All tables dropped.
All tables created.
Database initialized successfully.


In [None]:
# per-step execution times (seconds), filled in once the insertion has run
run_time = dict()

# variable types to register, one row per variable: (name, unit, description)
_var_type_rows = (
    ("t2m", "Celsius", "2m temperature"),
    ("tp", "mm", "Total precipitation"),
    ("popu", "1", "Total population"),
)
var_types = [
    {"name": name, "unit": unit, "description": description}
    for name, unit, description in _var_type_rows
]

### Add data into the database

In [4]:
# start recording time: t0 is the wall-clock reference (seconds since the epoch)
# from which the first step's duration and the total run time are measured
t0 = time.time()

In [None]:
# Re-anchor the timer at the top of this cell so that the NUTS step and the
# total only cover the work done here, not idle time that may have passed
# since an earlier cell was executed.
t0 = time.time()

# add NUTS definition data
data_in = Path("../data/in")
shapefile_path = data_in / "NUTS_RG_20M_2024_4326.shp"
db.insert_nuts_def(engine, shapefile_path)
t_nuts_def = time.time()

# add variable types
db.insert_var_types(engine, var_types)
t_var_type = time.time()

# input datasets: processed ERA5-Land data (data/out) and ISIMIP population data (data/in)
data_out = Path("../data/out")
era5_land_path = data_out / "era5_data_2020_to_2025_all_2t_tp_monthly_celsius_mm_resampled_05degree_trim.nc"
isimip_path = data_in / "population_histsoc_30arcmin_annual_1901_2021_renamed.nc"

with xr.open_dataset(era5_land_path) as era5_ds:
    # add grid points (this checkpoint also includes the time to open the dataset)
    db.insert_grid_points(
        engine,
        latitudes=era5_ds.latitude.values,
        longitudes=era5_ds.longitude.values,
    )
    t_grid_point = time.time()

    # add time points
    db.insert_time_points(engine, time_point_data=era5_ds.time.values)
    t_time_point = time.time()

    # get id maps for grid, time, and variable types
    grid_id_map, time_id_map, var_type_id_map = db.get_id_maps(engine)
    t_get_id_map = time.time()

    # add variable values, one checkpoint per variable
    db.insert_var_values(engine, era5_ds, "t2m", grid_id_map, time_id_map, var_type_id_map)
    t_insert_t2m = time.time()
    db.insert_var_values(engine, era5_ds, "tp", grid_id_map, time_id_map, var_type_id_map)
    t_insert_tp = time.time()

with xr.open_dataset(isimip_path) as isimip_ds:
    # add population data (this checkpoint also includes the time to open the dataset)
    # NOTE(review): the id maps were retrieved after inserting only the ERA5 grid
    # and time points in this cell; confirm that the population dataset's
    # coordinates are covered by them or handled inside insert_var_values.
    db.insert_var_values(engine, isimip_ds, "popu", grid_id_map, time_id_map, var_type_id_map)
    t_insert_popu = time.time()

# end of the whole insertion run
t_end = time.time()

Variable types inserted.
Grid points inserted.
Time points inserted.


In [None]:
# calculate execution time: each step's duration is the gap between its
# checkpoint and the checkpoint before it, starting from t0
checkpoints = (
    ("nuts_def", t_nuts_def),
    ("var_type", t_var_type),
    ("grid_point", t_grid_point),
    ("time_point", t_time_point),
    ("get_id_map", t_get_id_map),
    ("insert_t2m", t_insert_t2m),
    ("insert_tp", t_insert_tp),
    ("insert_popu", t_insert_popu),
)
previous = t0
for step, stamp in checkpoints:
    run_time[step] = stamp - previous
    previous = stamp
total_time = t_end - t0

# report every step in execution order, followed by the overall total
messages = (
    ("nuts_def", "NUTS definition data inserted"),
    ("var_type", "Variable types inserted"),
    ("grid_point", "Grid points inserted"),
    ("time_point", "Time points inserted"),
    ("get_id_map", "ID maps retrieved"),
    ("insert_t2m", "t2m variable values inserted"),
    ("insert_tp", "tp variable values inserted"),
    ("insert_popu", "Population variable values inserted"),
)
for step, message in messages:
    print(f"{message} in {run_time[step]} seconds.")
print(f"Total execution time: {total_time} seconds.")

NUTS definition data inserted in 0.1630110740661621 seconds.
Variable types inserted in 0.004929065704345703 seconds.
Grid points inserted in 5.763792276382446 seconds.
Time points inserted in 0.0034987926483154297 seconds.
Total execution time: 5.935499668121338 seconds.
