# Notebook to explore DuckDB
The DuckDB files are created from dataframes obtained by reading netCDF files. These netCDF files can be found under `data/in`.

By default, the DuckDB files are stored in `data/`

In [None]:
from pathlib import Path
import xarray as xr
from onehealth_db import database
import duckdb

In [None]:
# Directory containing the input netCDF files (relative path).
folder_path = Path("../data/in")

# Each path below is a file directly under `folder_path`; the variable names
# mirror the `_area` / `_celsius` suffixes of the file names.
f_no_area_before_celsius = folder_path.joinpath("era5_data_2025_03_monthly.nc")
f_no_area_after_celsius = folder_path.joinpath(
    "era5_data_2025_03_monthly_celsius.nc"
)
f_area_before_celsius = folder_path.joinpath("era5_data_2025_03_monthly_area.nc")
f_area_after_celsius = folder_path.joinpath(
    "era5_data_2025_03_monthly_area_celsius.nc"
)
# NOTE: "celsicus" is a typo of "celsius"; the names are kept unchanged because
# the dataset-loading cell refers to them.
f_multi_before_celsicus = folder_path.joinpath(
    "era5_data_2022_2023_2024_all_monthly.nc"
)
f_multi_after_celsicus = folder_path.joinpath(
    "era5_data_2022_2023_2024_all_monthly_celsius.nc"
)
f_2024_area_after_celsius = folder_path.joinpath(
    "era5_data_2024_01_02_monthly_area_celsius.nc"
)

In [None]:
# Open every netCDF input file as an xarray Dataset.
# Each `ds_*` variable corresponds to the `f_*` path with the matching suffix.
ds_no_area_before_celsius = xr.open_dataset(
    filename_or_obj=f_no_area_before_celsius
)
ds_no_area_after_celsius = xr.open_dataset(
    filename_or_obj=f_no_area_after_celsius
)
ds_area_before_celsius = xr.open_dataset(filename_or_obj=f_area_before_celsius)
ds_area_after_celsius = xr.open_dataset(filename_or_obj=f_area_after_celsius)
ds_multi_before_celsius = xr.open_dataset(
    filename_or_obj=f_multi_before_celsicus
)
ds_multi_after_celsius = xr.open_dataset(
    filename_or_obj=f_multi_after_celsicus
)
ds_2024_area_after_celsius = xr.open_dataset(
    filename_or_obj=f_2024_area_after_celsius
)

The following cells explore the database created from dataframes that were built from the netCDF files.

In [None]:
# Turn the dataset into a dataframe, then move the index levels into
# regular columns so they can be selected by name.
df = ds_2024_area_after_celsius.to_dataframe()
df = df.reset_index()
df

In [None]:
# Keep only the columns that will be written to the database.
out_data = df.loc[:, ["valid_time", "latitude", "longitude", "t2m"]]
out_data

In [None]:
# Write the selected dataframe into DuckDB.
# Defaults used here: database file `data/onehealth.db`, table `onehealth`.
table_name = "onehealth"
database_file_path = Path("../data/onehealth.db")
database.import_data(out_data, database_file_path, table_name)

In [None]:
# Open the database file and preview the first ten `t2m` values of the table.
# NOTE(review): the connection is left open in this cell; call `con.close()`
# once exploration is finished.
con = duckdb.connect(database_file_path)
t2m_relation = con.sql(f"""
    SELECT t2m
    FROM {table_name}
    LIMIT 10
""")
# Materialise the query result as a pandas dataframe for display.
t2m_data = t2m_relation.df()
t2m_data