# Notebook to explore GRIB and netCDF files
These files are downloaded from [Copernicus Climate Data Store](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-land-monthly-means?tab=download), using `cdsapi`.

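The downloaded files are stored in `data/in`. The `area` option of the request bounds the region with `North = 45`, `East = 90`, `South = -45` and `West = -90`. Note that the CDS API expects the list in the order `[North, West, South, East]`, i.e. `[45, -90, -45, 90]`.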

In [None]:
from pathlib import Path
import xarray as xr
from matplotlib import pyplot as plt
import geopandas as gpd
from shapely.geometry import Point

In [None]:
# Directory that holds the downloaded netCDF files
folder_path = Path("../data/in")

# The two inputs; per their file names, the second is the Celsius-converted
# counterpart of the first
f_area_before_celsius = folder_path.joinpath("era5_data_2024_01_02_monthly_area.nc")
f_area_after_celsius = folder_path.joinpath("era5_data_2024_01_02_monthly_area_celsius.nc")

In [None]:
# Open both netCDF files as xarray datasets (before / after Celsius conversion)
ds_area_before_celsius, ds_area_after_celsius = (
    xr.open_dataset(nc_file)
    for nc_file in (f_area_before_celsius, f_area_after_celsius)
)

The following cells explore the structure of the data.

In [None]:
# Bare expression as the cell's last statement: the notebook displays the
# dataset loaded from the "before Celsius" file
ds_area_before_celsius

In [None]:
# Pick the grid point nearest to latitude 20.0 / longitude 10.0 and show the
# first five rows of its table form
(
    ds_area_before_celsius
    .sel(latitude=20.0, longitude=10.0, method="nearest")
    .to_dataframe()
    .head(5)
)

In [None]:
# Bare expression as the cell's last statement: the notebook displays the
# dataset loaded from the "celsius" file
ds_area_after_celsius

In [None]:
# Same nearest-point lookup as for the "before" dataset, this time on the
# Celsius file; first five rows only
(
    ds_area_after_celsius
    .sel(latitude=20.0, longitude=10.0, method="nearest")
    .to_dataframe()
    .head(5)
)

In [None]:
# Location to inspect; method="nearest" snaps it to the closest grid point
lat, lon = 20.0, 10.0

# Select the t2m values at that location and plot them
t2m_at_point = ds_area_after_celsius["t2m"].sel(latitude=lat, longitude=lon, method="nearest")
t2m_at_point.plot(color="blue", marker="o")
plt.title(f"2m temperature in 2024 at lat-{lat}, lon-{lon}")
plt.show()


In [None]:
# Convert the dataset to a pandas DataFrame, then move the index levels into
# ordinary columns so they can be filtered and exported like any other column
df = ds_area_after_celsius.to_dataframe()
df = df.reset_index()
df

In [None]:
# Keep the 2024-02-01 rows and only the columns needed for export
export_columns = ["valid_time", "latitude", "longitude", "t2m"]
out_data = df.loc[df["valid_time"] == "2024-02-01", export_columns]

In [None]:
# Restrict to 2024-02-01 and discard every row that contains a NaN
is_february = out_data["valid_time"] == "2024-02-01"
out_data_clean = out_data.loc[is_february].dropna()
out_data_clean

In [None]:
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (no-op when it is already there)
out_dir = Path("../data/out")
out_dir.mkdir(parents=True, exist_ok=True)

# Filtered rows, NaN values included
out_data.to_csv(out_dir / "era5_data_2024_01_02_monthly_area_celsius_with_NaN.csv", index=False)
# Same selection with the NaN rows dropped
out_data_clean.to_csv(out_dir / "era5_data_2024_01_02_monthly_area_celsius_february.csv", index=False)

In [None]:
# Write one JSON record per line (lines=True), with timestamps as ISO 8601 strings
json_path = "../data/out/era5_data_2024_01_02_monthly_area_celsius.json"
out_data.to_json(json_path, orient="records", lines=True, date_format="iso")

In [None]:
# xarray data to geopandas

# Convert the xarray dataset to a pandas DataFrame with the index levels as columns
df = ds_area_after_celsius.to_dataframe().reset_index()

# Keep the 2024-02-01 rows and only the columns of interest.
# Rows and columns are selected in one step: the row filter needs `valid_time`,
# which is not exported, and two separate assignments would let the second
# overwrite the first (previously the column selection was silently lost).
df_filtered = df.loc[df["valid_time"] == "2024-02-01", ["latitude", "longitude", "t2m"]]

# Build the point geometries (x = longitude, y = latitude) in one vectorised
# call and attach the CRS (EPSG:4326, i.e. WGS84) at construction time
gdf = gpd.GeoDataFrame(
    df_filtered,
    geometry=gpd.points_from_xy(df_filtered["longitude"], df_filtered["latitude"]),
    crs="EPSG:4326",
)

# Save to a GeoJSON file
gdf.to_file("../data/out/era5_data_2024_01_02_monthly_area_celsius.geojson", driver="GeoJSON")

# Display the first rows of the GeoDataFrame
gdf.head()