In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import rioxarray
import geopandas as gpd
from shapely.geometry import box
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _find_axis_name(names, strong_tokens, weak_token, axis_label):
    """Return the first index name that looks like the requested axis.

    Names containing one of ``strong_tokens`` are preferred. Only when none
    matches is the single-letter ``weak_token`` ('x' or 'y') tried, so that an
    unrelated index that merely contains that letter (e.g. 'expver', 'year')
    is not picked ahead of a real longitude/latitude index.

    Raises:
        IndexError: if no index name matches at all (same exception type the
            original ``[...][0]`` lookup produced, with a readable message).
    """
    names = list(names)
    for tokens in (strong_tokens, (weak_token,)):
        for name in names:
            lowered = str(name).lower()
            if any(tok in lowered for tok in tokens):
                return name
    raise IndexError(f"no {axis_label}-like index found among {names}")


def _first_step(coords):
    """Absolute difference between the first two coordinates as float64.

    Returns NaN when the axis has fewer than two points, because no spacing
    can be derived from a single coordinate.
    """
    if coords.size < 2:
        return np.float64('nan')
    return np.abs(coords[0] - coords[1], dtype=np.float64)


def def_dims(nc_file):
    """Extract the horizontal coordinates of a dataset and their grid spacing.

    Parameters:
        nc_file: object exposing ``indexes`` (iterable of index names) and
            ``variables`` (mapping from name to an array-like that supports
            ``[:]``), e.g. an xarray Dataset.

    Returns:
        (dif_lon, dif_lat, lons, lats) where ``lons``/``lats`` are numpy
        arrays of the coordinate values and ``dif_lon``/``dif_lat`` are the
        absolute differences between the first two entries of each axis
        (NaN if an axis has fewer than two points).

    Raises:
        IndexError: if no longitude-like or latitude-like index is found.
    """
    lon_name = _find_axis_name(nc_file.indexes, ('lon', 'eas', 'wes'), 'x', 'longitude')
    lat_name = _find_axis_name(nc_file.indexes, ('lat', 'nor', 'sou'), 'y', 'latitude')
    lons = np.array(nc_file.variables[lon_name][:])
    lats = np.array(nc_file.variables[lat_name][:])
    dif_lon = _first_step(lons)
    dif_lat = _first_step(lats)
    return dif_lon, dif_lat, lons, lats

In [3]:
# Load the 2022 AOD grid and discard its 'spatial_ref' variable.
# `drop_vars` is the supported way to remove a variable; `Dataset.drop` is deprecated.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
aod = xr.open_dataset(r"C:\Users\user\OneDrive\Desktop\gdal trials\AOD nc\2022.nc").drop_vars('spatial_ref')

# Open every file found in the ERA5-Land folder, average each consecutive
# block of 24 `valid_time` steps, and merge the results into one Dataset.
folder = './Era5 Land/'
files = os.listdir(folder)
era5 = [xr.open_dataset(folder + file).coarsen(valid_time=24).mean() for file in files]
era5 = xr.merge(era5)

In [4]:
# Substrings that mark an index name as the longitude / latitude axis.
lon_tokens = ('lon', 'eas', 'wes', 'x')
lat_tokens = ('lat', 'nor', 'sou', 'y')

# First index whose lowercase name contains any token of the respective axis.
lon_dim = [name for name in aod.indexes if any(tok in name.lower() for tok in lon_tokens)][0]
lat_dim = [name for name in aod.indexes if any(tok in name.lower() for tok in lat_tokens)][0]

# Give the AOD axes the same names that are used when selecting from the other datasets.
aod = aod.rename({lat_dim: 'latitude'})
aod = aod.rename({lon_dim: 'longitude'})

In [5]:
# Display the AOD dataset (its horizontal axes were renamed to 'latitude'/'longitude' above).
aod

In [6]:
# Display the merged ERA5-Land dataset.
era5

In [38]:
# Disabled code kept as a string literal: nothing in it runs, the cell only
# echoes the text. It sketches a per-grid-cell box-averaging loop.
# NOTE(review): `create_box` and `extracting_and_averaging_polygon` are not
# defined in the cells visible here — confirm they exist before re-enabling.
'''dif_lon, dif_lat, lons, lats = def_dims(aod)
dss = []
crs = 'epsg:4326'
regridding_factor = 20
for i in range(len(lons)):
    for j in range(len(lats)):
        boxx = create_box(lons[i], lats[j], dif_lon, dif_lat, crs = crs)
        df = extracting_and_averaging_polygon(boxx, era5, regridding_factor, crs = crs)
        df['longitude'] = lons[i]
        df['latitude'] = lats[j]
        df.set_index(['time', 'longitude', 'latitude'], inplace=True)
        
        ds = df.to_xarray()
        del df, boxx
        if len(dss) == 0:
            dss = ds
        else:
            dss = xr.merge([dss, ds])
            del ds'''

"dif_lon, dif_lat, lons, lats = def_dims(aod)\ndss = []\ncrs = 'epsg:4326'\nregridding_factor = 20\nfor i in range(len(lons)):\n    for j in range(len(lats)):\n        boxx = create_box(lons[i], lats[j], dif_lon, dif_lat, crs = crs)\n        df = extracting_and_averaging_polygon(boxx, era5, regridding_factor, crs = crs)\n        df['longitude'] = lons[i]\n        df['latitude'] = lats[j]\n        df.set_index(['time', 'longitude', 'latitude'], inplace=True)\n        \n        ds = df.to_xarray()\n        del df, boxx\n        if len(dss) == 0:\n            dss = ds\n        else:\n            dss = xr.merge([dss, ds])\n            del ds"

In [7]:
# Only the coordinate arrays of the AOD grid are needed; the spacings are discarded.
dif_lon, dif_lat, lons, lats = def_dims(aod)
del dif_lat, dif_lon

# For every AOD grid point, take the nearest ERA5 point and store its
# time series as one DataFrame labelled with the AOD coordinates.
df = []
for lon in lons:
    for lat in lats:
        nearest = era5.sel(latitude=lat, longitude=lon, method='nearest')
        frame = nearest.to_pandas().drop(['number', 'expver'], axis=1).reset_index()
        # Truncate the timestamps to calendar dates (kept as datetimes).
        frame['time'] = pd.DatetimeIndex(frame['valid_time'].dt.date)
        # Overwrite with the target (AOD) coordinates, not the matched ERA5 ones.
        frame['latitude'] = lat
        frame['longitude'] = lon
        df.append(frame.drop('valid_time', axis=1))

In [8]:
# Stack the per-point frames and turn them back into a
# (time, longitude, latitude) gridded Dataset.
era5_as = (
    pd.concat(df)
    .set_index(['time', 'longitude', 'latitude'])
    .to_xarray()
)

In [18]:
# Display the ERA5 values sampled (nearest neighbour) at the AOD grid points.
era5_as

In [19]:
# Number of chunks
n = 16

# Create the output folder if it is missing, otherwise to_netcdf fails
out_dir = "./Era5 Land Assimilated"
os.makedirs(out_dir, exist_ok=True)

# Get indices to split along the time dimension
# (`sizes` is the name -> length mapping; using `dims` for this is deprecated)
time_len = era5_as.sizes["time"]
splits = np.array_split(np.arange(time_len), n)

# Save each chunk to a separate file, numbered from 1
for i, idx in enumerate(splits):
    ds_subset = era5_as.isel(time=idx)
    ds_subset.to_netcdf(f"{out_dir}/Era5_land_{i+1}.nc")

In [23]:
# Open every file in the CAMS folder, average each consecutive pair of
# `forecast_reference_time` steps, then merge all pieces into one Dataset.
folder = './CAMS Reanalysis AOD/'
files = os.listdir(folder)
cams_aod = []
for file_name in files:
    opened = xr.open_dataset(folder + file_name)
    cams_aod.append(opened.coarsen(forecast_reference_time=2).mean())
cams_aod = xr.merge(cams_aod)

In [29]:
# Display the merged CAMS dataset.
cams_aod

In [30]:
# First index of the CAMS dataset whose lowercase name contains a time-like token.
time_tokens = ('time', 'date', 'year', 'month')
time_dim = [name for name in cams_aod.indexes if any(tok in name.lower() for tok in time_tokens)][0]
time_dim

'forecast_reference_time'

In [33]:
# Replace the time coordinate by its calendar dates (stored as datetimes).
day_stamps = pd.DatetimeIndex(cams_aod[time_dim].dt.date)
cams_aod[time_dim] = day_stamps

In [34]:
# Rename the detected time dimension to 'time'.
cams_aod = cams_aod.rename({time_dim: 'time'})

In [40]:
# Remove the 'valid_time' variable. `drop_vars` is the supported call;
# dropping variables through `Dataset.drop` is deprecated.
cams_aod = cams_aod.drop_vars('valid_time')

In [49]:
# Round-trip through a DataFrame to get rid of 'forecast_period' and
# rebuild the Dataset on (time, longitude, latitude) only.
cams_aod = (
    cams_aod
    .to_dataframe()
    .reset_index()
    .drop(columns='forecast_period')
    .set_index(['time', 'longitude', 'latitude'])
    .to_xarray()
)

In [50]:
# Display the CAMS dataset after it was re-indexed on (time, longitude, latitude).
cams_aod

In [51]:
# Only the coordinate arrays of the AOD grid are needed; the spacings are discarded.
dif_lon, dif_lat, lons, lats = def_dims(aod)
del dif_lat, dif_lon

# For every AOD grid point, take the nearest CAMS point and store its
# time series as one DataFrame labelled with the AOD coordinates.
df = []
for lon in lons:
    for lat in lats:
        nearest = cams_aod.sel(latitude=lat, longitude=lon, method='nearest')
        frame = nearest.to_pandas().reset_index()
        # Overwrite with the target (AOD) coordinates, not the matched CAMS ones.
        frame['latitude'] = lat
        frame['longitude'] = lon
        df.append(frame)

In [57]:
# Stack the per-point frames and turn them back into a
# (time, longitude, latitude) gridded Dataset.
cams_aod_as = (
    pd.concat(df)
    .set_index(['time', 'longitude', 'latitude'])
    .to_xarray()
)

In [60]:
# Merge the resampled CAMS data with the AOD dataset; the result is only displayed, not stored.
xr.merge([cams_aod_as, aod])

In [58]:
# Number of chunks
n = 8

# Create the output folder if it is missing, otherwise to_netcdf fails
out_dir = "./CAMS Reanalysis AOD Assimilated"
os.makedirs(out_dir, exist_ok=True)

# Get indices to split along the time dimension
# (`sizes` is the name -> length mapping; using `dims` for this is deprecated)
time_len = cams_aod_as.sizes["time"]
splits = np.array_split(np.arange(time_len), n)

# Save each chunk to a separate file, numbered from 1
for i, idx in enumerate(splits):
    ds_subset = cams_aod_as.isel(time=idx)
    ds_subset.to_netcdf(f"{out_dir}/cams_aod_{i+1}.nc")