In [1]:
import numpy as np                        # numerical computations
from matplotlib import pyplot as plt      # visualisation
import xarray as xr                       # for netcdf
import pandas as pd                       
import os

In [3]:
first_file = xr.open_dataset("../data/Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2000a2010_uy.nc")
first_file.head()

In [5]:
def prepare_data(file):
    df = pd.DataFrame()

    df["time"] = pd.to_datetime(file.time)
    df["t2m"] = file.t2m.mean(dim=['latitude', 'longitude'])
    df.set_index("time")

    # type of the date column -> datetime64[ns] !!
    # separating the time column
    df["date"] = df["time"].dt.date
    df["day"] = df["time"].dt.day
    df["month"] = df["time"].dt.month

    # changing the format of the date column by removing the hours

    # in .groupby only 2 columns date and t2m are taken from the groupings
    # in .agg the dict is used to apply those functions only to t2m column
    df_by_day = df.groupby("date")[["date", "t2m"]].agg({"t2m": ["mean", "min", "max"]}).rename(columns={"t2m":"t2m_daily"})   # temp precipitation becomes daily and not hourly mesured

    df_by_day.columns = ['_'.join(col).strip() for col in df_by_day.columns.values]
    
    df = df.join(df_by_day, on="date").drop("date", axis = 1)
    
    return df

f1 = prepare_data(first_file)

In [7]:
second_file = xr.open_dataset("../data/Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2011a2021_uy.nc")
second_file.head()

In [9]:
f2 = prepare_data(second_file)

In [71]:
# f1.append(f2) # cette ligne est ok, mais il faudrait modifier le append pour un concat

res = pd.concat([f1, f2])
res.tail()

Unnamed: 0,time,t2m,day,month,t2m_daily_mean,t2m_daily_min,t2m_daily_max
96427,2021-12-31 19:00:00,306.261017,31,12,300.427338,293.982544,306.261017
96428,2021-12-31 20:00:00,305.979279,31,12,300.427338,293.982544,306.261017
96429,2021-12-31 21:00:00,305.09494,31,12,300.427338,293.982544,306.261017
96430,2021-12-31 22:00:00,303.514465,31,12,300.427338,293.982544,306.261017
96431,2021-12-31 23:00:00,301.792786,31,12,300.427338,293.982544,306.261017


In [39]:
# created a folder within nc_files with needed files -> tas variable and historical scenario
def prepare_data_cmip6(doc):
    file = xr.open_dataset("./nc_files/needed_files/"+doc)
    df = pd.DataFrame()

    df["time"] = pd.to_datetime(file.time)
    df["tas"] = file.tas.mean(dim=['lat', 'lon'])
    df.set_index("time")

    # type of the date column -> datetime64[ns] !!
    # separating the time column
    df["date"] = df["time"].dt.date
    df["day"] = df["time"].dt.day
    df["month"] = df["time"].dt.month

    # changing the format of the date column by removing the hours

    # in .groupby only 2 columns date and tas are taken from the groupings
    # in .agg the dict is used to apply those functions only to tas column
    df_by_day = df.groupby("date")[["date", "tas"]].agg({"tas": ["mean", "min", "max"]}).rename(columns={"tas":"tas_daily"})   # temp above surface becomes daily and not hourly mesured

    df_by_day.columns = ['_'.join(col).strip() for col in df_by_day.columns.values]
    
    df = df.join(df_by_day, on="date").drop("date", axis = 1)
    
    return df

In [73]:
f3 = prepare_data_cmip6("tas_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_20000101-20141231_v20191021.nc")
f4 = prepare_data_cmip6("tas_day_MRI-ESM2-0_historical_r1i1p1f1_gn_20000101-20141231_v20190603.nc")

# the date column format is NOT THE SAME!
#f5 = prepare_data_cmip6("tas_day_UKESM1-0-LL_historical_r14i1p1f2_gn_20000101-20141230_v20190627.nc")
#f6 = prepare_data_cmip6("tas_day_UKESM1-0-LL_historical_r1i1p1f2_gn_20000101-20141230_v20190627.nc")

f3

Unnamed: 0,time,tas,day,month,tas_daily_mean,tas_daily_min,tas_daily_max
0,2000-01-01 12:00:00,295.955475,1,1,295.955475,295.955475,295.955475
1,2000-01-02 12:00:00,298.928833,2,1,298.928833,298.928833,298.928833
2,2000-01-03 12:00:00,301.295563,3,1,301.295563,301.295563,301.295563
3,2000-01-04 12:00:00,302.222534,4,1,302.222534,302.222534,302.222534
4,2000-01-05 12:00:00,300.398193,5,1,300.398193,300.398193,300.398193
...,...,...,...,...,...,...,...
5474,2014-12-27 12:00:00,296.139771,27,12,296.139771,296.139771,296.139771
5475,2014-12-28 12:00:00,296.998627,28,12,296.998627,296.998627,296.998627
5476,2014-12-29 12:00:00,296.999023,29,12,296.999023,296.999023,296.999023
5477,2014-12-30 12:00:00,300.493958,30,12,300.493958,300.493958,300.493958


In [75]:
res

Unnamed: 0,time,t2m,day,month,t2m_daily_mean,t2m_daily_min,t2m_daily_max
0,2000-01-01 00:00:00,295.123260,1,1,295.357391,289.210754,301.250488
1,2000-01-01 01:00:00,294.918762,1,1,295.357391,289.210754,301.250488
2,2000-01-01 02:00:00,294.258972,1,1,295.357391,289.210754,301.250488
3,2000-01-01 03:00:00,291.075500,1,1,295.357391,289.210754,301.250488
4,2000-01-01 04:00:00,290.787567,1,1,295.357391,289.210754,301.250488
...,...,...,...,...,...,...,...
96427,2021-12-31 19:00:00,306.261017,31,12,300.427338,293.982544,306.261017
96428,2021-12-31 20:00:00,305.979279,31,12,300.427338,293.982544,306.261017
96429,2021-12-31 21:00:00,305.094940,31,12,300.427338,293.982544,306.261017
96430,2021-12-31 22:00:00,303.514465,31,12,300.427338,293.982544,306.261017


In [77]:
res = pd.concat([res, f3], axis=1).drop("time", axis = 1)
res.head()

Unnamed: 0,t2m,day,month,t2m_daily_mean,t2m_daily_min,t2m_daily_max,tas,day.1,month.1,tas_daily_mean,tas_daily_min,tas_daily_max
0,295.12326,1,1,295.357391,289.210754,301.250488,295.955475,1.0,1.0,295.955475,295.955475,295.955475
1,294.918762,1,1,295.357391,289.210754,301.250488,298.928833,2.0,1.0,298.928833,298.928833,298.928833
2,294.258972,1,1,295.357391,289.210754,301.250488,301.295563,3.0,1.0,301.295563,301.295563,301.295563
3,291.0755,1,1,295.357391,289.210754,301.250488,302.222534,4.0,1.0,302.222534,302.222534,302.222534
4,290.787567,1,1,295.357391,289.210754,301.250488,300.398193,5.0,1.0,300.398193,300.398193,300.398193
