In [1]:
import numpy as np                        # numerical computations
from matplotlib import pyplot as plt      # visualisation
import xarray as xr                       # for netcdf
import pandas as pd                       
import os

In [2]:
first_file = xr.open_dataset("../data/Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2000a2010_uy.nc")
first_file.head()

In [3]:
def prepare_data(file):
    df = pd.DataFrame()

    df["time"] = pd.to_datetime(file.time)
    df["t2m"] = file.t2m.mean(dim=['latitude', 'longitude'])
    df.set_index("time")

    # type of the date column -> datetime64[ns] !!
    # separating the time column
    df["date"] = df["time"].dt.date
    df["day"] = df["time"].dt.day
    df["month"] = df["time"].dt.month

    # changing the format of the date column by removing the hours

    # in .groupby only 2 columns date and t2m are taken from the groupings
    # in .agg the dict is used to apply those functions only to t2m column
    df_by_day = df.groupby("date")[["date", "t2m"]].agg({"t2m": ["mean", "min", "max"]}).rename(columns={"t2m":"t2m_daily"})   # temp precipitation becomes daily and not hourly mesured

    df_by_day.columns = ['_'.join(col).strip() for col in df_by_day.columns.values]
    
    df = df.join(df_by_day, on="date").drop("date", axis = 1)
    
    return df

f1 = prepare_data(first_file)

In [5]:
second_file = xr.open_dataset("../data/Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2011a2021_uy.nc")
second_file.head()

In [6]:
f2 = prepare_data(second_file)

In [7]:
# f1.append(f2) # cette ligne est ok, mais il faudrait modifier le append pour un concat

res = pd.concat([f1, f2])
res.tail()

Unnamed: 0,time,t2m,day,month,t2m_daily_mean,t2m_daily_min,t2m_daily_max
96427,2021-12-31 19:00:00,306.261017,31,12,300.427338,293.982544,306.261017
96428,2021-12-31 20:00:00,305.979279,31,12,300.427338,293.982544,306.261017
96429,2021-12-31 21:00:00,305.09494,31,12,300.427338,293.982544,306.261017
96430,2021-12-31 22:00:00,303.514465,31,12,300.427338,293.982544,306.261017
96431,2021-12-31 23:00:00,301.792786,31,12,300.427338,293.982544,306.261017


In [8]:
# created a folder within nc_files with needed files -> tas variable and historical scenario
def prepare_data_cmip6(doc):
    file = xr.open_dataset("./nc_files/needed_files/"+doc)
    df = pd.DataFrame()

    df["time"] = pd.to_datetime(file.time)
    df["tas"] = file.tas.mean(dim=['lat', 'lon'])
    df.set_index("time")

    # type of the date column -> datetime64[ns] !!
    # separating the time column
    df["date"] = df["time"].dt.date
    df["day"] = df["time"].dt.day
    df["month"] = df["time"].dt.month

    # changing the format of the date column by removing the hours

    # in .groupby only 2 columns date and tas are taken from the groupings
    # in .agg the dict is used to apply those functions only to tas column
    df_by_day = df.groupby("date")[["date", "tas"]].agg({"tas": ["mean", "min", "max"]}).rename(columns={"tas":"tas_daily"})   # temp above surface becomes daily and not hourly mesured

    df_by_day.columns = ['_'.join(col).strip() for col in df_by_day.columns.values]
    
    df = df.join(df_by_day, on="date").drop("date", axis = 1)
    
    return df

In [17]:
f3 = prepare_data_cmip6("tas_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_20000101-20141231_v20191021.nc")
f4 = prepare_data_cmip6("tas_day_MRI-ESM2-0_historical_r1i1p1f1_gn_20000101-20141231_v20190603.nc")

# the date column format is NOT THE SAME!
f5 = prepare_data_cmip6("tas_day_UKESM1-0-LL_historical_r14i1p1f2_gn_20000101-20141230_v20190627.nc")
#f6 = prepare_data_cmip6("tas_day_UKESM1-0-LL_historical_r1i1p1f2_gn_20000101-20141230_v20190627.nc")

f5

TypeError: <class 'cftime._cftime.Datetime360Day'> is not convertible to datetime

In [22]:
df = xr.open_dataset("./nc_files/needed_files/tas_day_UKESM1-0-LL_historical_r14i1p1f2_gn_20000101-20141230_v20190627.nc")
print(df.time)

<xarray.DataArray 'time' (time: 5400)>
array([cftime.Datetime360Day(2000, 1, 1, 12, 0, 0, 0, has_year_zero=True),
       cftime.Datetime360Day(2000, 1, 2, 12, 0, 0, 0, has_year_zero=True),
       cftime.Datetime360Day(2000, 1, 3, 12, 0, 0, 0, has_year_zero=True), ...,
       cftime.Datetime360Day(2014, 12, 28, 12, 0, 0, 0, has_year_zero=True),
       cftime.Datetime360Day(2014, 12, 29, 12, 0, 0, 0, has_year_zero=True),
       cftime.Datetime360Day(2014, 12, 30, 12, 0, 0, 0, has_year_zero=True)],
      dtype=object)
Coordinates:
  * time     (time) object 2000-01-01 12:00:00 ... 2014-12-30 12:00:00
    height   float64 ...
Attributes:
    bounds:         time_bnds
    axis:           T
    long_name:      time
    standard_name:  time


In [10]:
res

Unnamed: 0,time,t2m,day,month,t2m_daily_mean,t2m_daily_min,t2m_daily_max
0,2000-01-01 00:00:00,295.123260,1,1,295.357391,289.210754,301.250488
1,2000-01-01 01:00:00,294.918762,1,1,295.357391,289.210754,301.250488
2,2000-01-01 02:00:00,294.258972,1,1,295.357391,289.210754,301.250488
3,2000-01-01 03:00:00,291.075500,1,1,295.357391,289.210754,301.250488
4,2000-01-01 04:00:00,290.787567,1,1,295.357391,289.210754,301.250488
...,...,...,...,...,...,...,...
96427,2021-12-31 19:00:00,306.261017,31,12,300.427338,293.982544,306.261017
96428,2021-12-31 20:00:00,305.979279,31,12,300.427338,293.982544,306.261017
96429,2021-12-31 21:00:00,305.094940,31,12,300.427338,293.982544,306.261017
96430,2021-12-31 22:00:00,303.514465,31,12,300.427338,293.982544,306.261017


In [16]:
res = res.join(f3, on="time", how = "outer")
res.head()

ValueError: The column label 'time' is not unique.