In [9]:
import numpy as np                       
from matplotlib import pyplot as plt     
import xarray as xr                      
import pandas as pd     
import cftime                  
from datetime import datetime
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import os

In [2]:
reanalysis_nc = xr.open_dataset('../Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2000a2010_uy.nc')
reanalysis_nc_2 = xr.open_dataset('../Archivos_reanalisisERA5_realesINUMET/Reanalisis/Datos_t2m_horario_2011a2021_uy.nc')

def transform_reanalysis(data_nc):
    reanalysis = pd.DataFrame()
    reanalysis['time'] = pd.to_datetime(data_nc.time)
    reanalysis['temp'] = data_nc.t2m.mean(dim=["latitude","longitude"]) #lon: (-58,-53), lat: (-30,-35)
    reanalysis = reanalysis.loc[reanalysis["time"] < datetime.fromisoformat('2015-01-01T00:00:00')]
    reanalysis['time'] = reanalysis['time'].dt.date
    reanalysis['time'] = reanalysis['time'].apply(lambda x: str(x))
    reanalysis = reanalysis.groupby('time').mean()
    return reanalysis

reanalysis = pd.concat([transform_reanalysis(reanalysis_nc), transform_reanalysis(reanalysis_nc_2)])

## CESM2

In [3]:
CESM2_nc = xr.open_dataset('../data/tas_day_CESM2_historical_r1i1p1f1_gn_20000101-20141231_v20190308.nc')
CESM2 = pd.DataFrame()
CESM2['time'] = CESM2_nc.time
CESM2['CESM2'] = CESM2_nc.tas.mean(dim=["lat","lon"])
CESM2['time'] = CESM2['time'].apply(lambda x: str(datetime.fromisoformat(x.isoformat()).date()))
CESM2 = CESM2.set_index('time')

## CESM2_WACCM

In [4]:
CESM2_WACCM_nc = xr.open_dataset('../data/tas_day_CESM2-WACCM_historical_r1i1p1f1_gn_20000101-20141231_v20190227.nc')
CESM2_WACCM = pd.DataFrame()
CESM2_WACCM['time'] = CESM2_WACCM_nc.time
CESM2_WACCM['CESM2_WACCM'] = CESM2_WACCM_nc.tas.mean(dim=["lat","lon"])
CESM2_WACCM['time'] = CESM2_WACCM['time'].apply(lambda x: str(datetime.fromisoformat(x.isoformat()).date()))
CESM2_WACCM = CESM2_WACCM.set_index('time')

## CNRM_CM6_1

In [5]:
CNRM_CM6_1_nc = xr.open_dataset('../data/tas_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_20000101-20141231_v20191021.nc')
CNRM_CM6_1 = pd.DataFrame()
CNRM_CM6_1['time'] = CNRM_CM6_1_nc.time
CNRM_CM6_1['CNRM_CM6_1'] = CNRM_CM6_1_nc.tas.mean(dim=["lat","lon"])
CNRM_CM6_1['time'] = CNRM_CM6_1['time'].apply(lambda x: str(x.date()))
CNRM_CM6_1 = CNRM_CM6_1.set_index('time')

## EC_EARTH3

In [6]:
EC_EARTH3_nc = xr.open_dataset('../data/tas_day_EC-Earth3-CC_historical_r1i1p1f1_gr_20000101-20141231_v20210113.nc')
EC_EARTH3 = pd.DataFrame()
EC_EARTH3['time'] = EC_EARTH3_nc.time
EC_EARTH3['EC_EARTH3'] = EC_EARTH3_nc.tas.mean(dim=["lat","lon"])
EC_EARTH3['time'] = EC_EARTH3['time'].apply(lambda x: str(x.date()))
EC_EARTH3 = EC_EARTH3.set_index('time')

## MRI_ESM2

In [7]:
MRI_ESM2_nc = xr.open_dataset('../data/tas_day_MRI-ESM2-0_historical_r1i1p1f1_gn_20000101-20141231_v20190603.nc')
MRI_ESM2 = pd.DataFrame()
MRI_ESM2['time'] = MRI_ESM2_nc.time
MRI_ESM2['MRI_ESM2'] = MRI_ESM2_nc.tas.mean(dim=["lat","lon"])
MRI_ESM2['time'] = MRI_ESM2['time'].apply(lambda x: str(x.date()))
MRI_ESM2 = MRI_ESM2.set_index('time')

In [12]:
# WARNING!: Some dataframes don't have the 29th of February
final = reanalysis.merge(CESM2_WACCM, left_index=True, right_index=True) \
                   .merge(CESM2, left_index=True, right_index=True) \
                   .merge(CNRM_CM6_1, left_index=True, right_index=True) \
                   .merge(EC_EARTH3, left_index=True, right_index=True) \
                   .merge(MRI_ESM2, left_index=True, right_index=True)

#RMSE
print(f'''
    CESM2_WACCM: {np.sqrt(mean_squared_error(final["temp"], final["CESM2_WACCM"]))} 
    CESM2: {np.sqrt(mean_squared_error(final["temp"], final["CESM2"]))}
    CNRM_CM6_1: {np.sqrt(mean_squared_error(final['temp'], final['CNRM_CM6_1']))}       
    EC_EARTH3: {np.sqrt(mean_squared_error(final['temp'], final['EC_EARTH3']))}
    MRI_ESM2: {np.sqrt(mean_squared_error(final['temp'], final['MRI_ESM2']))}
''')

#MAPE
print(f'''
    CESM2_WACCM: {mean_absolute_percentage_error(final["temp"], final["CESM2_WACCM"])} 
    CESM2: {mean_squared_error(final["temp"], final["CESM2"])}
    CNRM_CM6_1: {mean_absolute_percentage_error(final['temp'], final['CNRM_CM6_1'])}       
    EC_EARTH3: {mean_absolute_percentage_error(final['temp'], final['EC_EARTH3'])}
    MRI_ESM2: {mean_absolute_percentage_error(final['temp'], final['MRI_ESM2'])}
''')


    CESM2_WACCM: 4.268655300140381 
    CESM2: 4.372464656829834
    CNRM_CM6_1: 4.583861827850342       
    EC_EARTH3: 4.310638427734375
    MRI_ESM2: 4.691719055175781


    CESM2_WACCM: 0.011751193553209305 
    CESM2: 19.11844825744629
    CNRM_CM6_1: 0.012528566643595695       
    EC_EARTH3: 0.01180186402052641
    MRI_ESM2: 0.012861620634794235

