In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import cdo
# One shared python-cdo wrapper instance; every CDO.<operator>(...) call in this notebook goes through it.
CDO = cdo.Cdo()
import netCDF4
import os
import datetime
import matplotlib.pyplot as plt 
from matplotlib import cm
import cartopy.crs as ccrs
from scipy import stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly_express as px
import shutil


## Authors
* Martin Wegmann (martin.wegmann@giub.unibe.ch)
* Fernando Jaume Santero (fernando.jaume@unige.ch)

### Set data paths

In [None]:
# Data locations (external volumes; adjust these to your own setup).
#   input_folder      : folder that is scanned for the raw RNN reconstructions
#   save_plots_folder : folder the figures are saved to
#   save_folder       : folder the post-processed netCDF files are written to
# save_folder used to be assigned twice in this cell; only the second value was
# ever in effect, so the overwritten first assignment has been dropped.
input_folder="/Volumes/DATAHUB/RNN_savestates/"
save_plots_folder="/Volumes/SPARK/RNN_savestates/plots/"
save_folder="/Volumes/DATAHUB/RNN_savestates/postprocess/"

In [None]:
# Work inside the post-processing folder: the CDO calls further down use
# relative names ("temp.nc") and patterns ("*_monthly.nc") that are resolved
# against the current working directory.
os.chdir(save_folder)

# Start from a clean slate by deleting netCDF files left over from an earlier
# run. This is done in Python instead of `rm *.nc` so that it does not depend on
# a shell and a failed deletion raises instead of being silently ignored.
# Like the shell glob, hidden files (leading ".") are left alone.
for stale_name in os.listdir("."):
    is_visible_nc = stale_name.endswith(".nc") and not stale_name.startswith(".")
    if is_visible_nc and (os.path.isfile(stale_name) or os.path.islink(stale_name)):
        os.remove(stale_name)

In [None]:
# Run configuration: choose which reconstruction is post-processed.
#   model            -> "20cr", "mpige" or "csmlme"
#   amount_locations -> 25 or 29
#   sample_size      -> 1979 or 20000
# resolution and output are two further tags that end up in the file names.
model, resolution, output = "20cr", "lowres", "anoms"
amount_locations, sample_size = 25, 1979






### Set output paths

In [None]:


# File-name stem shared by everything produced for this configuration,
# e.g. "best_model_ekf400_anoms_20cr_lowres_25_1979".
_name_stem = "_".join(
    ["best_model_ekf400", output, model, resolution, str(amount_locations), str(sample_size)]
)
RNN1 = _name_stem + "_RNN1_det"
RNN1lstm = _name_stem + "_RNN1lstm_det"

# netCDF products are placed in save_folder, figures in save_plots_folder.
RNN1_path_nc, RNN1lstm_path_nc = (os.path.join(save_folder, tag) for tag in (RNN1, RNN1lstm))
RNN1_path_png, RNN1lstm_path_png = (os.path.join(save_plots_folder, tag) for tag in (RNN1, RNN1lstm))



### Check how many files are in the input folder

In [None]:
# Collect every file in input_folder whose name starts with the RNN1 stem.
# os.listdir returns entries in arbitrary order, and the paths are later paired
# with member_index by position, so iterate in sorted order to make that
# pairing reproducible. The folder is listed once (it used to be listed twice).
arr = os.listdir(input_folder)
input_paths = [
    os.path.join(input_folder, file_name)
    for file_name in sorted(arr)
    if file_name.startswith(RNN1)
]
len(input_paths)

In [None]:
# Display the list of matched input files.
input_paths

In [None]:
# Peek at the first matched file. Index 0 (not 1) so that this also works in the
# default single-reconstruction case, where the list holds exactly one entry.
input_paths[0]

### Set how many reconstructions we have. In the simplest case it is a single deterministic reconstruction.

In [None]:
# One label per reconstruction (member). The labels are inserted into the output
# file names and are matched to input_paths by position.
member_index=["01"]
member_index

### Set output paths for the postprocessed data

In [None]:
# Output file names for the per-member products. Every name has the form
# <RNN1_path_nc><member label><suffix>. RNN1_path_nc already lies inside
# save_folder, so it is used directly instead of being joined to save_folder
# a second time.
member_index_length = len(input_paths)

# Inputs and member labels are paired by position, so every input needs a label.
if member_index_length > len(member_index):
    raise ValueError(
        "found {} input file(s) but member_index only has {} label(s)".format(
            member_index_length, len(member_index)))

# The labels get their own name on purpose: the previous loops stored each path
# in `output`, which overwrote the `output` run parameter ("anoms") that the
# file-name stem is built from.
member_labels = [str(member_index[k]) for k in range(member_index_length)]

remap_paths = [RNN1_path_nc + label + '_remap.nc' for label in member_labels]
monthly_paths = [RNN1_path_nc + label + '_monthly.nc' for label in member_labels]
DJF_paths = [RNN1_path_nc + label + '_DJF.nc' for label in member_labels]
JJA_paths = [RNN1_path_nc + label + '_JJA.nc' for label in member_labels]
yearmean_paths = [RNN1_path_nc + label + '_yearly.nc' for label in member_labels]
NH_paths = [RNN1_path_nc + label + '_monthly_NH.nc' for label in member_labels]
    
    

In [None]:
# Output file names for the time-sliced per-member products ("short" and "lmr"
# variants). Every name has the form <RNN1_path_nc><member label><suffix>.
# RNN1_path_nc already lies inside save_folder, so it is used directly instead
# of being joined to save_folder a second time.
member_index_length = len(input_paths)

# Inputs and member labels are paired by position, so every input needs a label.
if member_index_length > len(member_index):
    raise ValueError(
        "found {} input file(s) but member_index only has {} label(s)".format(
            member_index_length, len(member_index)))

# The labels get their own name on purpose: the previous loops stored each path
# in `output`, which overwrote the `output` run parameter that the file-name
# stem is built from.
member_labels = [str(member_index[k]) for k in range(member_index_length)]

remap_paths_short = [RNN1_path_nc + label + '_short_remap.nc' for label in member_labels]
monthly_paths_short = [RNN1_path_nc + label + '_short_monthly.nc' for label in member_labels]
DJF_paths_short = [RNN1_path_nc + label + '_short_DJF.nc' for label in member_labels]
JJA_paths_short = [RNN1_path_nc + label + '_short_JJA.nc' for label in member_labels]
yearmean_paths_short = [RNN1_path_nc + label + '_short_yearly.nc' for label in member_labels]
yearmean_paths_lmr = [RNN1_path_nc + label + '_lmr_yearly.nc' for label in member_labels]
NH_paths_short = [RNN1_path_nc + label + '_short_monthly_NH.nc' for label in member_labels]
    

### Postprocessing the reconstructed gridded data

In [None]:
# Operator chaining through python-cdo proved unreliable and 20CR is packed in an
# unusual way, so most steps write explicit intermediate files; these are deleted
# again as soon as they have been consumed.
ekf400_grid_file = "/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_anoms.nc"

for member in range(member_index_length):
    remapped_file = remap_paths[member]
    monthly_file = monthly_paths[member]

    # Bilinear remap onto the grid of the EKF400 file, then assign a monthly
    # time axis starting at 1602-01-16 00:00.
    CDO.remapbil(ekf400_grid_file, input=input_paths[member], output=remapped_file)
    CDO.settaxis("16020116,00:00,1mon", input=remapped_file, output=monthly_file)

    # Lon/lat box -180..180, 90..0 (Northern Hemisphere), yearly means and
    # yearly means of June-August only.
    CDO.sellonlatbox("-180,180,90,0", input=monthly_file, output=NH_paths[member])
    CDO.yearmean(input=monthly_file, output=yearmean_paths[member])
    CDO.yearmean(input="-selmon,6,7,8 " + monthly_file, output=JJA_paths[member])

    # Winter means: keep Jan/Feb/Dec, select time steps 3..1205, then average
    # every 3 steps. Starting at step 3 presumably aligns the triples to
    # Dec-Jan-Feb; the end index 1205 is tied to the series length -- TODO confirm.
    CDO.selmon("1,2,12", input=monthly_file, output="temp.nc")
    CDO.seltimestep("3/1205", input="temp.nc", output="tempp.nc")
    os.remove("temp.nc")
    CDO.timselmean("3", input="tempp.nc", output=DJF_paths[member])
    os.remove("tempp.nc")
    os.remove(remapped_file)

# Ensemble mean over all member files matching each pattern (patterns are
# relative to the current working directory).
for member_pattern, merged_suffix in (
    ("*_monthly.nc", "_all_monthly.nc"),
    ("*_yearly.nc", "_all_ym.nc"),
    ("*_JJA.nc", "_all_JJA.nc"),
    ("*_monthly_NH.nc", "_all_monthly_NH.nc"),
    ("*_DJF.nc", "_all_DJF.nc"),
):
    CDO.ensmean(input=member_pattern, output=RNN1_path_nc + merged_suffix)

# Year-range subsets of every member product.
for member in range(member_index_length):
    for year_range, source_file, target_file in (
        ("1836/1850", monthly_paths[member], monthly_paths_short[member]),
        ("1836/1850", NH_paths[member], NH_paths_short[member]),
        ("1836/1850", yearmean_paths[member], yearmean_paths_short[member]),
        ("1602/2000", yearmean_paths[member], yearmean_paths_lmr[member]),
        ("1836/1850", JJA_paths[member], JJA_paths_short[member]),
        ("1837/1850", DJF_paths[member], DJF_paths_short[member]),
    ):
        CDO.selyear(year_range, input=source_file, output=target_file)

# Ensemble means of the subsets.
for member_pattern, merged_suffix in (
    ("*_short_monthly.nc", "_all_short_monthly.nc"),
    ("*_short_yearly.nc", "_all_short_ym.nc"),
    ("*_lmr_yearly.nc", "_all_lmr_ym.nc"),
    ("*_short_JJA.nc", "_all_short_JJA.nc"),
    ("*_short_monthly_NH.nc", "_all_short_monthly_NH.nc"),
    ("*_short_DJF.nc", "_all_short_DJF.nc"),
):
    CDO.ensmean(input=member_pattern, output=RNN1_path_nc + merged_suffix)



### Set output paths for the correlation analysis

In [None]:
# Output file names for the per-member correlation products: one list of field
# correlations (fldcor) and one of temporal correlations (timcor) per data set.
# Every name has the form <RNN1_path_nc><member label><suffix>. RNN1_path_nc
# already lies inside save_folder, so it is used directly instead of being
# joined to save_folder a second time.
member_index_length = len(input_paths)

# Inputs and member labels are paired by position, so every input needs a label.
if member_index_length > len(member_index):
    raise ValueError(
        "found {} input file(s) but member_index only has {} label(s)".format(
            member_index_length, len(member_index)))

# The labels get their own name on purpose: the previous loops stored each path
# in `output`, which overwrote the `output` run parameter that the file-name
# stem is built from.
member_labels = [str(member_index[k]) for k in range(member_index_length)]

fldcor_paths_monthly = [RNN1_path_nc + label + '_monthly_fldcor.nc' for label in member_labels]
timcor_paths_monthly = [RNN1_path_nc + label + '_monthly_timcor.nc' for label in member_labels]

fldcor_paths_NH = [RNN1_path_nc + label + '_NH_fldcor.nc' for label in member_labels]
timcor_paths_NH = [RNN1_path_nc + label + '_NH_timcor.nc' for label in member_labels]

fldcor_paths_ym = [RNN1_path_nc + label + '_ym_fldcor.nc' for label in member_labels]
timcor_paths_ym = [RNN1_path_nc + label + '_ym_timcor.nc' for label in member_labels]

fldcor_paths_ym_lmr = [RNN1_path_nc + label + '_ym_fldcor_lmr.nc' for label in member_labels]
timcor_paths_ym_lmr = [RNN1_path_nc + label + '_ym_timcor_lmr.nc' for label in member_labels]

fldcor_paths_DJF = [RNN1_path_nc + label + '_DJF_fldcor.nc' for label in member_labels]
timcor_paths_DJF = [RNN1_path_nc + label + '_DJF_timcor.nc' for label in member_labels]

fldcor_paths_JJA = [RNN1_path_nc + label + '_JJA_fldcor.nc' for label in member_labels]
timcor_paths_JJA = [RNN1_path_nc + label + '_JJA_timcor.nc' for label in member_labels]
  

### Perform the correlation analysis

In [None]:
# Correlate every member product with its reference data set, then combine the
# members: temporal correlations are ensemble-averaged, field correlations merged.
ekf400_dir = "/Volumes/SPARK/ekf400v2/ensmean/"
lmr_reference = "/Volumes/SPARK/lmr/air_MCruns_ensemble_mean_LMRv2.1_ekf400_anoms_ym.nc"

correlation_jobs = [
    # (reference file, member files, fldcor outputs, timcor outputs, name tag, name tail)
    (ekf400_dir + "EKF400_ensmean_v2.0_t2m_anoms.nc",
     monthly_paths, fldcor_paths_monthly, timcor_paths_monthly, "_monthly", ""),
    (ekf400_dir + "EKF400_ensmean_v2.0_t2m_anoms_NH.nc",
     NH_paths, fldcor_paths_NH, timcor_paths_NH, "_NH", ""),
    (ekf400_dir + "EKF400_ensmean_v2.0_t2m_anoms_ym.nc",
     yearmean_paths, fldcor_paths_ym, timcor_paths_ym, "_ym", ""),
    (lmr_reference,
     yearmean_paths_lmr, fldcor_paths_ym_lmr, timcor_paths_ym_lmr, "_ym", "_lmr"),
    (ekf400_dir + "EKF400_ensmean_v2.0_t2m_anoms_DJF.nc",
     DJF_paths, fldcor_paths_DJF, timcor_paths_DJF, "_DJF", ""),
    (ekf400_dir + "EKF400_ensmean_v2.0_t2m_anoms_JJA.nc",
     JJA_paths, fldcor_paths_JJA, timcor_paths_JJA, "_JJA", ""),
]

for reference_file, member_files, fldcor_files, timcor_files, tag, tail in correlation_jobs:
    for member in range(member_index_length):
        pair = reference_file + " " + member_files[member]
        CDO.fldcor(input=pair, output=fldcor_files[member])
        CDO.timcor(input=pair, output=timcor_files[member])

    # Patterns are relative to the current working directory.
    CDO.ensmean(input="*" + tag + "_timcor" + tail + ".nc",
                output=RNN1_path_nc + tag + "_all_timcor" + tail + ".nc")
    CDO.merge(input="*" + tag + "_fldcor" + tail + ".nc",
              output=RNN1_path_nc + tag + "_all_fldcor" + tail + ".nc")

In [None]:
# Correlations for the short (1836-1850) period between the reference data sets
# themselves and between 20CRv3 and the RNN ensemble mean.
ekf400_short = "/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_short_anoms"
tcr_short = "/Volumes/SPARK/20crv3/air.2m.mon.mean_18361850_anoms_remap"
lmr_short_ym = "/Volumes/SPARK/lmr/air_MCruns_ensemble_mean_LMRv2.1_short_anoms_ym.nc"

# (tag in the output name, suffix of the reference files, suffix of the RNN ensemble-mean file)
products = [
    ("monthly", "", "_all_short_monthly.nc"),
    ("ym", "_ym", "_all_short_ym.nc"),
    ("NH", "_NH", "_all_short_monthly_NH.nc"),
    ("DJF", "_DJF", "_all_short_DJF.nc"),
    ("JJA", "_JJA", "_all_short_JJA.nc"),
]

# EKF400 vs 20CRv3: all field correlations first, then all temporal correlations.
for operator_name in ("fldcor", "timcor"):
    correlate = getattr(CDO, operator_name)
    for tag, ref_suffix, _ in products:
        correlate(input=ekf400_short + ref_suffix + ".nc " + tcr_short + ref_suffix + ".nc",
                  output=save_folder + "ekf400_20crv3_" + tag + "_" + operator_name + ".nc")

# Yearly-mean temporal correlations involving LMR.
CDO.timcor(input=ekf400_short + "_ym.nc " + lmr_short_ym,
           output=save_folder + "ekf400_lmr_short_ym_timcor.nc")
CDO.timcor(input=tcr_short + "_ym.nc " + lmr_short_ym,
           output=save_folder + "20crv3_lmr_ym_timcor.nc")
CDO.timcor(input="/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_lmr_anoms_ym.nc"
                 + " " + "/Volumes/SPARK/lmr/air_MCruns_ensemble_mean_LMRv2.1_ekf400_anoms_ym.nc",
           output=save_folder + "ekf400_lmr_ym_timcor.nc")

# 20CRv3 vs RNN ensemble mean: field correlations first, then temporal correlations.
for operator_name in ("fldcor", "timcor"):
    correlate = getattr(CDO, operator_name)
    for tag, ref_suffix, rnn_suffix in products:
        correlate(input=tcr_short + ref_suffix + ".nc " + RNN1_path_nc + rnn_suffix,
                  output=RNN1_path_nc + "_20crv3_" + tag + "_" + operator_name + ".nc")


### Perform the mean bias analysis

In [None]:
# Mean-bias analysis: first build difference files with CDO.sub (first input
# minus second input), then average each difference file over time.
ekf400_short = "/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_short_anoms"
tcr_short = "/Volumes/SPARK/20crv3/air.2m.mon.mean_18361850_anoms_remap"
lmr_short_ym = "/Volumes/SPARK/lmr/air_MCruns_ensemble_mean_LMRv2.1_short_anoms_ym.nc"
ekf400_lmr_ym = "/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_lmr_anoms_ym.nc"
lmr_ekf400_ym = "/Volumes/SPARK/lmr/air_MCruns_ensemble_mean_LMRv2.1_ekf400_anoms_ym.nc"

# (tag in the output name, suffix of the reference files, suffix of the RNN ensemble-mean file)
products = [
    ("monthly", "", "_all_short_monthly.nc"),
    ("ym", "_ym", "_all_short_ym.nc"),
    ("NH", "_NH", "_all_short_monthly_NH.nc"),
    ("DJF", "_DJF", "_all_short_DJF.nc"),
    ("JJA", "_JJA", "_all_short_JJA.nc"),
]

# --- differences between the reference data sets ---
for tag, ref_suffix, _ in products:
    CDO.sub(input=ekf400_short + ref_suffix + ".nc " + tcr_short + ref_suffix + ".nc",
            output=save_folder + "ekf400_20crv3_" + tag + "_differences.nc")

CDO.sub(input=ekf400_lmr_ym + " " + lmr_ekf400_ym,
        output=save_folder + "ekf400_lmr_ym_differences.nc")
CDO.sub(input=ekf400_short + "_ym.nc " + lmr_short_ym,
        output=save_folder + "ekf400_lmr_short_ym_differences.nc")
CDO.sub(input=tcr_short + "_ym.nc " + lmr_short_ym,
        output=save_folder + "20cr_lmr_short_ym_differences.nc")

# --- reference minus RNN ensemble mean (20CRv3 first, then EKF400, then LMR) ---
for reference_stem, label in ((tcr_short, "_20crv3_"), (ekf400_short, "_ekf_")):
    for tag, ref_suffix, rnn_suffix in products:
        CDO.sub(input=reference_stem + ref_suffix + ".nc " + RNN1_path_nc + rnn_suffix,
                output=RNN1_path_nc + label + tag + "_differences.nc")

CDO.sub(input=lmr_short_ym + " " + RNN1_path_nc + "_all_short_ym.nc",
        output=RNN1_path_nc + "_lmr_ym_differences.nc")

# --- time means: <stem>.nc -> <stem>_timmean.nc ---
difference_stems = [RNN1_path_nc + "_lmr_ym_differences"]
difference_stems += [
    save_folder + "ekf400_20crv3_" + tag + "_differences"
    for tag in ("monthly", "ym", "NH", "DJF", "JJA")
]
difference_stems += [
    save_folder + name
    for name in ("ekf400_lmr_ym_differences",
                 "ekf400_lmr_short_ym_differences",
                 "20cr_lmr_short_ym_differences")
]
difference_stems += [
    RNN1_path_nc + label + tag + "_differences"
    for label in ("_20crv3_", "_ekf_")
    for tag in ("monthly", "NH", "ym", "JJA", "DJF")
]

for stem in difference_stems:
    CDO.timmean(input=stem + ".nc", output=stem + "_timmean.nc")



## Correlation Plots

#### To begin with, we check the correlation between the RNN output and the EKF400 output over the full 405 years (12*405 monthly time steps).

In [None]:
 # set some color map so we can see the non-significant grids
import copy

# Work on a private copy of "seismic": set_bad() mutates the colormap, and on
# older matplotlib the instance returned by get_cmap is the globally registered
# one that other plots share. plt.get_cmap is used because cm.get_cmap was
# removed in matplotlib 3.9.
current_cmap = copy.copy(plt.get_cmap("seismic"))
# Cells without a valid value (NaN / masked) are drawn in black.
current_cmap.set_bad(color='black')

In [None]:
# Grid-cell-wise Pearson correlation between the RNN ensemble-mean monthly file
# and the EKF400 monthly anomalies.
ekf400=xr.open_dataset("/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_anoms.nc")
rnn=xr.open_dataset(RNN1_path_nc +"_all_monthly.nc",decode_times=False)

ekf400_values=ekf400.air_temperature
rnn_values=rnn.t2m
timesteps=ekf400_values.shape[0]

# Flatten the two trailing axes so that every column is the time series of one
# grid cell. NOTE(review): assumes both arrays are (time, lat, lon) -- confirm.
ekf400_values_4_corr=ekf400_values.values.reshape(timesteps,ekf400_values.shape[2]*ekf400_values.shape[1])
rnn_values_4_corr=rnn_values.values.reshape(timesteps,rnn_values.shape[2]*rnn_values.shape[1])

# Result buffers start as NaN so that a cell that is never computed cannot be
# mistaken for a correlation value.
n_cells = ekf400_values_4_corr.shape[1]
structure_R = np.full(n_cells, np.nan)
structure_p = np.full(n_cells, np.nan)

for a in range(0,rnn_values_4_corr.shape[1]):
    structure_R[a], structure_p[a] = stats.pearsonr(rnn_values_4_corr[:,a], ekf400_values_4_corr[:,a])

# Keep r only where p <= 0.05; all other cells are set to NaN explicitly
# (previously a masked scalar was assigned into the float array, which only
# became NaN through an implicit, warning-emitting conversion).
with np.errstate(invalid="ignore"):
    structure_R_masked = np.where(structure_p <= 0.05, structure_R, np.nan)

corr_matrix_rnn_ekf400_monthly=structure_R.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
corr_matrix_rnn_ekf400_monthly_masked=structure_R_masked.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
p_matrix_rnn_ekf400_monthly=structure_p.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
print(structure_R.mean())
fig = plt.figure(figsize=(10, 5))
plt.imshow(corr_matrix_rnn_ekf400_monthly_masked, vmin=-1, vmax=1, cmap=current_cmap,origin='upper',interpolation="none")
plt.colorbar()

In [None]:

# ---------------------
import numpy as np
import datetime
from netCDF4 import Dataset,num2date,date2num
# -----------------------

# Write the significance-masked RNN-vs-EKF400 correlation map to a netCDF file
# with a single nominal time step (1900-01-01).
nyears = 1
output=os.path.join(save_folder, RNN1+"corr_matrix_rnn_ekf400_monthly_masked.nc")
unout = 'days since 1900-01-01 00:00:00'
# -----------------------
ny, nx = corr_matrix_rnn_ekf400_monthly_masked.shape

# The map was reshaped from ekf400_values, so its rows/columns follow that
# array's 2nd/3rd dimension; the coordinate values are taken from there.
# (They used to be read from `longitudes`/`latitudes`, which are not defined
# anywhere in the visible part of this notebook.)
# NOTE(review): assumes those two dimensions are latitude and longitude, in that order -- confirm.
lat = ekf400_values[ekf400_values.dims[1]].values
lon = ekf400_values[ekf400_values.dims[2]].values

dataout = corr_matrix_rnn_ekf400_monthly_masked
datesout = [datetime.datetime(1900+iyear,1,1) for iyear in range(nyears)]  # one date per time step
# =========================
# `format` has to be passed by keyword: the third positional parameter of
# Dataset is `clobber`, not the file format. The context manager closes the
# file even if one of the writes fails.
with Dataset(output,'w',format='NETCDF4') as ncout:
    ncout.createDimension('lon',nx)
    ncout.createDimension('lat',ny)
    ncout.createDimension('time',nyears)
    lonvar = ncout.createVariable('lon','float32',('lon'))
    lonvar[:] = lon
    latvar = ncout.createVariable('lat','float32',('lat'))
    latvar[:] = lat
    timevar = ncout.createVariable('time','float64',('time'))
    timevar.setncattr('units',unout)
    timevar[:] = date2num(datesout,unout)
    # NOTE(review): the values are correlation coefficients, yet the variable is
    # stored as "t2m" with units "K"; kept unchanged for downstream readers.
    myvar = ncout.createVariable("t2m",'float32',('time','lat','lon'))
    myvar.setncattr('units',"K")
    myvar[:] = dataout

#### We can also check out the evolution of the field correlation through time

In [None]:
### monthly resolution and NH
# Field correlation per time step between EKF400 and the RNN members, for the
# full grid and for the Northern Hemisphere subset.
fldcor_all_ds = xr.open_dataset(RNN1_path_nc +"_monthly_all_fldcor.nc")
fldcor_NH_ds = xr.open_dataset(RNN1_path_nc +"_NH_all_fldcor.nc")

# Drop the two trailing axes by taking their first element.
# NOTE(review): axis 1 is presumably the member axis that is averaged below -- confirm.
ekf_RNN_fldcor_monthly = fldcor_all_ds.air_temperature.values[:,:,0,0]
ekf_RNN_fldcor_monthly_NH = fldcor_NH_ds.air_temperature.values[:,:,0,0]

fig, ax = plt.subplots(figsize=(10, 5))
for fldcor_series in (ekf_RNN_fldcor_monthly, ekf_RNN_fldcor_monthly_NH):
    ax.plot(fldcor_series.mean(axis=1))

plt.show()
fig.savefig(RNN1_path_png+"_monthly_all_fldcor.png")



#### We can also have a look at the independent 20CR time period of 1836-1850 and see how both EKF400 and the RNN based reconstruction are doing

In [None]:
# Grid-cell-wise Pearson correlation between 20CRv3 and EKF400 for the short
# (1836-1850) monthly files.
ekf400=xr.open_dataset("/Volumes/SPARK/ekf400v2/ensmean/EKF400_ensmean_v2.0_t2m_short_anoms.nc")
tcr=xr.open_dataset("/Volumes/SPARK/20crv3/air.2m.mon.mean_18361850_anoms_remap.nc")

ekf400_values=ekf400.air_temperature
tcr_values=tcr.air
timesteps=ekf400_values.shape[0]

# Flatten the two trailing axes so that every column is the time series of one
# grid cell. NOTE(review): assumes both arrays are (time, lat, lon) -- confirm.
ekf400_values_4_corr=ekf400_values.values.reshape(timesteps,ekf400_values.shape[2]*ekf400_values.shape[1])
tcr_values_4_corr=tcr_values.values.reshape(timesteps,tcr_values.shape[2]*tcr_values.shape[1])

# Result buffers start as NaN so that a cell that is never computed cannot be
# mistaken for a correlation value.
n_cells = ekf400_values_4_corr.shape[1]
structure_R = np.full(n_cells, np.nan)
structure_p = np.full(n_cells, np.nan)

for a in range(0,tcr_values_4_corr.shape[1]):
    structure_R[a], structure_p[a] = stats.pearsonr(tcr_values_4_corr[:,a], ekf400_values_4_corr[:,a])

# Keep r only where p <= 0.05; all other cells are set to NaN explicitly
# (previously a masked scalar was assigned into the float array, which only
# became NaN through an implicit, warning-emitting conversion).
with np.errstate(invalid="ignore"):
    structure_R_masked = np.where(structure_p <= 0.05, structure_R, np.nan)

corr_matrix_tcr_ekf400_short_monthly=structure_R.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
corr_matrix_tcr_ekf400_short_monthly_masked=structure_R_masked.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
p_matrix_tcr_ekf400_short_monthly=structure_p.reshape(ekf400_values.shape[1],ekf400_values.shape[2])
print(structure_R.mean())
fig = plt.figure(figsize=(10, 5))
plt.imshow(corr_matrix_tcr_ekf400_short_monthly_masked, vmin=-1, vmax=1, cmap=current_cmap,origin='upper',interpolation="none")
plt.colorbar()

In [None]:
# ---------------------
import numpy as np
import datetime
from netCDF4 import Dataset,num2date,date2num
# -----------------------

# Write the significance-masked 20CRv3-vs-EKF400 correlation map to a netCDF
# file with a single nominal time step (1900-01-01).
nyears = 1
output=os.path.join(save_folder, RNN1+"corr_matrix_tcr_ekf400_short_monthly_masked.nc")
unout = 'days since 1900-01-01 00:00:00'
# -----------------------
ny, nx = corr_matrix_tcr_ekf400_short_monthly_masked.shape

# The map was reshaped from ekf400_values, so its rows/columns follow that
# array's 2nd/3rd dimension; the coordinate values are taken from there.
# (They used to be read from `longitudes`/`latitudes`, which are not defined
# anywhere in the visible part of this notebook.)
# NOTE(review): assumes those two dimensions are latitude and longitude, in that order -- confirm.
lat = ekf400_values[ekf400_values.dims[1]].values
lon = ekf400_values[ekf400_values.dims[2]].values

dataout = corr_matrix_tcr_ekf400_short_monthly_masked
datesout = [datetime.datetime(1900+iyear,1,1) for iyear in range(nyears)]  # one date per time step
# =========================
# `format` has to be passed by keyword: the third positional parameter of
# Dataset is `clobber`, not the file format. The context manager closes the
# file even if one of the writes fails.
with Dataset(output,'w',format='NETCDF4') as ncout:
    ncout.createDimension('lon',nx)
    ncout.createDimension('lat',ny)
    ncout.createDimension('time',nyears)
    lonvar = ncout.createVariable('lon','float32',('lon'))
    lonvar[:] = lon
    latvar = ncout.createVariable('lat','float32',('lat'))
    latvar[:] = lat
    timevar = ncout.createVariable('time','float64',('time'))
    timevar.setncattr('units',unout)
    timevar[:] = date2num(datesout,unout)
    # NOTE(review): the values are correlation coefficients, yet the variable is
    # stored as "t2m" with units "K"; kept unchanged for downstream readers.
    myvar = ncout.createVariable("t2m",'float32',('time','lat','lon'))
    myvar.setncattr('units',"K")
    myvar[:] = dataout

In [None]:
# Grid-cell-wise Pearson correlation between 20CRv3 and the RNN ensemble mean
# for the short (1836-1850) monthly files.
rnn=xr.open_dataset(RNN1_path_nc +"_all_short_monthly.nc",decode_times=False)
tcr=xr.open_dataset("/Volumes/SPARK/20crv3/air.2m.mon.mean_18361850_anoms_remap.nc")

rnn_values=rnn.t2m
tcr_values=tcr.air
timesteps=rnn_values.shape[0]

# Flatten the two trailing axes so that every column is the time series of one
# grid cell. NOTE(review): assumes both arrays are (time, lat, lon) -- confirm.
rnn_values_4_corr=rnn_values.values.reshape(timesteps,rnn_values.shape[2]*rnn_values.shape[1])
tcr_values_4_corr=tcr_values.values.reshape(timesteps,tcr_values.shape[2]*tcr_values.shape[1])

# Result buffers start as NaN so that a cell that is never computed cannot be
# mistaken for a correlation value.
n_cells = rnn_values_4_corr.shape[1]
structure_R = np.full(n_cells, np.nan)
structure_p = np.full(n_cells, np.nan)

for a in range(0,tcr_values_4_corr.shape[1]):
    structure_R[a], structure_p[a] = stats.pearsonr(tcr_values_4_corr[:,a], rnn_values_4_corr[:,a])

# Keep r only where p <= 0.05; all other cells are set to NaN explicitly
# (previously a masked scalar was assigned into the float array, which only
# became NaN through an implicit, warning-emitting conversion).
with np.errstate(invalid="ignore"):
    structure_R_masked = np.where(structure_p <= 0.05, structure_R, np.nan)

corr_matrix_tcr_rnn_short_monthly=structure_R.reshape(rnn_values.shape[1],rnn_values.shape[2])
corr_matrix_tcr_rnn_short_monthly_masked=structure_R_masked.reshape(rnn_values.shape[1],rnn_values.shape[2])
p_matrix_tcr_rnn_short_monthly=structure_p.reshape(rnn_values.shape[1],rnn_values.shape[2])
print(structure_R.mean())
fig = plt.figure(figsize=(10, 5))
plt.imshow(corr_matrix_tcr_rnn_short_monthly_masked, vmin=-1, vmax=1, cmap=current_cmap,origin='upper',interpolation="none")
plt.colorbar()

In [None]:
# ---------------------
import numpy as np
import datetime
from netCDF4 import Dataset,num2date,date2num
# -----------------------

# Write the significance-masked 20CRv3-vs-RNN correlation map to a netCDF file
# with a single nominal time step (1900-01-01).
nyears = 1
output=os.path.join(save_folder, RNN1+"corr_matrix_tcr_rnn_short_monthly_masked.nc")
unout = 'days since 1900-01-01 00:00:00'
# -----------------------
ny, nx = corr_matrix_tcr_rnn_short_monthly_masked.shape

# The map was reshaped from rnn_values, so its rows/columns follow that array's
# 2nd/3rd dimension; the coordinate values are taken from there.
# (They used to be read from `longitudes`/`latitudes`, which are not defined
# anywhere in the visible part of this notebook.)
# NOTE(review): assumes those two dimensions are latitude and longitude, in that order -- confirm.
lat = rnn_values[rnn_values.dims[1]].values
lon = rnn_values[rnn_values.dims[2]].values

dataout = corr_matrix_tcr_rnn_short_monthly_masked
datesout = [datetime.datetime(1900+iyear,1,1) for iyear in range(nyears)]  # one date per time step
# =========================
# `format` has to be passed by keyword: the third positional parameter of
# Dataset is `clobber`, not the file format. The context manager closes the
# file even if one of the writes fails.
with Dataset(output,'w',format='NETCDF4') as ncout:
    ncout.createDimension('lon',nx)
    ncout.createDimension('lat',ny)
    ncout.createDimension('time',nyears)
    lonvar = ncout.createVariable('lon','float32',('lon'))
    lonvar[:] = lon
    latvar = ncout.createVariable('lat','float32',('lat'))
    latvar[:] = lat
    timevar = ncout.createVariable('time','float64',('time'))
    timevar.setncattr('units',unout)
    timevar[:] = date2num(datesout,unout)
    # NOTE(review): the values are correlation coefficients, yet the variable is
    # stored as "t2m" with units "K"; kept unchanged for downstream readers.
    myvar = ncout.createVariable("t2m",'float32',('time','lat','lon'))
    myvar.setncattr('units',"K")
    myvar[:] = dataout

## Difference Plots


#### Okay, so for the time being we compare the differences between EKF400 and 20CR and the RNN output and 20CR for the time period 1836-1850. 

In [None]:
# Monthly resolution: map of the time-mean EKF400 minus 20CRv3 difference.
ekf_20c_diff_ds = xr.open_dataset(save_folder+"ekf400_20crv3_monthly_differences_timmean.nc")
ekf_20c_diff = ekf_20c_diff_ds.air_temperature

colorbar_options = {'orientation':'vertical', 'fraction':0.012, 'pad':0.015, 'aspect':35}

fig = plt.figure(figsize=(10, 5))
ax = plt.axes(projection=ccrs.EqualEarth(central_longitude=0, globe=None))  # alternative: PlateCarree
tplot = ekf_20c_diff.isel(time=0).plot.contourf(
    ax=ax,
    levels=17,
    transform=ccrs.PlateCarree(),
    cmap=cm.seismic,
    vmin=-5,
    vmax=5,
    cbar_kwargs=colorbar_options,
)

tplot.colorbar.set_label('Diff', size=16)
# NOTE(review): ylabel_style looks like a gridliner setting; on the contour set
# this assignment presumably has no visible effect -- confirm before relying on it.
tplot.ylabel_style = {'size':16}
ax.set_global()
ax.coastlines()

plt.show()
fig.savefig(save_plots_folder+"ekf400_20crv3_monthly_differences_timmean.png")





In [None]:
# Monthly resolution: map of the time-mean RNN minus 20CRv3 difference.
RNN_20c_diff_ds = xr.open_dataset(RNN1_path_nc +"_20crv3_monthly_differences_timmean.nc")
# The difference file was computed as 20CRv3 minus RNN, so flip the sign here.
RNN_20c_diff = RNN_20c_diff_ds.air*-1

colorbar_options = {'orientation':'vertical', 'fraction':0.012, 'pad':0.015, 'aspect':35}

fig = plt.figure(figsize=(10, 5))
ax = plt.axes(projection=ccrs.EqualEarth(central_longitude=0, globe=None))  # alternative: PlateCarree
tplot = RNN_20c_diff.isel(time=0).plot.contourf(
    ax=ax,
    levels=17,
    transform=ccrs.PlateCarree(),
    cmap=cm.seismic,
    vmin=-5,
    vmax=5,
    cbar_kwargs=colorbar_options,
)

tplot.colorbar.set_label('Diff', size=16)
# NOTE(review): ylabel_style looks like a gridliner setting; on the contour set
# this assignment presumably has no visible effect -- confirm before relying on it.
tplot.ylabel_style = {'size':16}
ax.set_global()
ax.coastlines()

plt.show()
fig.savefig(RNN1_path_png +"_20crv3_monthly_differences_timmean.png")




