In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import cdo
# One shared python-cdo wrapper instance; every preprocessing cell below calls its CDO operators through this object.
CDO = cdo.Cdo()
import netCDF4
import os
import datetime

## Authors
* Martin Wegmann (martin.wegmann@giub.unibe.ch)
* Fernando Jaume Santero (fernando.jaume@unige.ch)

In [None]:
# Root folder that holds all data sets. It defaults to the original external volume;
# set the SPARK_DATA_ROOT environment variable to use another location without
# editing the notebook.
dataRoot = os.environ.get("SPARK_DATA_ROOT", "/Volumes/SPARK/")
if not dataRoot.endswith("/"):
    # Later cells build file names by plain string concatenation (pathTo20cr + "..."),
    # so every folder path has to end with a separator.
    dataRoot += "/"

# set paths to data folders
pathTo20cr = dataRoot + '20crv3/'
pathToMPIGE_t2m = dataRoot + 'mpi_ge/t2m/atmos/'
pathToEKF = dataRoot + 'ekf400v2/ensmean/'
pathToEKFmember = dataRoot + 'ekf400v2/member/'
pathToLMR = dataRoot + 'lmr/'
pathToCSMLME_t2m = dataRoot + 'csm_lme/t2m/'

You can download the ensemble mean 20CRv3 data here: https://psl.noaa.gov/data/gridded/data.20thC_ReanV3.html

You can download the MPIGE data here: https://mpimet.mpg.de/en/grand-ensemble/

You can download the CSMLME data here: https://www.cesm.ucar.edu/projects/community-projects/LME/data-sets.html

You can download the EKF400v2 data here: https://cera-www.dkrz.de/WDCC/ui/cerasearch/entry?acronym=EKF400_v2.0

You can download the LMR data here: https://atmos.washington.edu/~hakim/lmr/LMRv2/

## Preprocessing 20CRv3 data

In [None]:
# The 20CRv3 cells below pass bare file names to CDO and os.remove, so they rely on this working directory.
os.chdir(pathTo20cr)

### 2m temperature data

In [None]:
# Piping operators through python-cdo is faulty and 20CRv3 is oddly packed, so each step
# writes its own intermediate file instead. The intermediates are deleted in a later cell.

# 1) keep only the variable named "air"
CDO.selname(
    "air",
    input="air.2m.mon.mean.nc",
    output="air.2m.mon.mean_var.nc",
)

# 2) bilinear remap to the r192x96 grid (the EKF400v2 resolution)
CDO.remapbil(
    "r192x96",
    input="air.2m.mon.mean_var.nc",
    output="air.2m.mon.mean_var_remap.nc",
)

# 3) select the lon/lat box -180..180 / -90..90
#    NOTE(review): presumably done to move the longitudes onto a -180..180 axis - confirm
CDO.sellonlatbox(
    "-180,180,-90,90",
    input="air.2m.mon.mean_var_remap.nc",
    output="air.2m.mon.mean_data_remap.nc",
)

# 4) invert the latitude order
CDO.invertlat(
    input="air.2m.mon.mean_data_remap.nc",
    output="air.2m.mon.mean_data_remap_inv.nc",
)

In [None]:
# 20CR monthly climatology: multi-year monthly mean over the years 1951-1980
CDO.ymonmean(
    input="-selyear,1951/1980 air.2m.mon.mean_data_remap_inv.nc",
    output="air.2m.mon.mean_remap_climate.nc",
)

In [None]:
# 20CR time windows: first the absolute fields, then the anomalies relative to the
# 1951-1980 monthly climatology. The file tag is the year range without the slash.
year_windows = ("1851/2015", "1836/1850", "1836/1990", "1991/2015")

# absolute values per window
for years in year_windows:
    tag = years.replace("/", "")
    CDO.selyear(
        years,
        input="air.2m.mon.mean_data_remap_inv.nc",
        output=f"air.2m.mon.mean_{tag}_remap.nc",
    )

# anomalies per window (window minus climatology)
for years in year_windows:
    tag = years.replace("/", "")
    CDO.sub(
        input=f"air.2m.mon.mean_{tag}_remap.nc air.2m.mon.mean_remap_climate.nc",
        output=f"air.2m.mon.mean_{tag}_anoms_remap.nc",
    )

In [None]:
# Delete the intermediate 20CRv3 files written by the remapping cell above.
# "air.2m.mon.mean_data.nc" is not produced by any cell in this notebook; it stays in the
# list so a copy left over from elsewhere is still cleaned up, but a missing file must not
# abort the cleanup (the unguarded os.remove raised FileNotFoundError and skipped the rest).
for leftover in (
    "air.2m.mon.mean_var.nc",
    "air.2m.mon.mean_data.nc",
    "air.2m.mon.mean_var_remap.nc",
    "air.2m.mon.mean_data_remap.nc",
    "air.2m.mon.mean_data_remap_inv.nc",
):
    if os.path.exists(leftover):
        os.remove(leftover)

In [None]:
# Extra 20CRv3 products for the later comparison with the RNN output.

# Northern-Hemisphere cut-outs (box -180..180 / 90..0) of two anomaly windows
for window_tag in ("18512015", "18361850"):
    CDO.sellonlatbox(
        "-180,180,90,0",
        input=f"air.2m.mon.mean_{window_tag}_anoms_remap.nc",
        output=f"air.2m.mon.mean_{window_tag}_anoms_remap_NH.nc",
    )

# yearly means of the 1836-1850 anomalies
CDO.yearmean(
    input="air.2m.mon.mean_18361850_anoms_remap.nc",
    output="air.2m.mon.mean_18361850_anoms_remap_ym.nc",
)

# June-August mean per year
CDO.yearmean(
    input="-selmon,6,7,8 air.2m.mon.mean_18361850_anoms_remap.nc",
    output="air.2m.mon.mean_18361850_anoms_remap_JJA.nc",
)

# December-February means: keep only Jan/Feb/Dec, cut timesteps 3..44 and average every
# three consecutive timesteps.
# NOTE(review): 3/44 assumes 45 timesteps (15 complete years x 3 months), i.e. it drops the
# leading Jan/Feb and the trailing Dec so each triple is Dec-Jan-Feb - confirm
CDO.selmon(
    "1,2,12",
    input="air.2m.mon.mean_18361850_anoms_remap.nc",
    output="temp.nc",
)
CDO.seltimestep("3/44", input="temp.nc", output="tempp.nc")
os.remove("temp.nc")
CDO.timselmean(
    "3",
    input="tempp.nc",
    output="air.2m.mon.mean_18361850_anoms_remap_DJF.nc",
)
os.remove("tempp.nc")

## Preprocessing MPIGE data

### 2m temperature data

In [None]:
# The MPIGE cells below use bare file names, so they rely on this working directory.
os.chdir(pathToMPIGE_t2m)

In [None]:
# merging all members and all months into one file
# The input list is built explicitly instead of handing the wildcard "tas_Amon*.nc" to CDO:
# that pattern also matches the files this notebook writes into the same folder, so a
# second run would silently concatenate derived products into the merged data set.
derived_outputs = {
    "tas_Amon_all_datapoints.nc",
    "tas_Amon_all_datapoints_remap.nc",
    "tas_Amon_climate.nc",
    "tas_Amon_all_datapoints_anoms.nc",
}
member_files = sorted(
    name
    for name in os.listdir(".")
    if name.startswith("tas_Amon")
    and name.endswith(".nc")
    and name not in derived_outputs
)
if not member_files:
    raise FileNotFoundError("no tas_Amon*.nc input files found in " + os.getcwd())
CDO.cat(input=" ".join(member_files), output="tas_Amon_all_datapoints.nc")

In [None]:
# Bring MPIGE to the EKF400v2 resolution: bilinear remap onto the grid of the
# remapped 20CRv3 climatology file.
CDO.remapbil(
    pathTo20cr + "air.2m.mon.mean_remap_climate.nc",
    input="tas_Amon_all_datapoints.nc",
    output="tas_Amon_all_datapoints_remap.nc",
)
# the merged file on the original grid is no longer needed
os.remove("tas_Amon_all_datapoints.nc")

In [None]:
# MPIGE monthly climatology over the years 1951-1980
CDO.ymonmean(
    input="-selyear,1951/1980 tas_Amon_all_datapoints_remap.nc",
    output="tas_Amon_climate.nc",
)

In [None]:
# MPIGE anomalies: remapped data minus the monthly climatology
CDO.sub(
    input="tas_Amon_all_datapoints_remap.nc tas_Amon_climate.nc",
    output="tas_Amon_all_datapoints_anoms.nc",
)

## Preprocessing CSMLME data

### 2m temperature data

In [None]:
# The CSMLME cells below use bare file names and wildcards, so they rely on this working directory.
os.chdir(pathToCSMLME_t2m)

In [None]:
# Collect the raw CSMLME input files (full paths) in a deterministic, sorted order.
# Files that this notebook itself writes into the folder are skipped: the temporary
# "*_data.nc" extracts (the cleanup cell deletes every file with that suffix, so no raw
# file can carry it) and the merged/derived TREFHT_Amon_* products. Otherwise re-running
# the notebook would feed its own outputs back into the merge.
notebook_outputs = {
    "TREFHT_Amon_all_datapoints.nc",
    "TREFHT_Amon_all_datapoints_no850.nc",
    "TREFHT_Amon_all_datapoints_remap.nc",
    "TREFHT_Amon_climate.nc",
    "TREFHT_Amon_all_datapoints_anoms.nc",
}
input_paths = []
for file in sorted(os.listdir(pathToCSMLME_t2m)):
    if not file.endswith(".nc"):
        continue
    if file.endswith("_data.nc") or file in notebook_outputs:
        continue
    input_paths.append(os.path.join(pathToCSMLME_t2m, file))
len(input_paths)

In [None]:
# Extract the variable "TREFHT" from every input file into a numbered temporary file.
amount_files = len(input_paths)
for i, raw_file in enumerate(input_paths):
    print(i)
    extracted_file = os.path.join(pathToCSMLME_t2m, "TREFHT_" + str(i) + "_data.nc")
    CDO.selname("TREFHT", input=raw_file, output=extracted_file)

In [None]:
# Concatenate all extracted members and months into a single file.
# NOTE(review): an earlier comment here said "remove the X for your data", but the
# pattern below contains no X - confirm whether that hint is still needed.
CDO.cat(
    input="*_data.nc",
    output="TREFHT_Amon_all_datapoints.nc",
)

In [None]:
# The years 850 and 1850 do not contain 12 months for some reason, and the year 2006
# has only one month, so keep just 851-1849 and 1851-2005.
CDO.selyear(
    "851/1849,1851/2005",
    input="TREFHT_Amon_all_datapoints.nc",
    output="TREFHT_Amon_all_datapoints_no850.nc",
)
# the unfiltered merge is no longer needed
os.remove("TREFHT_Amon_all_datapoints.nc")

In [None]:
# Bring CSMLME to the EKF400v2 resolution: bilinear remap onto the grid of the
# remapped 20CRv3 climatology file.
CDO.remapbil(
    pathTo20cr + "air.2m.mon.mean_remap_climate.nc",
    input="TREFHT_Amon_all_datapoints_no850.nc",
    output="TREFHT_Amon_all_datapoints_remap.nc",
)
# the file on the original grid is no longer needed
os.remove("TREFHT_Amon_all_datapoints_no850.nc")

In [None]:
# remove leftover files
# The temporary "*_data.nc" extracts live in pathToCSMLME_t2m; join the folder onto each
# name so the removal does not depend on the current working directory.
for file in os.listdir(pathToCSMLME_t2m):
    if file.endswith("_data.nc"):
        os.remove(os.path.join(pathToCSMLME_t2m, file))

In [None]:
# CSMLME monthly climatology over the years 1951-1980
CDO.ymonmean(
    input="-selyear,1951/1980 TREFHT_Amon_all_datapoints_remap.nc",
    output="TREFHT_Amon_climate.nc",
)

In [None]:
# CSMLME anomalies: remapped data minus the monthly climatology
CDO.sub(
    input="TREFHT_Amon_all_datapoints_remap.nc TREFHT_Amon_climate.nc",
    output="TREFHT_Amon_all_datapoints_anoms.nc",
)

## Preprocessing EKF400v2 data

In [None]:
# The EKF400v2 ensemble-mean cells below use bare file names, so they rely on this working directory.
os.chdir(pathToEKF)

In [None]:
# Merge the yearly ensemble-mean files (one per four-character year) along time.
# If this fails with too many open files, run the mergetime in batches of 100 years.
CDO.mergetime(
    input="EKF400_ensmean_????_v2.0.nc",
    output="EKF400_ensmean_v2.0.nc",
)

In [None]:
# keep only the 2m temperature, stored under the variable name "air_temperature"
CDO.selname(
    "air_temperature",
    input="EKF400_ensmean_v2.0.nc",
    output="EKF400_ensmean_v2.0_t2m.nc",
)

In [None]:
# Bilinear remap onto the grid of the remapped 20CRv3 climatology file, so EKF400v2
# shares the exact grid layout used for the other data sets.
CDO.remapbil(
    pathTo20cr + "air.2m.mon.mean_remap_climate.nc",
    input="EKF400_ensmean_v2.0_t2m.nc",
    output="EKF400_ensmean_v2.0_t2m_remap.nc",
)

In [None]:
# EKF400v2 ensemble-mean monthly climatology over the years 1951-1980
CDO.ymonmean(
    input="-selyear,1951/1980 EKF400_ensmean_v2.0_t2m_remap.nc",
    output="EKF400_ensmean_v2.0_t2m_climate.nc",
)

In [None]:
# EKF400v2 ensemble-mean anomalies: remapped data minus the monthly climatology
CDO.sub(
    input="EKF400_ensmean_v2.0_t2m_remap.nc EKF400_ensmean_v2.0_t2m_climate.nc",
    output="EKF400_ensmean_v2.0_t2m_anoms.nc",
)

In [None]:
# Extra EKF400v2 ensemble-mean products for the later comparison with the RNN output.
# All file names share this stem.
ekf_stem = "EKF400_ensmean_v2.0_t2m"

# monthly anomalies: 1836-1850 window, Northern-Hemisphere cut-out, and both combined
CDO.selyear("1836/1850", input=f"{ekf_stem}_anoms.nc", output=f"{ekf_stem}_short_anoms.nc")
CDO.sellonlatbox("-180,180,90,0", input=f"{ekf_stem}_anoms.nc", output=f"{ekf_stem}_anoms_NH.nc")
CDO.selyear("1836/1850", input=f"{ekf_stem}_anoms_NH.nc", output=f"{ekf_stem}_short_anoms_NH.nc")

# yearly means, plus the 1836-1850 and 1602-2000 windows of them
CDO.yearmean(input=f"{ekf_stem}_anoms.nc", output=f"{ekf_stem}_anoms_ym.nc")
CDO.selyear("1836/1850", input=f"{ekf_stem}_anoms_ym.nc", output=f"{ekf_stem}_short_anoms_ym.nc")
CDO.selyear("1602/2000", input=f"{ekf_stem}_anoms_ym.nc", output=f"{ekf_stem}_lmr_anoms_ym.nc")

# June-August mean per year, plus its 1836-1850 window
CDO.yearmean(input=f"-selmon,6,7,8 {ekf_stem}_anoms.nc", output=f"{ekf_stem}_anoms_JJA.nc")
CDO.selyear("1836/1850", input=f"{ekf_stem}_anoms_JJA.nc", output=f"{ekf_stem}_short_anoms_JJA.nc")

# December-February means: keep only Jan/Feb/Dec, cut timesteps 3..1205 and average every
# three consecutive timesteps.
# NOTE(review): 3/1205 is hard-coded for the length of this record; presumably it drops the
# leading Jan/Feb and the trailing Dec so each triple is Dec-Jan-Feb - confirm
CDO.selmon("1,2,12", input=f"{ekf_stem}_anoms.nc", output="temp.nc")
CDO.seltimestep("3/1205", input="temp.nc", output="tempp.nc")
os.remove("temp.nc")
CDO.timselmean("3", input="tempp.nc", output=f"{ekf_stem}_anoms_DJF.nc")
CDO.selyear("1837/1850", input=f"{ekf_stem}_anoms_DJF.nc", output=f"{ekf_stem}_short_anoms_DJF.nc")
os.remove("tempp.nc")

In [None]:
# The 2m-temperature extract on the original grid is not used by any later cell in this notebook, so delete it.
os.remove("EKF400_ensmean_v2.0_t2m.nc")

## Preprocessing EKF400v2 member data

In [None]:
# The member cells below write temp.nc / tempp.nc and use wildcards relative to this working directory.
os.chdir(pathToEKFmember)

In [None]:
# lets see what data we have
def _member_sort_key(filename):
    """Sort key: the integers contained in the file name (numerically), then the name.

    With this key "..._2_..." sorts before "..._10_...", which a plain
    alphabetical sort would not do.
    """
    digit_runs = "".join(ch if ch in "0123456789" else " " for ch in filename).split()
    return ([int(run) for run in digit_runs], filename)


# os.listdir returns names in arbitrary order, but the cells below pair input_paths[i]
# with member label i+1 purely by position, so the list is sorted here.
# NOTE(review): this assumes the member number is part of each file name - confirm.
input_paths = []
for file in sorted(os.listdir(pathToEKFmember), key=_member_sort_key):
    if file.endswith("t2m.nc"):
        pathToEKFmember_file = os.path.join(pathToEKFmember, file)
        print(pathToEKFmember_file)
        input_paths.append(pathToEKFmember_file)

In [None]:
# Member labels "1" ... "30" (as strings), used in the per-member output file names.
member_index = [str(number) for number in range(1, 31)]
member_index_length = len(member_index)

In [None]:
# Bilinear remap of every member onto the grid of the remapped 20CRv3 climatology file.
# One output path per member label; inputs are matched to labels by list position.
remapbil_paths = [
    os.path.join(pathToEKFmember, "EKF400v2_ens_mem_" + str(label) + '_allyears_t2m_remap.nc')
    for label in member_index
]

for position, remapped_file in enumerate(remapbil_paths):
    CDO.remapbil(
        pathTo20cr + "air.2m.mon.mean_remap_climate.nc",
        input=input_paths[position],
        output=remapped_file,
    )

In [None]:
# Per-member monthly climatology over the years 1951-1980.
climate_paths = [
    os.path.join(pathToEKFmember, "EKF400v2_ens_mem_" + str(label) + '_allyears_t2m_climate.nc')
    for label in member_index
]

for position, climate_file in enumerate(climate_paths):
    CDO.ymonmean(
        input="-selyear,1951/1980 " + remapbil_paths[position],
        output=climate_file,
    )


In [None]:
# Per-member anomalies: remapped member minus that member's monthly climatology.
anom_paths = [
    os.path.join(pathToEKFmember, "EKF400v2_ens_mem_" + str(label) + '_allyears_t2m_anoms.nc')
    for label in member_index
]

for position, anom_file in enumerate(anom_paths):
    CDO.sub(
        input=remapbil_paths[position] + " " + climate_paths[position],
        output=anom_file,
    )



In [None]:
# time windows for later analysis
def _member_outputs(tag):
    """Return one output path per ensemble member label for the given file-name tag."""
    return [
        os.path.join(pathToEKFmember, "EKF400v2_ens_mem_" + str(label) + "_allyears_t2m_" + tag + ".nc")
        for label in member_index
    ]


# output file lists, one entry per member
short_anom_paths = _member_outputs("short_anoms")
ym_anom_paths = _member_outputs("anoms_ym")
lmr_ym_anom_paths = _member_outputs("lmr_anoms_ym")
JJA_anom_paths = _member_outputs("anoms_JJA")
short_JJA_anom_paths = _member_outputs("short_anoms_JJA")
short_DJF_anom_paths = _member_outputs("short_anoms_DJF")
DJF_anom_paths = _member_outputs("anoms_DJF")

# monthly anomalies restricted to 1836-1850
for position, short_file in enumerate(short_anom_paths):
    CDO.selyear("1836/1850", input=anom_paths[position], output=short_file)

# yearly means of the anomalies
for position, ym_file in enumerate(ym_anom_paths):
    CDO.yearmean(input=anom_paths[position], output=ym_file)

# yearly means restricted to 1602-2000
for ym_file, lmr_file in zip(ym_anom_paths, lmr_ym_anom_paths):
    CDO.selyear("1602/2000", input=ym_file, output=lmr_file)

# June-August mean per year
for position, jja_file in enumerate(JJA_anom_paths):
    CDO.yearmean(input="-selmon,6,7,8 " + anom_paths[position], output=jja_file)

# June-August means restricted to 1836-1850
for jja_file, short_jja_file in zip(JJA_anom_paths, short_JJA_anom_paths):
    CDO.selyear("1836/1850", input=jja_file, output=short_jja_file)

# December-February means: keep only Jan/Feb/Dec, cut timesteps 3..1205 and average every
# three consecutive timesteps. tempp.nc is overwritten for each member.
# NOTE(review): 3/1205 is hard-coded for the length of this record; presumably it drops the
# leading Jan/Feb and the trailing Dec so each triple is Dec-Jan-Feb - confirm
for position, djf_file in enumerate(DJF_anom_paths):
    CDO.selmon("1,2,12", input=anom_paths[position], output="temp.nc")
    CDO.seltimestep("3/1205", input="temp.nc", output="tempp.nc")
    os.remove("temp.nc")
    CDO.timselmean("3", input="tempp.nc", output=djf_file)

# December-February means restricted to 1837-1850
for djf_file, short_djf_file in zip(DJF_anom_paths, short_DJF_anom_paths):
    CDO.selyear("1837/1850", input=djf_file, output=short_djf_file)

# the last member's intermediate file is still on disk
os.remove("tempp.nc")

In [None]:
# Concatenate the remapped files of all members (all months) into a single file.
CDO.cat(
    input="*_allyears_t2m_remap.nc",
    output="EKF400v2_allyears_allmembers_t2m_remap.nc",
)

In [None]:
# Monthly climatology over the years 1951-1980 of the all-member file
CDO.ymonmean(
    input="-selyear,1951/1980 EKF400v2_allyears_allmembers_t2m_remap.nc",
    output="EKF400v2_allyears_allmembers_t2m_climate.nc",
)

In [None]:
# All-member anomalies: concatenated data minus the monthly climatology
CDO.sub(
    input="EKF400v2_allyears_allmembers_t2m_remap.nc EKF400v2_allyears_allmembers_t2m_climate.nc",
    output="EKF400v2_allyears_allmembers_t2m_anoms.nc",
)

## Preprocessing Last Millennium Reanalysis data

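The LMR fields are already anomalies with respect to the 1951-1980 climatology, so no climatology is subtracted here; the fields are only remapped and cut to the required time windows.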

In [None]:
# The LMR cells below use bare file names, so they rely on this working directory.
os.chdir(pathToLMR)

In [None]:
# Bilinear remap of the LMR ensemble mean onto the grid of the remapped 20CRv3 climatology file.
CDO.remapbil(
    pathTo20cr + "air.2m.mon.mean_remap_climate.nc",
    input="air_MCruns_ensemble_mean_LMRv2.1_ensmean.nc",
    output="air_MCruns_ensemble_mean_LMRv2.1_ensmean_remap.nc",
)



In [None]:
# Cut the remapped LMR anomalies to the two comparison windows: (years, output file).
lmr_windows = (
    ("1836/1850", "air_MCruns_ensemble_mean_LMRv2.1_short_anoms_ym.nc"),
    ("1602/2000", "air_MCruns_ensemble_mean_LMRv2.1_ekf400_anoms_ym.nc"),
)
for years, window_file in lmr_windows:
    CDO.selyear(
        years,
        input="air_MCruns_ensemble_mean_LMRv2.1_ensmean_remap.nc",
        output=window_file,
    )


