<h1> Comparisons vs. Climatology </h1>
<ul>
<li> This notebook helps compare new result sets against (shifted) climatology estimates </li>
<li> Steps are described using markdown </li>
<li> Links to download external files are provided where required </li>
</ul>

In [2]:
#STEP 1 - IMPORT LIBRARIES
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime

In [3]:
#STEP 2 - LOAD THE RESULTS SET i.e. NEW FORECASTS GENERATED USING TUNED MACHINE LEARNING MODELS
#THE HDF KEY SELECTS THE SPLIT: USE 'train' OR 'test' AS NEEDED
#(however keys aren't specific at the moment in this results .h5 file)
#reset_index() moves the stored index into regular columns
df = pd.read_hdf('./fixeddataset_subset_tmp2m_prediction_34w.h5', 'test').reset_index()
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,day,tmp2m_daily_mean,tmp2m_daily_std,tmp2m_zscore,xgboost_tmp2m_pred
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12.0,4.0,14.463642,1.931331,-0.695742,15.264174
1,1998-12-04,27.25,277.75,-0.053104,19.220764,1.335648,17.885118,12.0,4.0,17.885119,2.932417,0.455476,17.729395
2,1998-12-04,27.25,279.75,-0.034797,20.347488,1.509570,18.837921,12.0,4.0,18.837921,2.769586,0.545052,18.741548
3,1998-12-04,29.25,259.75,0.244489,10.981704,-0.961662,11.943365,12.0,4.0,11.943366,1.448558,-0.663875,12.297523
4,1998-12-04,29.25,261.75,0.202258,9.672231,-2.372382,12.044614,12.0,4.0,12.044613,2.009451,-1.180612,12.451041
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2018-12-04,47.25,265.75,0.080455,-9.085908,1.751345,-10.837255,12.0,4.0,-10.837254,5.278904,0.331763,-10.412540
1439278,2018-12-04,47.25,267.75,0.115367,-8.280913,1.926876,-10.207788,12.0,4.0,-10.207789,4.820482,0.399727,-9.651663
1439279,2018-12-04,47.25,269.75,0.012582,-6.530766,2.556871,-9.087638,12.0,4.0,-9.087637,4.447800,0.574862,-9.031675
1439280,2018-12-04,47.25,271.75,0.135120,-3.761495,2.699106,-6.460601,12.0,4.0,-6.460601,3.420960,0.788991,-5.998360


In [4]:
#STEP 3 - FORMAT OR CREATE CERTAIN COLUMNS AS REQUIRED
#Forecast error of the ML model (prediction minus observation), rounded to 3 decimals
df['err'] = (df['xgboost_tmp2m_pred'] - df['tmp2m']).round(3)
#Longitude shifted by -360 (e.g. 261.75 -> -98.25)
df['lon2'] = df['lon'] - 360
#Grid-cell key "lat|lon" (used by the locations merge in STEP 6)
df['latlon'] = df['lat'].astype(str) + "|" + df['lon'].astype(str)
#Calendar parts of start_date; the zero-padded string variants feed the string keys below
df['start_date'] = pd.to_datetime(df['start_date'])
df['month'] = df['start_date'].dt.month
df['month2'] = df['month'].astype(str).str.zfill(2)
df['year'] = df['start_date'].dt.year
df['yearmonth'] = df['year'].astype(str) + '-' + df['month2']
df['date'] = df['start_date'].dt.day
df['date2'] = df['date'].astype(str).str.zfill(2)
#Key "MM|DD|lat|lon" (used by the climatology merge in STEP 4)
df['mdll'] = df['month2'] + "|" + df['date2'] + "|" + df['latlon']
#The original 'day' column is dropped ('date' now holds the day of month).
#errors='ignore' keeps this cell re-runnable: without it a second run raises KeyError
#because 'day' has already been removed.
df = df.drop(columns=['day'], errors='ignore')
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,xgboost_tmp2m_pred,err,lon2,latlon,month2,year,yearmonth,date,date2,mdll
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,15.264174,2.144,-98.25,27.25|261.75,12,1998,1998-12,4,04,12|04|27.25|261.75
1,1998-12-04,27.25,277.75,-0.053104,19.220764,1.335648,17.885118,12,17.885119,2.932417,...,17.729395,-1.491,-82.25,27.25|277.75,12,1998,1998-12,4,04,12|04|27.25|277.75
2,1998-12-04,27.25,279.75,-0.034797,20.347488,1.509570,18.837921,12,18.837921,2.769586,...,18.741548,-1.606,-80.25,27.25|279.75,12,1998,1998-12,4,04,12|04|27.25|279.75
3,1998-12-04,29.25,259.75,0.244489,10.981704,-0.961662,11.943365,12,11.943366,1.448558,...,12.297523,1.316,-100.25,29.25|259.75,12,1998,1998-12,4,04,12|04|29.25|259.75
4,1998-12-04,29.25,261.75,0.202258,9.672231,-2.372382,12.044614,12,12.044613,2.009451,...,12.451041,2.779,-98.25,29.25|261.75,12,1998,1998-12,4,04,12|04|29.25|261.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2018-12-04,47.25,265.75,0.080455,-9.085908,1.751345,-10.837255,12,-10.837254,5.278904,...,-10.412540,-1.327,-94.25,47.25|265.75,12,2018,2018-12,4,04,12|04|47.25|265.75
1439278,2018-12-04,47.25,267.75,0.115367,-8.280913,1.926876,-10.207788,12,-10.207789,4.820482,...,-9.651663,-1.371,-92.25,47.25|267.75,12,2018,2018-12,4,04,12|04|47.25|267.75
1439279,2018-12-04,47.25,269.75,0.012582,-6.530766,2.556871,-9.087638,12,-9.087637,4.447800,...,-9.031675,-2.501,-90.25,47.25|269.75,12,2018,2018-12,4,04,12|04|47.25|269.75
1439280,2018-12-04,47.25,271.75,0.135120,-3.761495,2.699106,-6.460601,12,-6.460601,3.420960,...,-5.998360,-2.237,-88.25,47.25|271.75,12,2018,2018-12,4,04,12|04|47.25|271.75


In [5]:
#STEP 4 - LOAD CLIMATOLOGY AVERAGES (SHIFTED) 5y and 28y AND MERGE IT ONTO THE DATAFRAME
#Download climatology averages from: https://github.com/vayuh/climatology-comparisons
#Columns of the climatology files that are removed before merging (only 'mdll' and the
#climatology value columns are kept)
CLIM_DROP_COLUMNS = ['Unnamed: 0', 'Unnamed: 0.1', 'month2', 'date2', 'latlon']

clim5shifted = pd.read_csv('clim5shifted.csv', encoding='utf-8').drop(columns=CLIM_DROP_COLUMNS)
clim28shifted = pd.read_csv('clim28shifted.csv', encoding='utf-8').drop(columns=CLIM_DROP_COLUMNS)

#Each forecast row picks up the climatology of its "MM|DD|lat|lon" key (inner join).
#validate='many_to_one' makes pandas raise MergeError if a climatology file repeats a key,
#instead of silently duplicating forecast rows.
df = pd.merge(df, clim5shifted, on='mdll', validate='many_to_one')
df = pd.merge(df, clim28shifted, on='mdll', validate='many_to_one')
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,month2,year,yearmonth,date,date2,mdll,5y climatology,5y_climf,28y climatology,28y_climf
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,12,1998,1998-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642
1,1999-12-04,27.25,261.75,0.335135,14.154114,-0.309528,14.463640,12,14.463642,1.931331,...,12,1999,1999-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642
2,2000-12-04,27.25,261.75,-0.246050,11.485272,-2.978370,14.463640,12,14.463642,1.931331,...,12,2000,2000-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642
3,2001-12-04,27.25,261.75,-0.179883,12.315877,-2.147764,14.463640,12,14.463642,1.931331,...,12,2001,2001-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642
4,2002-12-04,27.25,261.75,-0.058650,15.585264,1.121623,14.463640,12,14.463642,1.931331,...,12,2002,2002-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2000-02-29,47.25,273.75,0.504896,1.243469,3.149848,-1.906379,2,-1.781044,4.385852,...,02,2000,2000-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379
1439278,2004-02-29,47.25,273.75,0.005053,-3.050579,-1.144200,-1.906379,2,-1.781044,4.385852,...,02,2004,2004-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379
1439279,2008-02-29,47.25,273.75,-0.006250,-5.090876,-3.184498,-1.906379,2,-1.781044,4.385852,...,02,2008,2008-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379
1439280,2012-02-29,47.25,273.75,1.227926,5.702294,7.608674,-1.906379,2,-1.781044,4.385852,...,02,2012,2012-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379


In [6]:
#STEP 5 - COMPUTE CLIMATOLOGY ERRORS
#Same sign convention as 'err': forecast (here the climatology forecast) minus observed tmp2m
df['clim_err_5'] = df['5y_climf'].sub(df['tmp2m'])
df['clim_err_28'] = df['28y_climf'].sub(df['tmp2m'])
#Note that 28y_climf matches tmp2m_clim (up to floating-point rounding in the displayed rows) :)
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,yearmonth,date,date2,mdll,5y climatology,5y_climf,28y climatology,28y_climf,clim_err_5,clim_err_28
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,1998-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,0.941869,1.343709
1,1999-12-04,27.25,261.75,0.335135,14.154114,-0.309528,14.463640,12,14.463642,1.931331,...,1999-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,-0.092312,0.309528
2,2000-12-04,27.25,261.75,-0.246050,11.485272,-2.978370,14.463640,12,14.463642,1.931331,...,2000-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,2.576530,2.978370
3,2001-12-04,27.25,261.75,-0.179883,12.315877,-2.147764,14.463640,12,14.463642,1.931331,...,2001-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,1.745925,2.147765
4,2002-12-04,27.25,261.75,-0.058650,15.585264,1.121623,14.463640,12,14.463642,1.931331,...,2002-12,4,04,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,-1.523462,-1.121622
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2000-02-29,47.25,273.75,0.504896,1.243469,3.149848,-1.906379,2,-1.781044,4.385852,...,2000-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,-0.879473,-3.149848
1439278,2004-02-29,47.25,273.75,0.005053,-3.050579,-1.144200,-1.906379,2,-1.781044,4.385852,...,2004-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,3.414575,1.144200
1439279,2008-02-29,47.25,273.75,-0.006250,-5.090876,-3.184498,-1.906379,2,-1.781044,4.385852,...,2008-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,5.454873,3.184498
1439280,2012-02-29,47.25,273.75,1.227926,5.702294,7.608674,-1.906379,2,-1.781044,4.385852,...,2012-02,29,29,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,-5.338298,-7.608673


In [7]:
#STEP 6 - DOWNLOAD THE LOCATIONS DICTIONARY FROM THE LINK BELOW - 
#https://console.aws.amazon.com/s3/buckets/vayuh-subseasonal?region=us-east-2&prefix=results/dictionaries/&showversions=false
#MAP ON STATE, REGION AND SUB-REGION
#The 'Location' column is renamed so it can be joined on the "lat|lon" key built in STEP 3
locations = pd.read_csv("locations.csv", encoding="utf-8").rename(columns={"Location": "latlon"})
#No sort of `locations` is performed: sort_values() returns a new frame, so a bare call
#whose result is not assigned has no effect.
#validate='many_to_one' makes pandas raise MergeError if a grid cell is listed more than once
#in the dictionary, instead of silently duplicating forecast rows.
df = pd.merge(df, locations, on='latlon', validate='many_to_one')
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,mdll,5y climatology,5y_climf,28y climatology,28y_climf,clim_err_5,clim_err_28,State,Sub-Region,Region
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,0.941869,1.343709,Texas,West South Central,South
1,1999-12-04,27.25,261.75,0.335135,14.154114,-0.309528,14.463640,12,14.463642,1.931331,...,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,-0.092312,0.309528,Texas,West South Central,South
2,2000-12-04,27.25,261.75,-0.246050,11.485272,-2.978370,14.463640,12,14.463642,1.931331,...,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,2.576530,2.978370,Texas,West South Central,South
3,2001-12-04,27.25,261.75,-0.179883,12.315877,-2.147764,14.463640,12,14.463642,1.931331,...,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,1.745925,2.147765,Texas,West South Central,South
4,2002-12-04,27.25,261.75,-0.058650,15.585264,1.121623,14.463640,12,14.463642,1.931331,...,12|04|27.25|261.75,15.661179,14.061802,16.716297,14.463642,-1.523462,-1.121622,Texas,West South Central,South
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2000-02-29,47.25,273.75,0.504896,1.243469,3.149848,-1.906379,2,-1.781044,4.385852,...,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,-0.879473,-3.149848,Michigan,East North Central,Midwest
1439278,2004-02-29,47.25,273.75,0.005053,-3.050579,-1.144200,-1.906379,2,-1.781044,4.385852,...,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,3.414575,1.144200,Michigan,East North Central,Midwest
1439279,2008-02-29,47.25,273.75,-0.006250,-5.090876,-3.184498,-1.906379,2,-1.781044,4.385852,...,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,5.454873,3.184498,Michigan,East North Central,Midwest
1439280,2012-02-29,47.25,273.75,1.227926,5.702294,7.608674,-1.906379,2,-1.781044,4.385852,...,02|29|47.25|273.75,0.234673,0.363996,-4.217903,-1.906379,-5.338298,-7.608673,Michigan,East North Central,Midwest


In [8]:
#STEP 7 - MAP ON SEASONS USING THE USER DEFINED FUNCTION BELOW - 
def season(x):
    """Map a month number to a season label.

    x: month number (12, 1, 2 -> 'winter'; 3-5 -> 'spring'; 6-8 -> 'summer';
       9-11 -> 'fall').
    Returns the season label, or None when x is not one of the months 1-12.
    """
    months_by_season = (
        ('winter', (12, 1, 2)),
        ('spring', (3, 4, 5)),
        ('summer', (6, 7, 8)),
        ('fall', (9, 10, 11)),
    )
    for label, months in months_by_season:
        if x in months:
            return label
    return None

#Label every row with the season of its month (element-wise call of season())
df['season'] = df['month'].map(season)
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,5y climatology,5y_climf,28y climatology,28y_climf,clim_err_5,clim_err_28,State,Sub-Region,Region,season
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,15.661179,14.061802,16.716297,14.463642,0.941869,1.343709,Texas,West South Central,South,winter
1,1999-12-04,27.25,261.75,0.335135,14.154114,-0.309528,14.463640,12,14.463642,1.931331,...,15.661179,14.061802,16.716297,14.463642,-0.092312,0.309528,Texas,West South Central,South,winter
2,2000-12-04,27.25,261.75,-0.246050,11.485272,-2.978370,14.463640,12,14.463642,1.931331,...,15.661179,14.061802,16.716297,14.463642,2.576530,2.978370,Texas,West South Central,South,winter
3,2001-12-04,27.25,261.75,-0.179883,12.315877,-2.147764,14.463640,12,14.463642,1.931331,...,15.661179,14.061802,16.716297,14.463642,1.745925,2.147765,Texas,West South Central,South,winter
4,2002-12-04,27.25,261.75,-0.058650,15.585264,1.121623,14.463640,12,14.463642,1.931331,...,15.661179,14.061802,16.716297,14.463642,-1.523462,-1.121622,Texas,West South Central,South,winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2000-02-29,47.25,273.75,0.504896,1.243469,3.149848,-1.906379,2,-1.781044,4.385852,...,0.234673,0.363996,-4.217903,-1.906379,-0.879473,-3.149848,Michigan,East North Central,Midwest,winter
1439278,2004-02-29,47.25,273.75,0.005053,-3.050579,-1.144200,-1.906379,2,-1.781044,4.385852,...,0.234673,0.363996,-4.217903,-1.906379,3.414575,1.144200,Michigan,East North Central,Midwest,winter
1439279,2008-02-29,47.25,273.75,-0.006250,-5.090876,-3.184498,-1.906379,2,-1.781044,4.385852,...,0.234673,0.363996,-4.217903,-1.906379,5.454873,3.184498,Michigan,East North Central,Midwest,winter
1439280,2012-02-29,47.25,273.75,1.227926,5.702294,7.608674,-1.906379,2,-1.781044,4.385852,...,0.234673,0.363996,-4.217903,-1.906379,-5.338298,-7.608673,Michigan,East North Central,Midwest,winter


In [9]:
#STEP 8 - OPTIONALLY MAP ON HEAT / COLDWAVE USING ARBITRARY DEFINITIONS
#Boolean flags on the observed tmp2m: heatwave when above 34, coldwave when below -10
df['heatwave'] = df['tmp2m'].gt(34)
df['coldwave'] = df['tmp2m'].lt(-10)
df

Unnamed: 0,start_date,lat,lon,xgboost_tmp2m_pred_zscore,tmp2m,tmp2m_anom,tmp2m_clim,month,tmp2m_daily_mean,tmp2m_daily_std,...,28y climatology,28y_climf,clim_err_5,clim_err_28,State,Sub-Region,Region,season,heatwave,coldwave
0,1998-12-04,27.25,261.75,0.414498,13.119933,-1.343708,14.463640,12,14.463642,1.931331,...,16.716297,14.463642,0.941869,1.343709,Texas,West South Central,South,winter,False,False
1,1999-12-04,27.25,261.75,0.335135,14.154114,-0.309528,14.463640,12,14.463642,1.931331,...,16.716297,14.463642,-0.092312,0.309528,Texas,West South Central,South,winter,False,False
2,2000-12-04,27.25,261.75,-0.246050,11.485272,-2.978370,14.463640,12,14.463642,1.931331,...,16.716297,14.463642,2.576530,2.978370,Texas,West South Central,South,winter,False,False
3,2001-12-04,27.25,261.75,-0.179883,12.315877,-2.147764,14.463640,12,14.463642,1.931331,...,16.716297,14.463642,1.745925,2.147765,Texas,West South Central,South,winter,False,False
4,2002-12-04,27.25,261.75,-0.058650,15.585264,1.121623,14.463640,12,14.463642,1.931331,...,16.716297,14.463642,-1.523462,-1.121622,Texas,West South Central,South,winter,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439277,2000-02-29,47.25,273.75,0.504896,1.243469,3.149848,-1.906379,2,-1.781044,4.385852,...,-4.217903,-1.906379,-0.879473,-3.149848,Michigan,East North Central,Midwest,winter,False,False
1439278,2004-02-29,47.25,273.75,0.005053,-3.050579,-1.144200,-1.906379,2,-1.781044,4.385852,...,-4.217903,-1.906379,3.414575,1.144200,Michigan,East North Central,Midwest,winter,False,False
1439279,2008-02-29,47.25,273.75,-0.006250,-5.090876,-3.184498,-1.906379,2,-1.781044,4.385852,...,-4.217903,-1.906379,5.454873,3.184498,Michigan,East North Central,Midwest,winter,False,False
1439280,2012-02-29,47.25,273.75,1.227926,5.702294,7.608674,-1.906379,2,-1.781044,4.385852,...,-4.217903,-1.906379,-5.338298,-7.608673,Michigan,East North Central,Midwest,winter,False,False


In [10]:
#STEP 9 - DEFINE THE METRICS ONE WOULD WANT TO COMPUTE USING AGGFUNCS
#Functions / Metrics
def mbe(x):
    """Mean bias error: the mean of the values in `x`, rounded to 3 decimals.

    x: error values (anything np.mean accepts, e.g. a pandas Series or a list).
    """
    mean_error = np.mean(x)
    return round(mean_error, 3)

def mae(x):
    """Mean absolute error: the mean of |x|, rounded to 3 decimals.

    x: error values - a pandas Series (as handed over by pivot_table), a numpy
       array, or a plain sequence of numbers.

    np.abs is used rather than the builtin abs() so that plain lists/tuples are
    accepted too (builtin abs() raises TypeError on them), consistent with the
    inputs np.mean / np.std already accept in mbe() and sd().
    """
    return round(np.mean(np.abs(x)), 3)

def sd(x):
    """Standard deviation of `x` as computed by np.std, rounded to 3 decimals.

    x: error values (anything np.std accepts, e.g. a pandas Series or a list).
    np.std is called with its defaults, i.e. ddof=0 (population standard deviation).
    """
    spread = np.std(x)
    return round(spread, 3)

In [11]:
#HEREAFTER IT IS POSSIBLE TO PIVOT AND CHART STATEWISE / SEASONWISE / TIMEWISE USING THE NOTEBOOKS AT THE LINKS BELOW - 
##https://github.com/vayuh/climatology-comparisons
##https://github.com/vayuh/validations_analyses/tree/2degtmp

In [14]:
#Climatology comparisons by State
indexlist = ['State']

def _mae_pivot(index, error_col, flat_name, label):
    """MAE of df[error_col] per group of `index`, returned as a one-column frame named `label`."""
    table = pd.pivot_table(data=df, index=index, values=error_col, aggfunc=[mae])
    #aggfunc=[mae] gives two-level column labels such as ('mae', 'err'); flatten them to 'mae_err'
    table.columns = ["_".join(levels) for levels in table.columns]
    return table.rename(columns={flat_name: label})

vayuhpivot = _mae_pivot(indexlist, 'err', "mae_err", "Vayuh MAE")
clim5pivot = _mae_pivot(indexlist, 'clim_err_5', "mae_clim_err_5", "5y Climatology MAE")
clim28pivot = _mae_pivot(indexlist, 'clim_err_28', "mae_clim_err_28", "28y Climatology MAE")

#Side-by-side table of the three MAEs, one row per State
chartdata = pd.merge(pd.merge(vayuhpivot, clim5pivot, on=indexlist), clim28pivot, on=indexlist)
#True where the ML forecast has the lower MAE than the climatology baseline
chartdata['ml>clim5'] = chartdata['Vayuh MAE'].lt(chartdata['5y Climatology MAE'])
chartdata['ml>clim28'] = chartdata['Vayuh MAE'].lt(chartdata['28y Climatology MAE'])
#Relative MAE reduction versus each baseline: (baseline MAE - ML MAE) / baseline MAE
chartdata['MAE Reduction 5'] = chartdata['5y Climatology MAE'].sub(chartdata['Vayuh MAE']).div(chartdata['5y Climatology MAE'])
chartdata['MAE Reduction 28'] = chartdata['28y Climatology MAE'].sub(chartdata['Vayuh MAE']).div(chartdata['28y Climatology MAE'])
chartdata

Unnamed: 0_level_0,Vayuh MAE,5y Climatology MAE,28y Climatology MAE,ml>clim5,ml>clim28,MAE Reduction 5,MAE Reduction 28
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alabama,1.31,2.521,1.512,True,True,0.480365,0.133598
Arizona,1.195,2.978,1.444,True,True,0.598724,0.172438
Arkansas,1.483,2.991,1.727,True,True,0.504179,0.141285
California,1.204,2.818,1.489,True,True,0.572747,0.191404
Colorado,1.409,3.167,1.657,True,True,0.555099,0.149668
Florida,1.005,1.814,1.149,True,True,0.445976,0.125326
Georgia,1.295,2.573,1.499,True,True,0.496696,0.136091
Idaho,1.492,3.662,1.732,True,True,0.592572,0.138568
Illinois,1.863,3.619,2.137,True,True,0.485217,0.128217
Indiana,1.852,3.447,2.07,True,True,0.462721,0.105314


In [17]:
#Climatology comparisons by Season
indexlist = ['season']

#MAE of the ML forecast per season; aggfunc=[mae] gives two-level column labels
#such as ('mae', 'err'), which are flattened to 'mae_err' before renaming
vayuhpivot = pd.pivot_table(data=df, index=indexlist, values='err', aggfunc=[mae])
vayuhpivot.columns = ["_".join(levels) for levels in vayuhpivot.columns]
vayuhpivot = vayuhpivot.rename(columns={"mae_err":"Vayuh MAE"})

#MAE of the 5y (shifted) climatology per season
clim5pivot = pd.pivot_table(data=df, index=indexlist, values='clim_err_5', aggfunc=[mae])
clim5pivot.columns = ["_".join(levels) for levels in clim5pivot.columns]
clim5pivot = clim5pivot.rename(columns={"mae_clim_err_5":"5y Climatology MAE"})

#MAE of the 28y (shifted) climatology per season
clim28pivot = pd.pivot_table(data=df, index=indexlist, values='clim_err_28', aggfunc=[mae])
clim28pivot.columns = ["_".join(levels) for levels in clim28pivot.columns]
clim28pivot = clim28pivot.rename(columns={"mae_clim_err_28":"28y Climatology MAE"})

#Side-by-side table of the three MAEs, one row per season
chartdata2 = pd.merge(pd.merge(vayuhpivot, clim5pivot, on=indexlist), clim28pivot, on=indexlist)
#True where the ML forecast has the lower MAE than the climatology baseline
chartdata2['ml>clim5'] = chartdata2['Vayuh MAE'].lt(chartdata2['5y Climatology MAE'])
chartdata2['ml>clim28'] = chartdata2['Vayuh MAE'].lt(chartdata2['28y Climatology MAE'])
#Relative MAE reduction versus each baseline: (baseline MAE - ML MAE) / baseline MAE
chartdata2['MAE Reduction 5'] = chartdata2['5y Climatology MAE'].sub(chartdata2['Vayuh MAE']).div(chartdata2['5y Climatology MAE'])
chartdata2['MAE Reduction 28'] = chartdata2['28y Climatology MAE'].sub(chartdata2['Vayuh MAE']).div(chartdata2['28y Climatology MAE'])
chartdata2

Unnamed: 0_level_0,Vayuh MAE,5y Climatology MAE,28y Climatology MAE,ml>clim5,ml>clim28,MAE Reduction 5,MAE Reduction 28
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
fall,1.713,3.687,2.017,True,True,0.535395,0.150719
spring,1.362,3.609,1.639,True,True,0.62261,0.169005
summer,1.072,2.258,1.296,True,True,0.525244,0.17284
winter,2.077,3.794,2.448,True,True,0.452557,0.151552
