# Compare gridMET data and modeled input

### What this notebook does: 
- Compare the 2015 gridMET data pulled at TE locations (using TE_fetcher_parser_nearest_neighbor_Daily_gridMET_data) to 2015 input data from Harris and Diehl (2019)

### Why the comparison/ big picture goal: 
- Develop bias correction for gridMET data. Since 2015 input data from Harris and Diehl (2019) involved a lot of QA/QC and used climate stations, we will capitalize on that knowledge to better understand where the gridMET data might have issues. 

### Environment -- use 'ofp_env_upd2.yml' 
### will check more into contextily, maybe will update ofp_for_te, but for now kept separate

In [1]:
# step 0- import the needed packages
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

import geopandas as gpd
import contextily as ctx
import pandas as pd
import os
import numpy as np
from shapely.geometry import Point, polygon
from matplotlib.backends.backend_pdf import PdfPages

Populating the interactive namespace from numpy and matplotlib


In [2]:
# step 1- bring in df with lat longs of TE plants, gridMET data, 2015 model input data
TEdir = r'C:\WBEEP\Thermoelectric-master\Climate_data_fetcher'
TE_shp = gpd.read_file(os.path.join(TEdir, '..\GIS','2015_TE_Model_Estimates_lat.long_COMIDs.shp'))
GM_data = pd.read_csv(os.path.join(TEdir, 'interpTE_plants_w_2015_daily_gridMET.csv'))
input_2015 = pd.read_csv(os.path.join(TEdir, '..\TE_Harris_Diehl_2015', '2015_TE_input_data_AEG.csv'))
# Combined longterm pond net evap2.xlsx with 2015 input data

In [3]:
# step 2- alter format of input_2015 for easier comparison to GM_data

# remove duplicate plant EIAs and names, the duplicates are for different system types at the same location
# but not relevant for comparing climate data.Remove columns relating to CD and WT- don't have those variables to compare to
input_2015.drop_duplicates(subset = 'EIA_PLANT_ID', inplace=True)
input_2015.drop(columns = ['COUNTY', 'STATE', 'NAME_OF_WATER_SOURCE','COOLING_TYPE', 'MODEL_TYPE', 'PERCENT_CD_ALLOCATION', 
                        'POND_AREA','CD_Jan', 'CD_Feb', 'CD_Mar', 'CD_Apr', 'CD_May', 'CD_Jun', 'CD_Jul', 'CD_Aug',
                       'CD_Sep', 'CD_Oct', 'CD_Nov', 'CD_Dec','WT_Jan', 'WT_Feb', 'WT_Mar', 'WT_Apr', 'WT_May', 'WT_Jun', 'WT_Jul',
                       'WT_Aug', 'WT_Sep', 'WT_Oct', 'WT_Nov', 'WT_Dec'], inplace=True)

    

In [4]:
# step 2.1 make rows into columns for input_2015 data
plants = input_2015['EIA_PLANT_ID']
cols = ['DB','WB','WS','EV']

for i, EIA in enumerate(plants):
    temp = input_2015[input_2015['EIA_PLANT_ID'] == EIA]
    DB = temp.loc[:, 'DB_01':'DB_12']
    WB = temp.loc[:, 'WB_01':'WB_12']
    WS = temp.loc[:, 'WS_01':'WS_12']
    EV = temp.loc[:, 'EV_01':'EV_12']
    variables = [DB,WB,WS,EV]
    
    for j, var in enumerate(variables):
        df = var.T.reset_index()        
        df['index'] = df['index'].str[-2:]
        df.columns = ['MONTH', cols[j]]
        if j == 0:
            df1 = df.copy(deep=True)
        else: 
            df1 = pd.merge(df1, df, on = 'MONTH')
    
    df1['EIA_PLANT_'] =  EIA
    
    if i == 0:
        df2 = df1.copy(deep=True)
    else:
        df2 = df2.append(df1)


In [5]:
df2.head()

Unnamed: 0,MONTH,DB,WB,WS,EV,EIA_PLANT_
0,1,7.1,9.7,7.7,,3
1,2,6.5,9.1,8.7,,3
2,3,15.3,18.0,7.2,,3
3,4,19.1,21.5,6.6,,3
4,5,20.8,24.0,6.4,,3


In [6]:
GM_data

Unnamed: 0.1,Unnamed: 0,EIA_P_DATE,air_tmp_avg_C,open_wtr_et_mm,wb_tmp_C,wnd_spd_m_s
0,0,10003_2015-01-01,-8.872880,0.735000,-11.122612,1.758667
1,1,10003_2015-01-02,-3.619223,0.997136,-5.875960,2.600000
2,2,10003_2015-01-03,-7.759709,0.823536,-9.584805,3.350347
3,3,10003_2015-01-04,-6.184874,0.945000,-8.277121,2.800000
4,4,10003_2015-01-05,4.933631,2.674672,0.222053,4.765334
...,...,...,...,...,...,...
409525,409525,997_2015-12-27,2.797656,0.180086,2.067477,7.105083
409526,409526,997_2015-12-28,2.698279,1.680000,-0.114928,10.431935
409527,409527,997_2015-12-29,0.811083,0.525000,-0.383144,6.898362
409528,409528,997_2015-12-30,-0.774675,0.420000,-2.041145,2.671510


In [7]:
# step 2.2 a little more data manipulation, ordering of columns, unit conv
conv_mm=25.4 #inches to mm
conv_ms = 1/60/60*1609.34
#reorder columns
p_mod_input =df2.copy(deep=True)
p_mod_input = p_mod_input[['EIA_PLANT_','MONTH','DB', 'WB', 'WS', 'EV']]
#conv Evap from inches to mm
p_mod_input['EV'] = p_mod_input['EV']*conv_mm
#conv WS from mph to m/s
p_mod_input['WS'] = p_mod_input['WS']*conv_ms

#rename cols
cols = ['EIA_PLANT_','MONTH','DB_C', 'WB_C', 'WS_ms','EV_mm']
p_mod_input.columns = cols

In [8]:
# Step 3- calculate monthly means and medians from gridMET data
#GM_data['YEAR'] = GM_data['day'].astype(str).str[0:4]
#GM_data['MONTH'] = GM_data['day'].astype(str).str[5:7]

# interp format
GM_data['YEAR'] = GM_data['EIA_P_DATE'].astype(str).str[-10:-6]
GM_data['MONTH'] = GM_data['EIA_P_DATE'].astype(str).str[-5:-3]
GM_data['EIA_PLANT_'] = GM_data['EIA_P_DATE'].astype(str).str[0:-11]
GM_data['EIA_PLANT_'] = GM_data['EIA_PLANT_'].astype('int64')

In [9]:
GM_data.drop(columns = ['Unnamed: 0'], inplace=True) # also drops HI and AK plants, no gridMET data there to compare, so 1106 
#plants instead of 1122

In [10]:
# Step 3- calculate monthly means and medians from gridMET data
#GM_data['YEAR'] = GM_data['day'].astype(str).str[0:4]
#GM_data['MONTH'] = GM_data['day'].astype(str).str[5:7]

month_means = pd.pivot_table(GM_data, index = ['EIA_PLANT_','MONTH'], aggfunc=np.mean)
month_medians = pd.pivot_table(GM_data, index = ['EIA_PLANT_','MONTH'], aggfunc=np.median)
month_means.reset_index(inplace=True)
month_medians.reset_index(inplace=True)
#GM_data.drop(columns = ['Unnamed: 0'], inplace=True) ##not sure why I cant do this before the aggfunc, but if I do
# the number of plants reduces to 1106. Maybe there are some plants that are getting aggregated wrong. for now this is the 
# work aroudn to drop that column after


In [11]:
month_means.head()

Unnamed: 0,EIA_PLANT_,MONTH,air_tmp_avg_C,open_wtr_et_mm,wb_tmp_C,wnd_spd_m_s
0,3,1,8.420684,2.245942,5.040449,4.056752
1,3,2,7.559216,2.485715,4.222238,4.472853
2,3,3,16.939187,3.530295,12.979736,3.904093
3,3,4,20.560345,4.360121,16.793513,3.780933
4,3,5,23.277537,5.518137,19.388452,3.260868


In [12]:
# Step 4: combine data for comparison
# Adding a column called "EIA_P_DATE" which combines EIA PLANT and DATE 
month_means['EIA_P_DATE'] = month_means.apply(lambda row: str(row.EIA_PLANT_) + "_2015-" + str(row.MONTH), axis = 1)
month_medians['EIA_P_DATE'] = month_medians.apply(lambda row: str(row.EIA_PLANT_) + "_2015-" + str(row.MONTH), axis = 1)
p_mod_input['EIA_P_DATE'] = p_mod_input.apply(lambda row: str(row.EIA_PLANT_) + "_2015-" + str(row.MONTH), axis = 1)


In [13]:
month_means.shape

(13272, 7)

In [14]:
13272/1106

12.0

In [15]:
#rename cols
cols_mean =['EIA_PLANT_','MONTH','gm_DB_mean', 'gm_EV_mean', 'gm_WB_mean','gm_WS_mean','EIA_P_DATE']
month_means.columns = cols_mean
cols_median =['EIA_PLANT_','MONTH','gm_DB_med', 'gm_EV_med', 'gm_WB_med','gm_WS_med','EIA_P_DATE']
month_medians.columns = cols_median

#reorder cols
month_means = month_means[['EIA_P_DATE','EIA_PLANT_','MONTH','gm_DB_mean', 'gm_WB_mean', 'gm_WS_mean', 'gm_EV_mean']]
month_medians = month_medians[['EIA_P_DATE','EIA_PLANT_','MONTH','gm_DB_med', 'gm_WB_med', 'gm_WS_med', 'gm_EV_med']]
p_mod_input = p_mod_input[['EIA_P_DATE','EIA_PLANT_','MONTH', 'DB_C','WB_C','WS_ms','EV_mm']]


In [16]:
len(month_means) == 1106*12
len(month_medians) == 1106*12


True

In [17]:
#merge the dfs on EIA_P_DATE
mean_stats = pd.merge(month_means, p_mod_input, on = 'EIA_P_DATE')

In [18]:
#compute the obs-sim column
mean_stats['OBS_SIM_DB'] = mean_stats['DB_C'] - mean_stats['gm_DB_mean']
mean_stats['OBS_SIM_WB'] = mean_stats['WB_C'] - mean_stats['gm_WB_mean']
mean_stats['OBS_SIM_WS'] = mean_stats['WS_ms'] - mean_stats['gm_WS_mean']
mean_stats['OBS_SIM_EV'] = mean_stats['EV_mm'] - mean_stats['gm_EV_mean']

In [19]:
# compute the squared resid column
mean_stats['DB_res_sq'] = (mean_stats['OBS_SIM_DB'])**2
mean_stats['WB_res_sq'] = (mean_stats['OBS_SIM_WB'])**2
mean_stats['WS_res_sq'] = (mean_stats['OBS_SIM_WS'])**2
mean_stats['EV_res_sq'] = (mean_stats['OBS_SIM_EV'])**2

In [20]:
mean_stats.head()

Unnamed: 0,EIA_P_DATE,EIA_PLANT__x,MONTH_x,gm_DB_mean,gm_WB_mean,gm_WS_mean,gm_EV_mean,EIA_PLANT__y,MONTH_y,DB_C,...,WS_ms,EV_mm,OBS_SIM_DB,OBS_SIM_WB,OBS_SIM_WS,OBS_SIM_EV,DB_res_sq,WB_res_sq,WS_res_sq,EV_res_sq
0,3_2015-01,3,1,8.420684,5.040449,4.056752,2.245942,3,1,7.1,...,3.442199,,-1.320684,4.659551,-0.614552,,1.744207,21.711412,0.377675,
1,3_2015-02,3,2,7.559216,4.222238,4.472853,2.485715,3,2,6.5,...,3.889238,,-1.059216,4.877762,-0.583615,,1.121939,23.792558,0.340606,
2,3_2015-03,3,3,16.939187,12.979736,3.904093,3.530295,3,3,15.3,...,3.21868,,-1.639187,5.020264,-0.685413,,2.686934,25.203054,0.469791,
3,3_2015-04,3,4,20.560345,16.793513,3.780933,4.360121,3,4,19.1,...,2.950457,,-1.460345,4.706487,-0.830477,,2.132608,22.151017,0.689692,
4,3_2015-05,3,5,23.277537,19.388452,3.260868,5.518137,3,5,20.8,...,2.861049,,-2.477537,4.611548,-0.399819,,6.138191,21.266374,0.159855,


In [21]:
# fix up the df 
mean_stats.drop(columns= ['EIA_PLANT__y', 'MONTH_y'], inplace = True)

In [22]:
mean_stats.columns

Index(['EIA_P_DATE', 'EIA_PLANT__x', 'MONTH_x', 'gm_DB_mean', 'gm_WB_mean',
       'gm_WS_mean', 'gm_EV_mean', 'DB_C', 'WB_C', 'WS_ms', 'EV_mm',
       'OBS_SIM_DB', 'OBS_SIM_WB', 'OBS_SIM_WS', 'OBS_SIM_EV', 'DB_res_sq',
       'WB_res_sq', 'WS_res_sq', 'EV_res_sq'],
      dtype='object')

In [23]:
cols = ['EIA_P_DATE', 'EIA_PLANT_', 'MONTH', 'gm_DB_mean', 'gm_WB_mean',
       'gm_WS_mean', 'gm_EV_mean', 'DB_C', 'WB_C',
       'WS_ms', 'EV_mm', 'OBS_SIM_DB', 'OBS_SIM_WB',
       'OBS_SIM_WS', 'OBS_SIM_EV', 'DB_res_sq',
       'WB_res_sq', 'WS_res_sq', 'EV_res_sq']
mean_stats.columns = cols

In [24]:
# and add lat long for plotting
mean_plot = pd.merge(mean_stats, TE_shp, on = 'EIA_PLANT_')

In [25]:
mean_plot.head()
#mean_plot.fillna(-999)
#maybe dont need this


Unnamed: 0,EIA_P_DATE,EIA_PLANT_,MONTH,gm_DB_mean,gm_WB_mean,gm_WS_mean,gm_EV_mean,DB_C,WB_C,WS_ms,...,WATER_TYPE,WITHDRAWAL,CONSUMPTIO,MIN_WITHDR,MAX_WITHDR,MIN_CONSUM,MAX_CONSUM,NET_GENERA,why_no_CID,geometry
0,3_2015-01,3,1,8.420684,5.040449,4.056752,2.245942,7.1,9.7,3.442199,...,FR,412.0,7.93,189.0,2990.0,6.54,9.13,11387562,,POINT (-88.01089992 31.00696404)
1,3_2015-02,3,2,7.559216,4.222238,4.472853,2.485715,6.5,9.1,3.889238,...,FR,412.0,7.93,189.0,2990.0,6.54,9.13,11387562,,POINT (-88.01089992 31.00696404)
2,3_2015-03,3,3,16.939187,12.979736,3.904093,3.530295,15.3,18.0,3.21868,...,FR,412.0,7.93,189.0,2990.0,6.54,9.13,11387562,,POINT (-88.01089992 31.00696404)
3,3_2015-04,3,4,20.560345,16.793513,3.780933,4.360121,19.1,21.5,2.950457,...,FR,412.0,7.93,189.0,2990.0,6.54,9.13,11387562,,POINT (-88.01089992 31.00696404)
4,3_2015-05,3,5,23.277537,19.388452,3.260868,5.518137,20.8,24.0,2.861049,...,FR,412.0,7.93,189.0,2990.0,6.54,9.13,11387562,,POINT (-88.01089992 31.00696404)


In [26]:
mean_plot.drop(columns = ['NAME_OF_WA','COMID', 'COOLING_TY','GENERATION', 'WATER_SOUR', 'WATER_TYPE', 
                         'WITHDRAWAL', 'CONSUMPTIO','MIN_WITHDR', 'MAX_WITHDR', 'MIN_CONSUM', 'MAX_CONSUM', 
                         'NET_GENERA','geometry'], inplace =True)

In [None]:
mean_plot.to_csv('mean_plot_df.csv')

In [27]:
elev_df = pd.read_csv(os.path.join(TEdir, 'elev_df_interp.csv'))



In [28]:
mean_plot = pd.merge(mean_plot, elev_df, left_on = 'EIA_PLANT_', right_on = 'EIA_PLANT_ID')

In [29]:
mean_plot.columns

Index(['EIA_P_DATE', 'EIA_PLANT_', 'MONTH', 'gm_DB_mean', 'gm_WB_mean',
       'gm_WS_mean', 'gm_EV_mean', 'DB_C', 'WB_C', 'WS_ms', 'EV_mm',
       'OBS_SIM_DB', 'OBS_SIM_WB', 'OBS_SIM_WS', 'OBS_SIM_EV', 'DB_res_sq',
       'WB_res_sq', 'WS_res_sq', 'EV_res_sq', 'PLANT_NAME_x', 'COUNTY',
       'STATE', 'LATITUDE_x', 'LONGITUDE_x', 'why_no_CID', 'Unnamed: 0',
       'EIA_PLANT_ID', 'PLANT_NAME_y', 'LATITUDE_y', 'LONGITUDE_y',
       'ELEV_phys_mod', 'ELEV_gm', 'PM-gM'],
      dtype='object')

In [30]:
geometry = [Point(xy) for xy in zip(mean_plot['LONGITUDE_x'],mean_plot['LATITUDE_x'])]
geometry[:3]



[<shapely.geometry.point.Point at 0x224f749b2b0>,
 <shapely.geometry.point.Point at 0x224f749b748>,
 <shapely.geometry.point.Point at 0x224f749b358>]

In [31]:
mean_plot

Unnamed: 0.1,EIA_P_DATE,EIA_PLANT_,MONTH,gm_DB_mean,gm_WB_mean,gm_WS_mean,gm_EV_mean,DB_C,WB_C,WS_ms,...,LONGITUDE_x,why_no_CID,Unnamed: 0,EIA_PLANT_ID,PLANT_NAME_y,LATITUDE_y,LONGITUDE_y,ELEV_phys_mod,ELEV_gm,PM-gM
0,3_2015-01,3,01,8.420684,5.040449,4.056752,2.245942,7.1,9.7,3.442199,...,-88.010900,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158
1,3_2015-02,3,02,7.559216,4.222238,4.472853,2.485715,6.5,9.1,3.889238,...,-88.010900,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158
2,3_2015-03,3,03,16.939187,12.979736,3.904093,3.530295,15.3,18.0,3.218680,...,-88.010900,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158
3,3_2015-04,3,04,20.560345,16.793513,3.780933,4.360121,19.1,21.5,2.950457,...,-88.010900,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158
4,3_2015-05,3,05,23.277537,19.388452,3.260868,5.518137,20.8,24.0,2.861049,...,-88.010900,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13267,60100_2015-08,60100,08,26.847621,17.889834,1.732050,6.469384,17.5,27.7,3.397496,...,-119.419176,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808
13268,60100_2015-09,60100,09,24.765248,15.797080,1.758932,5.075685,15.6,25.7,2.726937,...,-119.419176,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808
13269,60100_2015-10,60100,10,20.919276,14.815301,1.904580,3.423831,15.3,21.7,2.190491,...,-119.419176,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808
13270,60100_2015-11,60100,11,10.042879,6.612147,1.916735,1.583513,7.9,11.0,1.832859,...,-119.419176,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808


In [32]:
crs = {'init': 'epsg:4326'}

In [33]:
#turn it into a pandas geodataframe
mean_df = gpd.GeoDataFrame(mean_plot, crs = crs, geometry= geometry)

In [34]:
mean_df.columns

Index(['EIA_P_DATE', 'EIA_PLANT_', 'MONTH', 'gm_DB_mean', 'gm_WB_mean',
       'gm_WS_mean', 'gm_EV_mean', 'DB_C', 'WB_C', 'WS_ms', 'EV_mm',
       'OBS_SIM_DB', 'OBS_SIM_WB', 'OBS_SIM_WS', 'OBS_SIM_EV', 'DB_res_sq',
       'WB_res_sq', 'WS_res_sq', 'EV_res_sq', 'PLANT_NAME_x', 'COUNTY',
       'STATE', 'LATITUDE_x', 'LONGITUDE_x', 'why_no_CID', 'Unnamed: 0',
       'EIA_PLANT_ID', 'PLANT_NAME_y', 'LATITUDE_y', 'LONGITUDE_y',
       'ELEV_phys_mod', 'ELEV_gm', 'PM-gM', 'geometry'],
      dtype='object')

In [35]:
mean_df


Unnamed: 0.1,EIA_P_DATE,EIA_PLANT_,MONTH,gm_DB_mean,gm_WB_mean,gm_WS_mean,gm_EV_mean,DB_C,WB_C,WS_ms,...,why_no_CID,Unnamed: 0,EIA_PLANT_ID,PLANT_NAME_y,LATITUDE_y,LONGITUDE_y,ELEV_phys_mod,ELEV_gm,PM-gM,geometry
0,3_2015-01,3,01,8.420684,5.040449,4.056752,2.245942,7.1,9.7,3.442199,...,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158,POINT (-88.01089992 31.00696404)
1,3_2015-02,3,02,7.559216,4.222238,4.472853,2.485715,6.5,9.1,3.889238,...,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158,POINT (-88.01089992 31.00696404)
2,3_2015-03,3,03,16.939187,12.979736,3.904093,3.530295,15.3,18.0,3.218680,...,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158,POINT (-88.01089992 31.00696404)
3,3_2015-04,3,04,20.560345,16.793513,3.780933,4.360121,19.1,21.5,2.950457,...,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158,POINT (-88.01089992 31.00696404)
4,3_2015-05,3,05,23.277537,19.388452,3.260868,5.518137,20.8,24.0,2.861049,...,,0,3,Barry,31.006964,-88.010900,4.5720,9.716158,-5.144158,POINT (-88.01089992 31.00696404)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13267,60100_2015-08,60100,08,26.847621,17.889834,1.732050,6.469384,17.5,27.7,3.397496,...,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808,POINT (-119.419176 36.571485)
13268,60100_2015-09,60100,09,24.765248,15.797080,1.758932,5.075685,15.6,25.7,2.726937,...,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808,POINT (-119.419176 36.571485)
13269,60100_2015-10,60100,10,20.919276,14.815301,1.904580,3.423831,15.3,21.7,2.190491,...,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808,POINT (-119.419176 36.571485)
13270,60100_2015-11,60100,11,10.042879,6.612147,1.916735,1.583513,7.9,11.0,1.832859,...,,1105,60100,Dinuba Energy,36.571485,-119.419176,104.2416,104.375408,-0.133808,POINT (-119.419176 36.571485)


In [37]:
np.linspace(1,12,12)
months = ['01','02','03','04', '05', '06', '07', '08', '09', '10', '11', '12']

In [38]:
rmse_dict = {'month': months,'DB': months, 'WB': months, 'WS': months, 'EV': months}


In [39]:
rmse_df = pd.DataFrame(data = rmse_dict)

In [40]:
rmse_df


Unnamed: 0,month,DB,WB,WS,EV
0,1,1,1,1,1
1,2,2,2,2,2
2,3,3,3,3,3
3,4,4,4,4,4
4,5,5,5,5,5
5,6,6,6,6,6
6,7,7,7,7,7
7,8,8,8,8,8
8,9,9,9,9,9
9,10,10,10,10,10


In [41]:
rmse_df.to_csv('rmse_df.csv')

In [45]:
monthly_df['DB_res_sq']



11        0.022718
23        0.986051
35        0.363851
47        1.064122
59        1.711327
           ...    
13223     4.894179
13235     0.069515
13247    71.588295
13259    71.585937
13271     1.190363
Name: DB_res_sq, Length: 1106, dtype: float64

In [46]:
np.mean(monthly_df[col_2])

4.788975541235927

In [48]:
np.mean(monthly_df[col_2])

4.788975541235927

In [49]:
np.sum(monthly_df[col_2])

5296.6069486069255

In [50]:
monthly_df[col_2]

11        0.022718
23        0.986051
35        0.363851
47        1.064122
59        1.711327
           ...    
13223     4.894179
13235     0.069515
13247    71.588295
13259    71.585937
13271     1.190363
Name: DB_res_sq, Length: 1106, dtype: float64

In [51]:
np.sqrt(np.mean(monthly_df[col_2]))

2.1883728067301345

In [52]:
np.sqrt(np.mean(np.sum(monthly_df[col_2])))

72.77779158924051

In [42]:
## for testing
#months = ['01']
params = ['DB']
units = ['deg_C']
# not enough memory to get through all of them, need to separate and do EV separate
#params = ['DB','WB','WS','EV']

#units = ['deg_C','deg_C','m_s','mm']

for p, param in enumerate(params):
    with PdfPages(param + units[p] +'.pdf') as pdf:
        for i, val in enumerate(months):
            if param == 'EV':
                monthly_df = mean_df[mean_df['MONTH']==val]
                monthly_df = monthly_df.dropna()
            else:
                monthly_df = mean_df[mean_df['MONTH']==val]
                
            #output shapefile
            monthly_df.to_file(os.path.join(TEdir,'..','GIS','Residuals', param, param+val+'.shp'))
            
            fig,(ax1,ax2) = plt.subplots(nrows = 2, figsize = (8,8))
            col_1 = 'OBS_SIM_'+ param                             
            col_2 = param + '_res_sq'
            RMSE = np.sqrt(np.mean(np.sum(monthly_df[col_2])))
            rmse_df.loc[i,param] = RMSE
            ax1.set_title('OBS-SIM_'+ val +'_' + param +'_'+ units[p] )           
            monthly_df.plot(ax = ax1, column = col_1, legend = True, cmap = 'gist_rainbow')
            ctx.add_basemap(ax1, crs = crs, source = ctx.providers.OpenTopoMap)
            
            ax2.set_title('OBS-SIM'+ '_' + val+ '_'+ param + '_'+units[p]+ ', RMSE = ' + str(np.round(RMSE,2)))
            ax2.hist(monthly_df[col_1])
            pdf.savefig()
            plt.clf()
            
#             #x = np.linspace(0,monthly_df[col_1].max())
            plt.figure(figsize = (8,8))
#             ax1 = f.add_subplot(211, aspect = 'equal')
#             ax1 = f.add_subplot(212, aspect = 'equal')
#             fig, (ax1, ax2) = plt.subplots(nrows = 2, figsize = (8,8))
            plt.title('Elev residuals vs ' + col_1 + ' residuals-' + 'month_'+ val)
            plt.scatter(monthly_df['PM-gM'], monthly_df[col_1])
#            ax1.scatter((monthly_df['PM - gM']/np.mean(monthly_df['PM - gM'])), 
                          #(monthly_df[col_1]/np.mean(monthly_df[col_1])))
            plt.xlabel('elev_residuals_PM-gM')
            plt.ylabel(col_1)
            
#             ax2.set_title('zero in on large diff elevs')
#             large_diffs = monthly_df[np.absolute(monthly_df['PM - gM'])>10]
#             ax2.scatter(large_diffs['PM - gM'], large_diffs[col_1])
            
            pdf.savefig()
            plt.clf()
            
        plt.figure(figsize = (8,8))    
        plt.plot(rmse_df['month'],rmse_df[param])
        plt.title('RMSE_' + str(param))
        plt.xlabel('month')
        plt.ylabel('RMSE')
        pdf.savefig()
        plt.clf()



<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

<Figure size 576x576 with 0 Axes>

In [None]:
## for testing
#months = ['01']
params = ['WB']
units = ['deg_C']
# not enough memory to get through all of them, need to separate and do EV separate
#params = ['DB','WB','WS','EV']

#units = ['deg_C','deg_C','m_s','mm']

for p, param in enumerate(params):
    with PdfPages(param + units[p] +'orig_abs'+'.pdf') as pdf:
        for i, val in enumerate(months):
                monthly_df = mean_df[mean_df['MONTH']==val]
                
            #output shapefile
            #monthly_df.to_file(os.path.join(TEdir,'..','GIS','Residuals', param, param+val+'.shp'))
            
                fig, ax1 = plt.subplots(nrows = 1, figsize = (8,8))
                col_1 = 'gm_'+ param + '_mean'                             
                col_2 = param + '_C'

                ax1.set_title('abs_'+ val +'_' + param +'_'+ units[p] )           
                monthly_df.plot(ax = ax1, column = col_1, legend = True, cmap = 'gist_rainbow')
                ctx.add_basemap(ax1, crs = crs, source = ctx.providers.OpenTopoMap)
                pdf.savefig()
                plt.clf()

In [None]:
large_diffs = monthly_df[np.absolute(monthly_df['PM - gM'])>10]

In [None]:
rmse_df

In [None]:
monthly_df.to_file(os.path.join(TEdir,'..','GIS','Residuals', param, param+val+'.shp'))

In [None]:
fig, (ax1, ax2) = plt.subplots(nrows = 2, figsize = (8,8), sharex = True, sharey= True)
ax1.set_title('RMSE_'+ val + '_' + param + '_'+ units[p])
col_0 = 'RMSE_'+ param
monthly_df.plot(ax = ax1, column = col_0, legend = True)
ctx.add_basemap(ax1, crs = crs, source = ctx.providers.OpenTopoMap)
ax2.set_title('OBS-SIM_'+ val +'_' + param +'_'+ units[p])
col_1 = 'OBS_SIM_'+ param
monthly_df.plot(ax = ax2, column = col_1, legend = True)
ctx.add_basemap(ax2, crs = crs, source = ctx.providers.OpenTopoMap)

In [None]:
ctx.providers.keys()

In [None]:
monthly_df = monthly_df.dropna(inplace =True)

In [None]:
with PdfPages('January.pdf') as pdf:
    fig, (ax1, ax2) = plt.subplots(nrows = 2, sharex = True, sharey= True)
    ax1.set_title('RMSE')
    monthly_df.plot(ax = ax1, column = 'RMSE_DB', legend = True)
    ax2.set_title('OBS-SIM')
    monthly_df.plot(ax = ax2, column = 'OBS_SIM_DB', legend = True)

    pdf.savefig()

In [None]:
fig, (ax1, ax2) = plt.subplots(nrows = 2)
ax1.set_title('RMSE')
ax1.hist(jan_df['RMSE_DB'])
ax2.set_title('OBS-SIM')
ax2.hist(jan_df['OBS_SIM'])


In [None]:
for_plot_jan.to_csv('RMSE_Jan.csv')

In [None]:
for_plot_feb = for_plot[for_plot['MONTH']=='02']

In [None]:
for_plot_jan.to_csv('RMSE_Jan.csv')

In [None]:
range_pm = np.max(for_plot_jan['DB_C'])-np.min(for_plot_jan['DB_C'])
range_gm = np.max(for_plot_jan['gm_DB_mean'])-np.min(for_plot_jan['gm_DB_mean'])

In [None]:
range_gm

In [None]:
# Identify and analyze these fits. Are the trends regional? correlations with elevation? 

In [None]:
# Calculate a bias correction for the gridMET data
# underlying assumption is that the 2015 input data is "true". Any caveats here? Although 2015 input data was rigorously
# examined, there are more temporal data with gridMET, for which there might be some value. 