In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as pl
%matplotlib inline
from scipy.stats import kde
import seaborn as sns
import gdal
import os

print('All packages imported succesfully')

# Define the year
year = '2018'
#year = '2017'

# Names of the per-year output folders; all of them live under ../results/
str_ECDF = 'ECDF_'+str(year)+'_calibrated'
str_histogram = 'histogram_'+str(year)+'_calibrated'
str_textfiles = 'textfiles_'+str(year)+'_calibrated'
str_topo = 'topo_2DH_'+str(year)+'_calibrated'
# Base name of the summary CSV that is written into ../results/stats
str_stats = 'stats_'+str(year)+'_calibrated'

# Create the output folders. exist_ok=True makes this cell safe to re-run
# (os.mkdir raised FileExistsError the second time), and makedirs also creates
# ../results itself when it does not exist yet.
for _results_subdir in (str_ECDF, str_histogram, str_textfiles, str_topo, 'stats'):
    os.makedirs('../results/'+_results_subdir, exist_ok=True)

# Define the rank from the input filename
run = ['r16_1', 'r16_2','r16_3','r16_4','r16_5']

# Define which files you want to print out from the rank list above, accessed by position in the list.
allfiles = range(0,5)

# Define the limits for the topo 2DH figs
xlim_yearly=(0,6)


In [None]:
def _read_band_as_list(path):
    """Return band 1 of the GeoTIFF at ``path`` as a flat, row-major Python list.

    Raises:
        FileNotFoundError: if GDAL hands back no dataset for ``path``.
    """
    raster = gdal.Open(path)
    if raster is None:
        # Without GDAL exceptions enabled, a missing/unreadable file yields None;
        # fail here with the offending path instead of a later AttributeError.
        raise FileNotFoundError('GDAL could not open raster: '+path)
    return np.asarray(raster.GetRasterBand(1).ReadAsArray()).flatten().tolist()


def datasets(rank,year):
    """Load the rasters for one simulation and return the filtered per-cell table.

    Band 1 of five GeoTIFFs is read and flattened into the columns ``rs``,
    ``wiAssim``, ``woAssim``, ``veg`` and ``topo``. Only the ``wiAssim`` raster
    depends on ``rank``; the other paths depend on ``year`` alone. The rasters
    must all have the same number of cells, otherwise pandas raises ValueError
    when the table is assembled.

    Rows are then dropped in three steps, printing the remaining count after each:
    ``rs`` below zero (negative / no-data), ``veg`` equal to 20 (glacier cells),
    and ``rs`` above 10 (extreme remote sensing values).

    Args:
        rank: run identifier embedded in the wiAssim file name (e.g. 'r16_1').
        year: year used in the directory and file names.

    Returns:
        pandas.DataFrame holding the rows that survive all three filters.

    Raises:
        FileNotFoundError: if any of the five rasters cannot be opened.
    """
    sim_dir = '../data/aligned_resampled_'+str(year)+'_calibrated/'
    topo_veg_dir = '../data/topo_veg/'

    # Create a dataframe from the flattened rasters, one column per raster
    dataset = pd.DataFrame({
        'rs': _read_band_as_list(sim_dir+'rs_interpolated_'+str(year)+'_resample.tif'),
        'wiAssim': _read_band_as_list(sim_dir+'snowd_wiAssim_'+rank+'_'+str(year)+'_resample.tif'),
        'woAssim': _read_band_as_list(sim_dir+'snowd_woAssim_'+str(year)+'_resample.tif'),
        'veg': _read_band_as_list(topo_veg_dir+'tp_veg_resample_'+str(year)+'.tif'),
        'topo': _read_band_as_list(topo_veg_dir+'tp_topo_resample_'+str(year)+'.tif'),
    })

    ## Remove the negative and no-data values
    dataset_rzero = dataset[dataset.rs >= 0]
    print(len(dataset_rzero),'# of  '+str(year)+' Grid cells from '+rank+' simulation that are greater than zero')

    # Remove glacier cells
    dataset_rg = dataset_rzero[dataset_rzero.veg != 20]
    print(len(dataset_rg),'# of  '+str(year)+' Grid cells from '+rank+' simulation without glacier cell values')

    # Remove the high end rs values
    data = dataset_rg[dataset_rg.rs <=10]
    print(len(data),'# of  '+str(year)+' Grid cells from '+rank+' simulation without extreme remote sensing values')

    return data


In [None]:
def rmse(predictions, targets):
    """Root-mean-square error between ``predictions`` and ``targets``.

    Both arguments must support element-wise subtraction, ``**`` and ``.mean()``
    (numpy arrays or pandas Series, for example).
    """
    residuals = predictions - targets
    return np.sqrt((residuals ** 2).mean())

In [None]:
def RMSE_wiAssim(data):
    """Return the RMSE of the ``wiAssim`` column measured against the ``rs`` column."""
    return rmse(data.wiAssim, data.rs)

In [None]:
def RMSE_woAssim(data):
    """Return the RMSE of the ``woAssim`` column measured against the ``rs`` column."""
    return rmse(data.woAssim, data.rs)

In [None]:
def _scaled_bin_count(series_max, reference_max, reference_bins=100):
    """Scale ``reference_bins`` by ``series_max/reference_max``, never returning fewer than 1."""
    if not reference_max > 0:
        # The ratio is undefined here (0/0 gave NaN and int() raised); use the reference count.
        return reference_bins
    return max(1, int((series_max/reference_max)*reference_bins))


def histogram_plot(data):
    """Save overlaid count histograms of the ``rs``, ``woAssim`` and ``wiAssim`` columns.

    The rs histogram gets 100 bins; the two model histograms get a bin count
    scaled by the ratio of their maximum to the rs maximum. A dashed vertical
    line marks each median. The bin count, median and maximum of every series
    are printed.

    Besides ``data``, this reads the module-level names ``rank``, ``year`` and
    ``str_histogram`` to build the output path, so they must be set before the
    call. The figure is written to disk and closed; nothing is returned.
    """
    sns.set(style="white", palette="muted", color_codes=True)
    sns.set_context("notebook",font_scale=1.5, rc={"lines.linewidth": 2.5})

    # All bin counts are expressed relative to the remote sensing maximum
    rs_max = np.max(data.rs)

    # (column, colour, histogram label, median-line label, console messages)
    layers = (
        ('rs', 'slategray', 'RS Dataset', 'RS Median',
         ('nbins RS', 'RS Median', 'RS Max')),
        ('woAssim', 'plum', 'NoAssim', 'NoAssim Median',
         ('nbins No CSO', 'Median No CSO', 'Max No CSO')),
        ('wiAssim', 'steelblue', 'Best CSO', 'Best CSO Median',
         ('nbins With CSO', 'Median With CSO', 'Max With CSO')),
    )

    median_lines = []
    for column, colour, hist_label, median_label, (bins_msg, median_msg, max_msg) in layers:
        values = data[column]
        series_median = np.median(values)
        series_max = np.max(values)
        n_bins = _scaled_bin_count(series_max, rs_max)
        print(n_bins, bins_msg)
        print(round(series_median,3), median_msg)
        print(series_max, max_msg)

        # Plain count histogram. This replaces the deprecated sns.distplot(kde=False),
        # which drew the same thing through Axes.hist with alpha=0.4.
        plt.hist(np.asarray(values, dtype=float), bins=n_bins, color=colour, alpha=0.4, label=hist_label)
        median_lines.append((series_median, colour, median_label))

    plt.xlim(0,8)
    plt.tick_params(labelsize=16)
    plt.xlabel('Snow Depth (m)', fontsize=18)
    plt.ylabel('Count', fontsize=18)
    #plt.title('LiDAR vs SnowModel Histograms',fontsize=22)
    # Median markers are added after all histograms so the legend order is unchanged
    for series_median, colour, median_label in median_lines:
        plt.axvline(series_median, color=colour, linestyle='dashed', label=median_label)
    plt.legend(fontsize=14)
    plt.tight_layout()
    plt.savefig('../results/'+str_histogram+'/histogram_'+rank+'_'+str(year)+'_wiAssim.png',dpi=400)
    plt.close('all')

In [None]:
def ECDFplot(data):
    """Plot the empirical CDFs of ``rs``, ``woAssim`` and ``wiAssim`` and save the figure.

    Reads the module-level names ``rank``, ``year`` and ``str_ECDF`` to build
    the output path.

    Returns:
        Tuple ``(rs_median, wo_median, wi_median, rs_mean, wo_mean, wi_mean)``.
    """
    from statsmodels.distributions.empirical_distribution import ECDF

    # (column, legend label, extra line styling)
    curves = (
        ('rs', 'RS', {'linestyle': 'dashed'}),
        ('woAssim', 'NoAssim', {}),
        ('wiAssim', 'Best CSO', {}),
    )
    colours = {'rs': 'slategray', 'woAssim': 'plum', 'wiAssim': 'steelblue'}

    plt.xlim(0,8)
    plt.xlabel('Snow Depth (m)', fontsize=18)
    for column, legend_label, extra_style in curves:
        curve = ECDF(data[column])
        plt.plot(curve.x, curve.y, color=colours[column], linewidth=3.0, label=legend_label, **extra_style)
    plt.legend()
    plt.tight_layout()
    plt.savefig('../results/'+str_ECDF+'/ECDF_'+rank+'_'+str(year)+'_wiAssim.png', dpi= 400)
    plt.close('all')

    columns = ('rs', 'woAssim', 'wiAssim')
    medians = tuple(np.median(data[column]) for column in columns)
    means = tuple(np.mean(data[column]) for column in columns)
    return medians + means

In [None]:
def KSstat(data):
    """Run two-sample Kolmogorov-Smirnov tests of ``rs`` against both model columns.

    Each test is computed once; the result is printed and then returned
    (previously every test was evaluated twice).

    Args:
        data: table with ``rs``, ``woAssim`` and ``wiAssim`` columns.

    Returns:
        Tuple ``(ks_wo, ks_wi)`` of ``scipy.stats.ks_2samp`` results for
        rs vs. woAssim and rs vs. wiAssim respectively.
    """
    ks_wo = stats.ks_2samp(data.rs, data.woAssim)
    ks_wi = stats.ks_2samp(data.rs, data.wiAssim)

    print(ks_wo,'KS Stat for RS to No Assimilation')
    print(ks_wi,'KS Stat for RS to CSO Assimilation')

    return ks_wo, ks_wi

In [None]:
def write_text(rank,year):
    """Write the statistics of the current run to a text report.

    Besides its arguments this reads the module-level names ``str_textfiles``,
    ``rmse_values_wi``, ``rmse_values_wo``, ``rs_me``, ``wo_me``, ``wi_me``,
    ``rs_m``, ``wo_m``, ``wi_m``, ``ks_wi`` and ``ks_wo``; they must be set
    before the call.

    The summary cell later in the notebook reads this report back by line
    position and by the text in front of each value, so keep the line order and
    the wording unchanged.

    Args:
        rank: run identifier used in the file name and header line.
        year: year used in the file name and header line.
    """
    path = '../results/'+str_textfiles+'/woAssim_'+rank+'_'+str(year)+'_info.txt'

    # Build every line first, so a missing value fails before the file is truncated
    report_lines = [
        'Variables of interest for'+rank+'_'+str(year)+'\n',
        'The RMSE (m) for wiAssim = '+str(round(rmse_values_wi,3))+'\n',
        'The RMSE (m) for woAssim = '+str(round(rmse_values_wo,3))+'\n',
        'The RS median value = '+str(round(rs_me,3))+'\n',
        'The woAssim median value = '+str(round(wo_me,3))+'\n',
        'The wiAssim median value = '+str(round(wi_me,3))+'\n',
        'The RS mean value = '+str(round(rs_m,3))+'\n',
        'The woAssim mean value = '+str(round(wo_m,3))+'\n',
        'The wiAssim mean value = '+str(round(wi_m,3))+'\n',
        'The KS value for wiAssim = '+str(ks_wi)+'\n',
        'The KS value for woAssim = '+str(ks_wo)+'\n',
    ]

    # The context manager closes the file even if a write fails
    with open(path,'w') as file:
        file.writelines(report_lines)


In [None]:
def wiAssim_2dh(data):
    """Save a KDE joint plot of ``wiAssim`` (x axis) against ``topo`` (y axis).

    Reads the module-level names ``xlim_yearly``, ``str_topo`` and ``rank``;
    the output file name contains ``rank``, so one figure is kept per run.
    """
    sns.set(style="white", palette="muted", color_codes=True)
    sns.set_context("notebook",font_scale=1.5, rc={"lines.linewidth": 2.5})

    # x/y are passed by keyword: seaborn >= 0.12 rejects positional data arguments,
    # while the keyword form is accepted by older releases as well.
    g = sns.jointplot(x=data.wiAssim, y=data.topo, kind='kde', height=8, space=0, xlim=xlim_yearly, ylim=(0, 2000), color='steelblue')
    g.set_axis_labels('Modeled Snow Depth (m)', 'Terrain DEM (m)')
    # Make sure to include the best/med/ninety in the title
    g.savefig('../results/'+str_topo+'/'+rank+'_'+str_topo+'.png',dpi=400)

    plt.close('all')


In [None]:
def RS_2dh(data):
    """Save a KDE joint plot of ``rs`` (x axis) against ``topo`` (y axis).

    Reads the module-level names ``xlim_yearly`` and ``str_topo``. The output
    file name does not contain the run, so repeated calls overwrite one file.
    """
    sns.set(style="white", palette="muted", color_codes=True)
    sns.set_context("notebook",font_scale=1.5, rc={"lines.linewidth": 2.5})

    # x/y are passed by keyword: seaborn >= 0.12 rejects positional data arguments,
    # while the keyword form is accepted by older releases as well.
    g = sns.jointplot(x=data.rs, y=data.topo, kind='kde', height=8, space=0, xlim=xlim_yearly, ylim=(0, 2000), color='slategray')
    g.set_axis_labels('Snow Depth (m) RS', 'Terrain DEM (m)')
    # Make sure to include the best/med/ninety in the title
    g.savefig('../results/'+str_topo+'/RS_'+str_topo+'.png',dpi=400)
    plt.close('all')


In [None]:
def woAssim_2dh(data):
    """Save a KDE joint plot of ``woAssim`` (x axis) against ``topo`` (y axis).

    Reads the module-level names ``xlim_yearly`` and ``str_topo``. The output
    file name does not contain the run, so repeated calls overwrite one file.
    """
    sns.set(style="white", palette="muted", color_codes=True)
    sns.set_context("notebook",font_scale=1.5, rc={"lines.linewidth": 2.5})

    # x/y are passed by keyword: seaborn >= 0.12 rejects positional data arguments,
    # while the keyword form is accepted by older releases as well.
    g = sns.jointplot(x=data.woAssim, y=data.topo, kind='kde', height=8, space=0, xlim=xlim_yearly, ylim=(0, 2000), color='plum')
    g.set_axis_labels('Modeled Snow Depth (m)', 'Terrain DEM (m)')
    # Make sure to include the best/med/ninety in the title
    g.savefig('../results/'+str_topo+'/woAssim_'+str_topo+'.png',dpi=400)

    plt.close('all')


In [None]:
# Driver loop

# Push every selected run through the whole analysis chain.
# The names assigned here are load-bearing module-level state: the plotting
# helpers read `rank`, and write_text reads the rmse_values_*, median/mean and
# ks_* variables, so they must keep exactly these names.
for i in allfiles:

    rank = run[i]
    progress = 'Working on run '+rank

    # Load and filter the rasters for this run, then score both simulations
    print(progress+' remote sensing analysis')
    data = datasets(rank,year)
    rmse_values_wi = RMSE_wiAssim(data)
    rmse_values_wo = RMSE_woAssim(data)

    print(progress+' Histogram')
    histogram_plot(data)

    print(progress+' other stats')
    wiAssim_2dh(data)
    (rs_me, wo_me, wi_me,
     rs_m, wo_m, wi_m) = ECDFplot(data)
    ks_wo, ks_wi = KSstat(data)
    write_text(rank,year)

# These two figures are drawn once, from the table of the last run processed
RS_2dh(data)
woAssim_2dh(data)

print('All RS plots created successfully')

In [None]:
text_path = '../results/'+str_textfiles

# List the text files in the directory path (defined above). Sorted, because
# os.listdir returns entries in arbitrary order.
text_dir = sorted(os.listdir(text_path))
print(text_dir)

column_names = ["name", "rmse", "ks_stat","median","mean"]


def _value_after_equals(report_line):
    """Return the text after the first ' = ' of a report line, without the newline."""
    return report_line.split(' = ', 1)[1].rstrip('\n')


# Local names below deliberately avoid `rmse` and `data`: rebinding them at
# module level shadowed the rmse() function and the analysis table, which broke
# any re-run of the driver loop after this cell.
summary_rows = []

# Read back the report of every selected run. The file name is rebuilt from the
# run list (same pattern write_text uses) instead of indexing os.listdir, so
# stray files or a different listing order cannot mix up or drop rows.
for i in allfiles:

    report_path = text_path+'/woAssim_'+run[i]+'_'+str(year)+'_info.txt'
    with open(report_path, 'rt') as myfile:
        report = myfile.readlines()

    # Header is 'Variables of interest for<rank>_<year>'; keep only <rank>
    header = report[0].rstrip('\n')
    name_run = header[len('Variables of interest for'):].rsplit('_', 1)[0]
    print(name_run)

    # Line 1: RMSE of the wiAssim simulation
    rmse_value = _value_after_equals(report[1])
    print(rmse_value)

    # Line 9: str() of the ks_2samp result for wiAssim. Locate the statistic by
    # its field name rather than a fixed column, so other result class names or
    # a wrapped scalar repr still parse; keep the original 7-character width.
    ks_field = report[9].split('statistic=', 1)[1].split(',', 1)[0]
    ks_value = ks_field.split('(')[-1].rstrip(')\n')[:7]
    print(ks_value)

    # Line 5: wiAssim median, line 8: wiAssim mean
    med = _value_after_equals(report[5])
    me = _value_after_equals(report[8])

    summary_rows.append([name_run, rmse_value, ks_value, med, me])

# Build the table once instead of concatenating inside the loop
df = pd.DataFrame(summary_rows, columns=column_names)
print(df)

df.to_csv('../results/stats/'+str_stats+'.csv')