# Using the 20 "best" SA models, plot overall variability in melt data that we generated


In [None]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import os
import re
import sys

import matplotlib.pyplot as plt
#import matplotlib.dates as md
#from nose.tools import set_trace
import pandas as pd
import seaborn as sns

# Make a plot of overall variability by basin and surface type

In [None]:
# Load the "last20" annual-melt summary for every calibration drainage and
# combine them into a single DataFrame (alldf).
hypsometry_dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
drainageIDs = ["IN_Hunza_at_DainyorBridge",
               "AM_Vakhsh_at_Komsomolabad",
               "SY_Naryn_at_NarynTown",
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]

# DDF columns that are not needed for the melt plots.
ddf_columns = ['Snow_on_land_min_ddf', 'Snow_on_land_max_ddf',
               'Snow_on_ice_min_ddf', 'Snow_on_ice_max_ddf',
               'Exposed_glacier_ice_min_ddf', 'Exposed_glacier_ice_max_ddf']

# Melt columns that are plotted below.
melt_columns = ['Snow_on_land_melt_km3',
                'Snow_on_ice_melt_km3',
                'Exposed_glacier_ice_melt_km3']

frames = []
for drainageID in drainageIDs:
    last20_file = "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (
        hypsometry_dir, drainageID)
    print("last20 file %s" % last20_file, file=sys.stderr)
    # NOTE(review): read_pickle can execute arbitrary code embedded in the
    # file; only load files from a trusted source.
    df = pd.read_pickle(last20_file)

    # drop() without inplace returns a new frame, so df is left untouched.
    melt = df.drop(ddf_columns, axis=1)
    # The melt columns must be cast to float for seaborn to work in the plots.
    melt = melt.astype({column: float for column in melt_columns})

    frames.append(melt)

# A single concat replaces repeated DataFrame.append calls: append was removed
# in pandas 2.0 and copied the accumulated frame on every iteration.
# Row labels from each file are preserved (no ignore_index), as before.
alldf = pd.concat(frames)

In [None]:
# Short basin name: the text between the first "_" and "_at" in drainageID,
# e.g. "IN_Hunza_at_DainyorBridge" -> "Hunza".
# expand=False makes extract() return a Series for this single capture group
# regardless of the pandas version's default for `expand`.
alldf["ID"] = alldf["drainageID"].str.extract(r"_(.+)_at", expand=False)

In [None]:
# Display the combined DataFrame (notebook cell output).
alldf

In [None]:
# Display only the basin ID, the year and the three melt columns.
alldf[['ID', 'year', 'Snow_on_ice_melt_km3', 'Exposed_glacier_ice_melt_km3', 'Snow_on_land_melt_km3']]

In [None]:
# Three stacked panels (one per surface type), each a pandas boxplot of melt
# grouped by basin ID.
panels = [('Snow_on_ice_melt_km3', "Melt from Snow on Ice"),
          ('Exposed_glacier_ice_melt_km3', "Melt from Exposed Glacier Ice"),
          ('Snow_on_land_melt_km3', "Melt from Snow on Land")]

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 10))

for panel_ax, (melt_column, panel_title) in zip(axes, panels):
    alldf.boxplot(column=melt_column, by='ID', ax=panel_ax, rot=0)
    # The title is set after the boxplot call, as in the original ordering.
    panel_ax.set_title(panel_title)

for panel_ax in axes:
    panel_ax.set_ylabel('Melt ($km^3$)')

fig.suptitle("Variability in Melt from Best Models for last 20 cycles (2001-2014)")

# Leave room at the top of the figure for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.95)


In [None]:
# Same three-panel figure drawn with seaborn: one color per surface type,
# basins in a fixed order, x tick labels only on the bottom panel.
order = ['Naryn', 'Vakhsh', 'Hunza', 'Karnali', 'SaptaKosi']
bottom_tick_labels = ['Naryn (SY)', 'Vakhsh (AM)', 'Hunza (IN)',
                      'Karnali (GA)', 'SaptaKosi (BR)']
panels = [('Snow_on_ice_melt_km3', 'Blue', "Melt from Snow on Ice"),
          ('Exposed_glacier_ice_melt_km3', 'purple', "Melt from Exposed Glacier Ice"),
          ('Snow_on_land_melt_km3', 'green', "Melt from Snow on Land")]

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 10))

bottom_row = len(panels) - 1
for row, (melt_column, box_color, panel_title) in enumerate(panels):
    panel_ax = sns.boxplot(data=alldf,
                           x='ID',
                           y=melt_column,
                           order=order,
                           color=box_color,
                           ax=axes[row])
    axes[row] = panel_ax
    panel_ax.set_title(panel_title)
    if row < bottom_row:
        # Upper panels: blank x label and no tick labels.
        panel_ax.set_xlabel("")
        panel_ax.set_xticklabels([])
    else:
        panel_ax.set_xticklabels(bottom_tick_labels)
        panel_ax.set_xlabel('Calibration Basin(Used for Major Basin)')

# Optional shared y-range across panels (disabled):
#ymax = 1.1 * alldf[['Snow_on_land_melt_km3', 'Snow_on_ice_melt_km3', 'Exposed_glacier_ice_melt_km3']].max().max()
for panel_ax in axes:
    panel_ax.set_ylabel('Melt ($km^3$)')
#    panel_ax.set_ylim([0., ymax])

fig.suptitle("Variability in Melt from Best Models for last 20 cycles (2001-2014)")

# Leave room at the top of the figure for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.93)

# How to combine all 3 melt columns into a single melt column, with another column as the label

In [None]:
# Display the first three rows (by position) of alldf.
alldf.iloc[0:3]

In [None]:
# Work on a copy so the reshaping experiments below do not modify alldf.
test = alldf.copy()
test

In [None]:
# Remove the year, cycle and drainageID columns, then display the result.
unwanted_columns = ['year', 'cycle', 'drainageID']
test = test.drop(unwanted_columns, axis=1)
test

In [None]:
# Make the basin ID the row index.
test = test.set_index('ID')

In [None]:
# Display the current state of test.
test

In [None]:
# Move the column labels into an inner index level (wide -> long layout).
test = test.stack()

In [None]:
# Display the stacked result as a one-column DataFrame.
# pandas has no `as_DataFrame` method; `to_frame()` is the conversion to use.
test.to_frame()

In [None]:
# Boxplot of snow-on-land melt per basin.
# Fixes: the original call was missing the comma after x='ID' (SyntaxError),
# and passed the stacked `test` object, which has no 'ID' or
# 'Snow_on_land_melt_km3' column; alldf carries both.
fig, ax = plt.subplots(1)
b = sns.boxplot(x='ID',
                y='Snow_on_land_melt_km3',
                data=alldf,
                ax=ax)


In [None]:
# Display alldf again for reference.
alldf

In [None]:
# Sanity-check plot: distribution of the 'year' column per basin ID, drawn on
# the axes created just above.
fig, ax = plt.subplots(1)
b = sns.boxplot(data=alldf, x='ID', y='year', ax=ax)

In [None]:
# NOTE(review): `tips` is never defined in the visible cells, so this raises
# NameError unless it was created elsewhere. Presumably it is seaborn's example
# dataset (sns.load_dataset("tips")) -- confirm, or delete this cell.
tips

In [None]:
# Reindex alldf onto the labels 0..1399; the result is only displayed, not kept.
# NOTE(review): reindex raises if alldf's index contains duplicate labels,
# which it will if each per-drainage frame carried its own 0-based index --
# confirm; reset_index(drop=True) may be what was intended.
alldf.reindex(np.arange(1400))

In [None]:
# 1400 divided by 5; presumably total rows over the number of drainages.
# Only print_function is imported from __future__, so under Python 2 this is
# integer division (280); under Python 3 it yields 280.0.
1400 / 5


In [None]:
#alldf.drop(['Snow_on_land_melt_km3','Snow_on_ice_melt_km3'], inplace=True, axis=1)
# Small sample for experimenting: the first 3 rows of each of 5 consecutive
# 280-row blocks of alldf (assumes one 280-row block per drainage -- confirm).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
drainage_count = 5
rows_per_drainage = 280
sample_rows = 3
test = pd.concat([alldf.iloc[start:start + sample_rows]
                  for start in range(0,
                                     drainage_count * rows_per_drainage,
                                     rows_per_drainage)])
test


In [None]:
# Reduce the sample to year / value / ID.
# Assigning three names to `test.columns` raises a length-mismatch ValueError
# because the sample still carries every alldf column, so select the three
# columns explicitly and rename the melt column to 'value'.
# NOTE(review): exposed-glacier-ice melt is used as 'value' because the
# commented-out drop above removes the two snow columns -- confirm.
test = test[['year', 'Exposed_glacier_ice_melt_km3', 'ID']].rename(
    columns={'Exposed_glacier_ice_melt_km3': 'value'})
test

In [None]:
# Hand-built toy frame with the same year / value / ID layout, used to compare
# against the sampled data.
d = dict(year=[2001, 2002, 2003] * 2,
         value=[1., 1.5, 1.8, 2.3, 2.8, 2.8],
         ID=['Hunza'] * 3 + ['Naryn'] * 3)
df = pd.DataFrame(d)
df

In [None]:
# Inspect the Python type of the element labelled 0 in the toy 'value' column.
type(df['value'][0])

In [None]:
# Cast the sampled 'value' column to float, as was done for the melt columns.
test = test.astype({'value': float})

In [None]:
# Display the sampled frame after the float cast.
test

In [None]:

# Boxplot of the sampled 'value' column per basin ID, drawn on the axes
# created just above.
fig, ax = plt.subplots(1)
b = sns.boxplot(data=test, x='ID', y='value', ax=ax)