## Advanced Visualization

In [None]:
# Load the observation/simulation table from CSV and preview its first rows
import pandas as pd
data = pd.read_csv('data_obs_sims.csv')
data.head()

In [None]:
# Dimensions of the loaded table as (rows, columns), before any cleaning
data.shape

In [None]:
# Drop every row that contains at least one NaN.
# `data` is rebound to the cleaned copy instead of using inplace=True, so the
# frame object that was originally loaded is not mutated behind the reader's back.
data = data.dropna()

In [None]:
# Exercise: keep the observations and the simulations in two separate frames.
# (The split is not needed for the analysis itself; it is done for practice.)
obs = data['Obs'].to_frame()   # single-column DataFrame holding 'Obs'
# Every column after the first one; presumably these are the simulation runs,
# i.e. 'Obs' is expected to sit at position 0 -- TODO confirm against the CSV.
sim = data.iloc[:, 1:]

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# If seaborn is not installed in the active environment, uncomment the next
# line to install it through conda before importing.
# !conda install --yes seaborn
import seaborn as sns
# Apply seaborn's default plot styling; color_codes=True additionally remaps
# matplotlib's one-letter colour codes ('b', 'g', 'r', ...) to the seaborn palette.
sns.set(color_codes=True)

In [None]:
import warnings
# Suppress every Python warning from here on. This keeps the plot output tidy,
# but note that it is global: deprecation notices from the plotting libraries
# are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
# Two stacked panels for the observations: a horizontal box plot on top and a
# 30-bin histogram with a KDE curve underneath.
fig, (ax_top, ax_bottom) = plt.subplots(nrows=2, ncols=1, figsize=(12, 7))
sns.boxplot(data=obs, orient='h', ax=ax_top)
sns.distplot(obs, kde=True, bins=30, ax=ax_bottom);

In [None]:
import numpy as np
# Let us create a function
def histogram_boxplot(feature, figsize=(12,7), bins = None):
    """Draw a box plot stacked above a histogram of ``feature`` on a shared x-axis.

    The upper panel is a horizontal box plot with an extra marker at the mean.
    The lower panel is a histogram with a dashed vertical line at the mean and
    a solid vertical line at the median.

    Parameters
    ----------
    feature : one-dimensional numeric data
        Values to plot (the notebook passes a pandas Series).
    figsize : tuple, default (12, 7)
        Figure size forwarded to ``plt.subplots``.
    bins : optional
        Bin specification forwarded to ``sns.distplot``. When it is truthy the
        histogram is drawn with those bins and without a KDE curve; otherwise
        seaborn picks the bins itself and overlays a KDE curve.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    fig, (ax_box, ax_hist) = plt.subplots(
        nrows=2,                                  # box plot on top, histogram below
        sharex=True,                              # both panels use the same x-axis
        gridspec_kw={"height_ratios": (.5, .5)},  # equal panel heights
        figsize=figsize,
    )

    # Pass the data as x= explicitly: the box must be horizontal to line up with
    # the shared x-axis, and seaborn releases differ in how they interpret a
    # bare positional argument (x in older versions, data in newer ones).
    sns.boxplot(x=feature, ax=ax_box, showmeans=True)

    # NOTE(review): distplot is deprecated in recent seaborn releases; it is kept
    # here because the installed seaborn version is not visible from this cell.
    if bins:
        sns.distplot(feature, kde=False, ax=ax_hist, bins=bins)
    else:
        sns.distplot(feature, kde=True, ax=ax_hist)

    # NaN-aware statistics, so a single missing value does not turn the marker
    # position into NaN and silently drop the line from the plot.
    ax_hist.axvline(np.nanmean(feature), linestyle='--')   # mean
    ax_hist.axvline(np.nanmedian(feature), linestyle='-')  # median

In [None]:
# Combined box plot / histogram of the observed series, using the helper above
# with its default figure size and automatic binning
histogram_boxplot(obs['Obs'])

In [None]:
# Side-by-side box plots, one per column of `sim`
fig, ax = plt.subplots(figsize=(15, 7))
sns.boxplot(data=sim, palette='viridis', ax=ax);

In [None]:
# Same comparison, restricted to Sim1 through Sim5 for readability
first_five = ['Sim1', 'Sim2', 'Sim3', 'Sim4', 'Sim5']
fig, ax = plt.subplots(figsize=(12, 7))
sns.boxplot(data=sim[first_five], palette='viridis', ax=ax);

In [None]:
# Pairwise correlation matrix of Sim1 through Sim5, drawn as an annotated heat map
sim5 = sim.loc[:, ['Sim1', 'Sim2', 'Sim3', 'Sim4', 'Sim5']]
corr_matrix = sim5.corr()
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(corr_matrix, annot=True, cmap='viridis', ax=ax);

In [None]:
# Scatter-plot matrix for Sim1 through Sim3
sim3 = sim.loc[:, ['Sim1', 'Sim2', 'Sim3']]
sns.pairplot(data=sim3);

In [None]:
from hydroeval import evaluator, nse
# Score the simulation columns against the observations with hydroeval's `nse`
# objective function, then transpose the result (as the original cell did).
my_nse = evaluator(nse, sim, obs, axis=0).T

In [None]:
# Tabulate one NSE score per simulation. The labels are taken from the columns
# of `sim` (the frame that was scored) rather than a hand-typed list, so they
# stay in step with the data if simulations are added, removed or reordered.
# NOTE: this rebinds `my_nse` from the raw scores to a DataFrame, so the scoring
# cell has to be re-run before this cell can be run a second time.
my_nse = pd.DataFrame({'Simulation': list(sim.columns), 'NSE': my_nse})

In [None]:
# Bar chart of the NSE score for each simulation
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=my_nse, x='Simulation', y='NSE', palette='viridis', ax=ax);