In [3]:
###############   Select Obs for SnowAssim   #########################

# This script takes in the entire dataset of observations from the Thompson Pass 
# modeling domain, after some preprocessing steps. 

# Inputs: a modified CSV of all of the yearly observations, originally downloaded from CSO website.
# Outputs: a series of .dat files that are formatted to be the input for SnowAssim 

# Import some things
import pandas as pd
import numpy as np
from scipy import stats
import math
from math import pi
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as pl
%matplotlib inline
import random
from random import sample

print('All modules and packages successfully loaded.')

# Bring in the CSV as a data frame
# NOTE: these documents have a lot of analysis added to them, between the original download from the
# CSO website and the working copy. Things added are 
obs_tp_2018 = pd.read_csv('csvs/2018_obs_TP_working.csv', encoding = "ISO-8859-1")
obs_tp_2017 = pd.read_csv('csvs/2017_obs_TP_working.csv', encoding = "ISO-8859-1")

########################################################
######### USER INPUT REQUIRED  ######################
########################################################

# Choose the water year for the analysis
df = obs_tp_2017

# Change date here to match the year above
y2 = 2017
y1 = y2 - 1

########################################################
############    END USER INPUT      ######################
########################################################

# Subset by seasons/time, elevation, etc.
# These subsets are specific to the domain and types of 
# CSO observations that were submitted. 
early = df[df.Y == y1]
late = df[df.Y == y2]
dec = df[df.M == 12]
jan = df[df.M == 1]
feb = df[df.M == 2]
mar = df[df.M == 3]
apr = df[df.M == 4]
may = df[df.M == 5]
jun = df[df.M == 6]
winter = df[df.M.gt(0) & df.M.lt(5)]
spring = df[df.M.gt(3) & df.M.lt(6)]
peak = late[(late.M.gt(3) & late.D.gt(14) | late.M.gt(4))]

# Subset by elevation bands
high_elev = df[df.elevation >= 1000]
low_elev = df[df.elevation <= 1000]
elev_band_0250 = df[df.elevation <= 250]
elev_band_0500 = df[df.elevation.lt(500) & df.elevation.gt(251)]
elev_band_0750 = df[df.elevation.lt(750) & df.elevation.gt(501)]
elev_band_1000 = df[df.elevation.lt(1000) & df.elevation.gt(751)]
elev_band_1250 = df[df.elevation.lt(1250) & df.elevation.gt(1001)]
elev_band_gt1250 = df[df.elevation.gt(1250)]

spring_lt1000 = df[df.M.gt(3) & df.M.lt(6) & df.elevation.lt(1000)]
spring_gt1000 = df[df.M.gt(3) & df.M.lt(6) & df.elevation.gt(1000)]

# Sort for types of ordering, ascending by elevation and date
asc_elev = df.sort_values(by=['elevation'])
asc_date = df.sort_values(by=['Y','M','D'])

# Print some details about the data set in all of the categories
print((len(high_elev)),'=high')
print((len(low_elev)),'=low')
print((len(elev_band_0250)), '=band_0250')
print((len(elev_band_0500)), '=band_0500')
print((len(elev_band_0750)), '=band_0750')
print((len(elev_band_1000)), '=band_1000')
print((len(elev_band_1250)), '=band_1250')
print((len(elev_band_gt1250)), '=band_gt1250')
print((len(early)),'=early')
print((len(late)),'=late')
print((len(winter)),'=winter')
print((len(spring)),'=spring')
print((len(peak)),'=peak')
print((len(spring_lt1000)),'=sp1000')
print((len(spring_gt1000)),'=spgt1000')
print((len(apr)),'=apr')
print((len(may)),'=may')

All modules and packages successfully loaded.
40 =high
402 =low
9 =band_0250
4 =band_0500
67 =band_0750
314 =band_1000
15 =band_1250
25 =band_gt1250
0 =early
442 =late
396 =winter
232 =spring
210 =peak
206 =sp1000
26 =spgt1000
186 =apr
46 =may


In [4]:
# This is the loop that randomly samples the previously subsetted dataset, and chooses
# a smaller subset of those data to create a csv for the assimilation input.
# OUTPUT: a separate .dat file for each of the new, randomly selected swe values/locations within the subsets.
# These files are in the exact format required for SnowAssim inputs.

########################################################
######### USER INPUT REQUIRED  ######################
########################################################
# Num_runs is the number of times the random sampling of the subsetted data is repeated.
num_runs = 10

# Num_obs is the number of CSO obs to choose within the subset
num_obs = 16

# This is the subset (defined in the cell above) we that we want to randomly sample.
subset_df = peak

########################################################
############    END USER INPUT      ######################
########################################################

# This creates a list from the index of the subset. 
sublist = subset_df.index.tolist()

# Uncomment to learn some things about the sublist
# print(sublist)
# first = sublist[0]
# print(first, 'subset first index')
# num = len(sublist)-1
# print(num, 'length of sublist')
# last = sublist[num]
# print(last,'subset last index')

# This takes the num and increments from 1 to num
for i in range(1,num_runs+1):
    # Chooses a random set of row id numbers from the index of the dataframe (subset_df)
    rand_rows = random.sample(sublist, num_obs)
    # Create a new subsetted dataframe with only the new randomly chosen rows
    new_df = subset_df.loc[rand_rows].sort_values(by=['Y','M','D']).round({'albersX': 3,'albersY': 3,'SWE_hill':3,'SWE_sturm':3})
    # Group the data using the date_agg column 
    dates = new_df.groupby(by='date_agg2')
    # Count the number of different dates from the random set
    num_dates = new_df.groupby(by='date_agg2').size()
    
    # Open a file and print the data to the file
    #OUTPUT of for loop = a set of input files for SnowAssim, with randomly sampled rows from the subset df
    # SWE values using Hill et al. 2019
    with open('subset'+str(num_obs)+'_hill_'+str(i)+'.dat','w') as file1:
        file1.write(str(len(num_dates.index))+'\n')
        for j in range(0,len(num_dates)):
            temp = new_df[new_df.date_agg2==num_dates.index[j]]
            # Access the SWE values estimated by the Hill method.
            condense_hill = temp[['albersX','albersY','SWE_hill']].sort_index()
            file1.write(str(num_dates.index[j])+'\n')
            file1.write(str(num_dates[j])+'\n')
            file1.write(condense_hill.to_string(header=False)+'\n') 
    file1.close()
    
    # Open a file and print the data to the file
    #OUTPUT of for loop = a set of input files for SnowAssim, with randomly sampled rows from the subset df
    # SWE values using Sturm et al. 2010
    with open('subset'+str(num_obs)+'_sturm_'+str(i)+'.dat','w') as f:
        line0 =str(len(num_dates.index))+'\n'
        f.write(line0)
        for k in range(0,len(num_dates)):
            line1 = str(num_dates.index[k])+'\n'
            line2 = str(num_dates[k])+'\n'
            temp = new_df[new_df.date_agg2==num_dates.index[k]]
            # Access the SWE values estimated by the Sturm method.
            condense_sturm = temp[['albersX','albersY','SWE_sturm']].sort_index()
            f.write(line1)
            f.write(line2)
            f.write(condense_sturm.to_string(header=False)+'\n') 
    f.close()
    
print('Files successfully created')

Files successfully created
