In [30]:
import pandas as pd
import pickle
from datetime import datetime
import numpy as np
import logging

In [31]:
def open_dict(input_file):
    """Load and return the object stored in the pickle file ``input_file``.

    The file is opened in binary mode and is closed again before the
    loaded object is handed back to the caller.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at pickle files from a trusted source.
    """
    with open(input_file, mode='rb') as pickle_handle:
        payload = pickle.load(pickle_handle)
    return payload

In [32]:
def get_dates(data_dict):
    """Collect every distinct, non-NaN date stored anywhere in ``data_dict``.

    Parameters
    ----------
    data_dict : dict
        Nested mapping region -> sub-region -> resort -> year -> record,
        where every record has a ``'dates'`` entry holding an iterable of
        hashable date values.

    Returns
    -------
    list
        The unique dates in ascending order (empty when no dates are found).
    """
    print('Fetching dates...')
    dates = set()
    for sub_regions in data_dict.values():
        for resorts in sub_regions.values():
            for years in resorts.values():
                for record in years.values():
                    # NaN is the only value that is not equal to itself, so
                    # ``v == v`` drops missing dates. Updating the set in
                    # place avoids copying the whole accumulated set for
                    # every record, as repeated ``set.union`` calls would.
                    dates.update(v for v in record['dates'] if v == v)
    return sorted(dates)

In [33]:
def df_shells(dates):
    """Create the two empty output tables: one for snowfall, one for depth.

    Both frames share the same header: the three identifying columns
    followed by one column per entry of ``dates`` (which must be a list,
    because it is concatenated onto a list).

    Returns the pair ``(sf_df, sd_df)`` as two independent DataFrames.
    """
    id_columns = ['Region', 'Sub-Region', 'Resort']
    header = id_columns + dates
    snowfall_shell = pd.DataFrame(columns=header)
    depth_shell = pd.DataFrame(columns=header)
    return snowfall_shell, depth_shell

In [34]:
def date_dict_shell(dates):
    """Return a new dict with one key per entry of ``dates``, all set to NaN."""
    return dict.fromkeys(dates, np.nan)

In [35]:
def name_shell(date_shell, region, sub_region, resort):
    """Build the per-resort row templates for snowfall and snow depth.

    A copy of ``date_shell`` is labelled with the region, the first element
    of ``sub_region`` and the first element of ``resort``; a second copy of
    that labelled row is made for snow depth. ``date_shell`` itself is left
    untouched.

    Returns the pair ``(sf_shell, sd_shell)``.
    """
    sf_shell = date_shell.copy()
    sf_shell.update({
        'Region': region,
        # Only element 0 of each key is used as the label.
        # NOTE(review): presumably these keys are tuples/lists whose first
        # item is the name - confirm; a plain string would be cut down to
        # its first character here.
        'Sub-Region': sub_region[0],
        'Resort': resort[0],
    })

    sd_shell = sf_shell.copy()
    return sf_shell, sd_shell

In [36]:
def _append_rows(frame, rows):
    """Return ``frame`` extended by ``rows`` (a list of dicts), built in one step."""
    if not rows:
        return frame
    # A single constructor call replaces one DataFrame.append per row; the
    # default RangeIndex labels the new rows 0..len(rows)-1.
    block = pd.DataFrame(rows)
    # Keep the column order of ``frame``; keys it does not know about go last.
    extras = [col for col in block.columns if col not in frame.columns]
    block = block.reindex(columns=list(frame.columns) + extras)
    if len(frame) == 0:
        # Nothing to keep from ``frame`` except its column order.
        return block
    return pd.concat([frame, block], sort=False)


def pop_dfs(sf_df, sd_df, data_dict, date_shell, min_dates=10):
    """Fill the snowfall and snow-depth tables with one row per resort.

    The rows are collected in plain lists and turned into DataFrames once at
    the end. ``DataFrame.append`` is not used: it was removed in pandas 2.0,
    and appending row by row re-copies the whole frame on every call.

    Parameters
    ----------
    sf_df, sd_df : pandas.DataFrame
        Frames to extend with snowfall and snow-depth rows; their column
        order is kept in the result.
    data_dict : dict
        Nested mapping region -> sub-region -> resort -> year -> record,
        where each record provides ``'dates'``, ``'snowfall'`` and
        ``'depth'`` entries that can be indexed by position.
    date_shell : dict
        Row template handed to ``name_shell`` for every resort.
    min_dates : int, optional
        Year records with fewer dates than this are skipped (default 10).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(sf_df, sd_df)`` with the new rows added, labelled 0, 1, 2, ...
        in iteration order. A resort whose years are all skipped still
        gets a row, carrying whatever ``name_shell`` returned for it.

    Raises
    ------
    ValueError
        If the collected rows cannot be assembled into the dataframes; the
        underlying exception is logged and chained.
    """
    print('Populating dataframe...')
    sf_rows, sd_rows = [], []
    for region, sub_regions in data_dict.items():
        for sub_region, resorts in sub_regions.items():
            for resort, years in resorts.items():
                sf_shell, sd_shell = name_shell(date_shell, region, sub_region, resort)
                for data in years.values():
                    if len(data['dates']) < min_dates:
                        continue

                    # Values are looked up by position so that snowfall and
                    # depth stay aligned with their date.
                    for place, date in enumerate(data['dates']):
                        sf_shell[date] = data['snowfall'][place]
                        sd_shell[date] = data['depth'][place]
                sf_rows.append(sf_shell)
                sd_rows.append(sd_shell)

    try:
        sf_df = _append_rows(sf_df, sf_rows)
        sd_df = _append_rows(sd_df, sd_rows)
    except Exception as exc:
        # Only real errors are trapped (not KeyboardInterrupt/SystemExit);
        # the ValueError raised to callers is kept and chained to the cause.
        logging.exception('Msg')
        raise ValueError('Error') from exc
    return sf_df, sd_df

In [37]:
def to_csv(df, output_file):
    """Write ``df`` to ``output_file`` as CSV without the index column.

    A one-line confirmation naming the destination is printed afterwards.
    """
    df.to_csv(output_file, index=False)
    confirmation = 'Dataframe saved as CSV here: {}'.format(output_file)
    print(confirmation)

In [38]:
def run():
    """Run the whole conversion: pickle in, two CSV files out.

    Steps, in order: load ``../data/snow.pkl``, gather the dates, create the
    two empty tables and the per-row date template, populate both tables,
    then save snowfall and snow depth to their CSV files.
    """
    snow_data = open_dict('../data/snow.pkl')
    all_dates = get_dates(snow_data)
    empty_sf, empty_sd = df_shells(all_dates)
    row_template = date_dict_shell(all_dates)
    filled = pop_dfs(empty_sf, empty_sd, snow_data, row_template)
    sf_df, sd_df = filled
    to_csv(sf_df, '../data/snowfall.csv')
    to_csv(sd_df, '../data/snow_depth.csv')

In [39]:
# Execute the full pipeline: read ../data/snow.pkl and write the snowfall and
# snow-depth CSV files (progress messages are printed as it runs).
run()

Fetching dates...
Populating dataframe...
Dataframe saved as CSV here: ../data/snowfall.csv
Dataframe saved as CSV here: ../data/snow_depth.csv
