In [1]:
import numpy as np
import collections
import pandas as pd
import datetime
import re
import matplotlib.pyplot as plt
import pickle
from collections import namedtuple
import os

# Directory holding the raw transect text files (and the Excel workbook).
s_line_dir = '../NP_transects/'

# Three-letter month prefix -> month number.
# 'fab', 'mch' and 'spt' sit next to the standard abbreviations; presumably
# they are spelling variants found in the raw file headers -- TODO confirm.
month_nums = {
    'jan': 1,
    'feb': 2, 'fab': 2,
    'mch': 3, 'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'spt': 9, 'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
}

# Process depths from 'raw data'

### This goes through all the original text files and parses them directly into DataFrames

In [23]:
# Parse every raw transect text file into a DataFrame, stored as
# line_d[station_num][year].
line_d = collections.defaultdict(dict)

# The Excel workbook lives in the same directory; skip it.
filenames = [item for item in os.listdir(s_line_dir) if 'xlsx' not in item]

for filename in filenames:

    # The extension (text after the first '.') is the two-digit year and the
    # text after the first '_' in the stem is the station number.
    partitions = filename.partition('.')
    year = 1900 + int(partitions[-1])
    station_num = int(partitions[0].partition('_')[-1])

    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
    snowline_data = pd.read_csv(s_line_dir + filename,
                                sep=r'\s+',
                                skiprows=1,
                                index_col=False)

    # Rotate the first row right by one position (last value moves to the
    # front) before the 'row' column is dropped.
    # NOTE(review): presumably the 'row' header label shifts the first line
    # by one column relative to the header -- confirm against a raw file.
    first_row = list(snowline_data.iloc[0])
    first_row.insert(0, first_row.pop())
    snowline_data.loc[0, :] = first_row

    snowline_data = snowline_data.drop(columns='row')

    # Row 0 now holds one string per column; keep only the first run of
    # digits in each, which is used below as the day of the month.
    snowline_data.loc[0, :] = [int(re.findall(r'\d+', string)[0])
                               for string in snowline_data.loc[0]]

    # Map each column label (which starts with a month abbreviation) to a date.
    col_translator = {}

    for month, day in zip(list(snowline_data.columns), list(snowline_data.loc[0, :])):

        month_key = month[:3].lower()

        try:
            col_translator[month] = datetime.date(year=year,
                                                  month=month_nums[month_key],
                                                  day=day)
        except (KeyError, TypeError, ValueError) as err:

            if day == 31 and month_key == 'jun':
                # The raw data contains a '31 June'; record it as 30 June.
                col_translator[month] = datetime.date(year=year,
                                                      month=month_nums[month_key],
                                                      day=30)
            else:
                # Leave the column un-renamed (as before) but say so rather
                # than dropping the problem silently.
                print(f'{filename}: could not convert column {month!r} '
                      f'(day {day!r}) to a date: {err}')

    snowline_data = snowline_data.rename(columns=col_translator)

    # Missing-value sentinels -> NaN.
    # NOTE(review): the list is -9.9, -9, 999 and -99.0; '-9,999' may have been
    # meant as a single code (e.g. -9.999) -- confirm against the raw data.
    snowline_data = snowline_data.replace([-9.9, -9, 999, -99.0], np.nan)

    line_d[station_num][year] = snowline_data

I also made an Excel file of all the transects to make things easier for people. The following cell counts the number of transects in that file to verify that there are 499, matching the result of the 'raw' processing.

In [24]:
# Count the transects stored in the Excel workbook: every sheet contributes
# one transect per column, except for one column that is not counted
# (presumably an index/label column -- TODO confirm).
x = pd.ExcelFile(f'{s_line_dir}/transect_depths.xlsx')

counter = 0

for sn in x.sheet_names:

    # Reuse the already-open workbook instead of re-reading the file from
    # disk for every sheet.
    df = pd.read_excel(x, sheet_name=sn)

    counter += (df.shape[1] - 1)

counter

499

Count the number of transects in the dictionary that was generated from the 'raw' data

In [25]:
# Total number of transects (columns) over every station/year table.
counter = sum(
    len(table.keys())
    for yearly_tables in line_d.values()
    for table in yearly_tables.values()
)

counter

499

In [26]:
# Merge the per-year tables of each station into a single wide table,
# stored as line_depths[station_num].

line_depths = {}

for station_num, yearly_tables in line_d.items():

    # Stations with no parsed year are left out of the result.
    if not yearly_tables:
        continue

    # Place the yearly tables side by side; the outer join keeps every row
    # label that appears in any of them.
    combined = pd.concat(list(yearly_tables.values()), axis=1, join='outer')
    combined = combined.astype(np.float32)

    # Same missing-value sentinels as in the raw parsing step.
    line_depths[station_num] = combined.replace([-9.9,-9,999, -99.0], np.nan)

In [27]:
# Show which years were parsed for each station.
for station, yearly_tables in line_d.items():
    print(station, yearly_tables.keys())

7 dict_keys([1957, 1958, 1959])
8 dict_keys([1962, 1959, 1961])
22 dict_keys([1974, 1976, 1975, 1978, 1980, 1979, 1981, 1977, 1982])
27 dict_keys([1986, 1987, 1984, 1985])
18 dict_keys([1971, 1970, 1968])
25 dict_keys([1982, 1984, 1983, 1981])
29 dict_keys([1988])
13 dict_keys([1965, 1964, 1966])
15 dict_keys([1966, 1967])
31 dict_keys([1990, 1991, 1988, 1989])
10 dict_keys([1962, 1963])
24 dict_keys([1980, 1979, 1978])
23 dict_keys([1977, 1978])
16 dict_keys([1968, 1971, 1969, 1970])
12 dict_keys([1963, 1964, 1965])
11 dict_keys([1962, 1963])
30 dict_keys([1987, 1990, 1989, 1991, 1988])
19 dict_keys([1971, 1970, 1972])
26 dict_keys([1984, 1985, 1983, 1986])
9 dict_keys([1969])
14 dict_keys([1965])
20 dict_keys([1972, 1970, 1971])
5 dict_keys([1955])


In [15]:
# Persist the per-station depth tables. The context manager guarantees the
# file handle is flushed and closed once the dump finishes.
with open('../pickles/line_depths_dict.p', 'wb') as pickle_file:
    pickle.dump(line_depths, pickle_file)