In [2]:
import numpy as np
import collections
import pandas as pd
import datetime
import re
import matplotlib.pyplot as plt
import pickle
from collections import namedtuple
import os

# Directory (relative to the notebook) that holds the raw snow-line files.
s_line_dir = 'SNOWLINE/'

# Three-letter month abbreviation -> calendar month number.
# Besides the standard abbreviations the table also accepts the alternative
# spellings 'fab', 'mch' and 'spt' (presumably variants found in the raw file
# headers).
month_nums = dict(
    jan=1,
    feb=2, fab=2,
    mch=3, mar=3,
    apr=4,
    may=5,
    jun=6,
    jul=7,
    aug=8,
    spt=9, sep=9,
    oct=10,
    nov=11,
    dec=12,
)

# Process Depths

In [3]:


# Parsed snow-line tables, indexed as line_d[station_num][year] -> DataFrame
# whose column labels are datetime.date objects.
line_d = collections.defaultdict(dict)

for filename in os.listdir(s_line_dir):

    print(filename)

    # File names look like "<prefix>_<station>.<yy>" (e.g. "NP_14.65"):
    # the text after the first '.' is added to 1900 to give the year, and the
    # text after the first '_' of the stem is the station number.
    stem, _, year_suffix = filename.partition('.')

    try:
        year = 1900 + int(year_suffix)
        station_num = int(stem.partition('_')[-1])
    except ValueError:
        # os.listdir() also returns hidden files and sub-directories; skip
        # anything whose name cannot be parsed instead of aborting the run.
        print(f'Skipping {filename!r}: name is not of the form <prefix>_<station>.<yy>')
        continue

    # The first line of each file is skipped; the second line supplies the
    # column names. sep=r'\s+' is the non-deprecated spelling of
    # delim_whitespace=True.
    snowline_data = pd.read_csv(os.path.join(s_line_dir, filename),
                                sep=r'\s+',
                                skiprows=1,
                                index_col=False)

    print(snowline_data)

    # As read, row 0 holds the bracketed day-of-month labels shifted one
    # column to the left (the last cell is empty). Rotate the row right by one
    # so that each "(dd)" sits under its month column.
    first_row = list(snowline_data.iloc[0])

    first_row.insert(0, first_row.pop())

    snowline_data.loc[0,:] = first_row

    # The 'row' column only carries the running row number.
    snowline_data.drop(columns='row',inplace=True)

    # Turn "(dd)" into the integer dd.
    snowline_data.loc[0,:] = [int(re.findall(r'\d+',string)[0]) for string in snowline_data.loc[0]]

    # Map every column label (month abbreviation, possibly with a pandas
    # de-duplication suffix such as "jan.1") to a full date.
    col_translator = {}

    for month, day in zip(list(snowline_data.columns), list(snowline_data.loc[0,:])):

        month_key = str(month)[:3].lower()

        try:
            col_translator[month] = datetime.date(month=month_nums[month_key],
                                                  year=year,
                                                  day=day)
        except Exception as err:

            if month_key == 'jun' and day == 31:
                # Some headers give June 31st; use June 30th instead.
                col_translator[month] = datetime.date(month=month_nums[month_key],
                                                      year=year,
                                                      day=30)
            else:
                # Keep the best-effort behaviour (the column keeps its
                # original label) but make the failure visible.
                print(f'Could not build a date for column {month!r} '
                      f'(day={day!r}) in {filename!r}: {err!r}; column left unrenamed')

    snowline_data.rename(columns=col_translator,inplace=True)

    # Replace sentinel values with NaN.
    # NOTE(review): the list contains -9 and 999 as two separate entries; if
    # -9.999 (or -999) was intended this is a typo - confirm before changing.
    # NOTE(review): cells that are still strings at this point are not matched
    # by these numeric sentinels - confirm this is acceptable.
    snowline_data.replace([-9.9,-9,999, -99.0], np.nan, inplace=True)

    print(snowline_data)

    # NOTE(review): row 0 still holds the day-of-month values and is stored
    # together with the data rows - confirm downstream code accounts for it.
    line_d[station_num][year] = snowline_data


########################################

# Collect data from individual years
    
# One wide table per station: the yearly tables placed side by side
# (outer join on the row index), cast to float32.
line_depths = {}

for station_num in line_d.keys():

    list_of_dfs = []

    if line_d[station_num]:

        # The yearly tables were inserted in os.listdir() order, which is
        # arbitrary; sort the years so the date columns come out in
        # chronological, reproducible order.
        for year in sorted(line_d[station_num]):

            list_of_dfs.append(line_d[station_num][year])

        result = pd.concat(list_of_dfs, axis=1, join='outer').astype(np.float32)

        # Replace sentinel values with NaN now that every cell is numeric.
        # NOTE(review): the list contains -9 and 999 as two separate entries;
        # if -9.999 (or -999) was intended this is a typo - confirm.
        result = result.replace([-9.9,-9,999, -99.0], np.nan)

        line_depths[station_num] = result

NP_14.65
      row   may   sep   oct
0    (30)  (29)  (30)   NaN
1     001    15    18  38.0
2     002    04    25  27.0
3     003    08    20  17.0
4     004    37    14  26.0
..    ...   ...   ...   ...
96    096    17    26  14.0
97    097    47    44  34.0
98    098    22    29  32.0
99    099    33    20  35.0
100   100    30    15  58.0

[101 rows x 4 columns]
    1965-05-30 1965-09-29  1965-10-30
0           30         29        30.0
1           15         18        38.0
2           04         25        27.0
3           08         20        17.0
4           37         14        26.0
..         ...        ...         ...
96          17         26        14.0
97          47         44        34.0
98          22         29        32.0
99          33         20        35.0
100         30         15        58.0

[101 rows x 3 columns]
NP_15.67
      row   jan   feb   mar   apr
0    (22)  (20)  (24)  (20)   NaN
1     001    24    13    22  22.0
2     002    61    13    14  23.0
3     

      row   may   jun   sep   oct   nov   dec
0    (20)  (19)  (20)  (20)  (20)  (20)   NaN
1     001   045    28    22    10    37  27.0
2     002   041    30    25    08    37   3.0
3     003   032    44    14    37    24  45.0
4     004   010    53    30    06    35  39.0
..    ...   ...   ...   ...   ...   ...   ...
96    096   003    28    01    12    38  40.0
97    097   064    15    42    26    03  28.0
98    098   056    06    06    00    35  22.0
99    099   034    24    10    09    33  16.0
100   100   035    66    24    20    20  22.0

[101 rows x 7 columns]
    1968-05-20 1968-06-19 1968-09-20 1968-10-20 1968-11-20  1968-12-20
0           20         19         20         20         20        20.0
1          045         28         22         10         37        27.0
2          041         30         25         08         37         3.0
3          032         44         14         37         24        45.0
4          010         53         30         06         35        39.

     1982-01-20  1982-02-20  1982-03-20  1982-04-20  1982-09-20  1982-10-20  \
0          20.0        20.0        20.0        20.0        20.0        20.0   
1          17.0        20.0        16.0        20.0        25.0        44.0   
2          19.0        26.0        20.0        26.0         3.0        33.0   
3          12.0        19.0        17.0        21.0        20.0        30.0   
4          24.0        23.0        23.0        20.0        12.0        42.0   
..          ...         ...         ...         ...         ...         ...   
96         28.0        21.0        21.0        17.0       -99.0       -99.0   
97         27.0        26.0        27.0        21.0       -99.0       -99.0   
98         32.0        27.0        22.0        17.0       -99.0       -99.0   
99         36.0        30.0        20.0        23.0       -99.0       -99.0   
100        31.0        26.0        23.0        24.0       -99.0       -99.0   

     1982-11-20  1982-12-20  
0          20.0      

    1970-01-20 1970-02-20 1970-03-20 1970-04-20 1970-05-20 1970-06-20  \
0           20         20         20         20         20         20   
1           22         25         54        097        085        108   
2           34         59         53        074        064        100   
3           42         55         20        067        022        063   
4           30         39         20        010        029        026   
..         ...        ...        ...        ...        ...        ...   
96          23         37         40        087        028        023   
97          24         40         52        036        075        083   
98          39         38         38        067        075        076   
99          19         30         41        067        060        053   
100         38         21         44        061        047        044   

    1970-07-20 1970-08-20 1970-09-20 1970-10-20 1970-11-20  1970-12-20  
0           20         20         20         20   

     row   jan jan.1 jan.2   fab fab.1 fab.2   mch mch.1 mch.2  ... sep.2  \
0   (10)  (20)  (31)  (10)  (20)  (25)  (09)  (19)  (30)  (09)  ...  (10)   
1    001    18    22    15    16    13    10    17    16    15  ...    52   
2    002    19    16    05    08    18    33    28    41    13  ...    18   
3    003    26    24    39    27    12    07    08    09    12  ...    06   
4    004    21    16    47    29    27    36    20    41    39  ...    02   
5    005    59    71    20    31    32    20    27    15    15  ...    18   
6    006    33    30    19    28    11    45    13    12    52  ...    11   
7    007    10    10    28    40    10    11    18    27    35  ...    20   
8    008    48    41    14    18    11    10    27    27    40  ...    18   
9    009    10    11    12    21    15    26    32    13    29  ...    01   
10   010    32    29    18    23    29    24    30    26    14  ...    19   
11   011    21    36    22    17    37    34    12    37    27  ...    21   

     1955-05-31  1955-06-11  1955-06-23  1955-07-04  1955-11-22  1955-12-31
0          31.0        11.0        23.0         4.0        22.0        31.0
1          58.0        55.0        36.0        34.0        25.0        25.0
2          60.0        60.0        68.0         8.0        21.0        30.0
3          41.0        50.0        42.0        40.0        20.0        13.0
4          70.0        62.0        50.0        27.0        24.0        40.0
..          ...         ...         ...         ...         ...         ...
96        -99.0        48.0       -99.0        40.0        34.0         NaN
97        -99.0        40.0       -99.0        12.0         7.0         NaN
98        -99.0        43.0       -99.0        36.0        14.0         NaN
99        -99.0        32.0       -99.0        48.0        52.0         NaN
100       -99.0        30.0       -99.0        38.0        59.0         NaN

[101 rows x 6 columns]
NP_30.90
     row   jan jan.1 jan.2   feb feb.1 feb.2   mar mar.

In [5]:
# List, for every station number, the years for which a table was parsed.
for station_num, tables_by_year in line_d.items():
    print(station_num, tables_by_year.keys())

14 dict_keys([1965])
15 dict_keys([1967, 1966])
13 dict_keys([1966, 1965, 1964])
22 dict_keys([1974, 1977, 1976, 1980, 1975, 1982, 1981, 1979, 1978])
31 dict_keys([1989, 1990, 1991, 1988])
26 dict_keys([1983, 1984, 1986, 1985])
7 dict_keys([1959, 1958, 1957])
16 dict_keys([1968, 1970, 1969, 1971])
20 dict_keys([1972, 1970, 1971])
23 dict_keys([1977, 1978])
24 dict_keys([1978, 1980, 1979])
30 dict_keys([1989, 1987, 1988, 1991, 1990])
19 dict_keys([1972, 1971, 1970])
27 dict_keys([1984, 1985, 1986, 1987])
10 dict_keys([1962, 1963])
8 dict_keys([1961, 1959, 1962])
25 dict_keys([1982, 1984, 1981, 1983])
11 dict_keys([1963, 1962])
29 dict_keys([1988])
18 dict_keys([1970, 1968, 1971])
12 dict_keys([1963, 1965, 1964])
9 dict_keys([1969])
5 dict_keys([1955])


In [6]:
# Persist the per-station tables. The context manager guarantees the file is
# flushed and closed even if pickling raises.
with open('line_depths_dict.p', 'wb') as pickle_file:
    pickle.dump(line_depths, pickle_file)