This notebook takes station time series as input and writes each station's
annual maximum series (AMS) to an individual CSV file.

In [None]:
import glob
import os

import numpy as np
import pandas as pd

In [None]:
def aggregate_by_frequency(df, fr, cols):
    """Return the per-period maximum of ``cols``, binned on the 'date' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed column named ``"date"``.
    fr : str
        Pandas offset alias that defines the bin width, e.g. ``"3h"``,
        ``"24h"``, ``"W"`` (weekly), ``"M"`` (monthly) or ``"A"`` (annual).
        The alias has to be coarser than the sampling step of ``df`` for the
        aggregation to be meaningful.
    cols : label or list of labels
        Column(s) of ``df`` to aggregate.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Maximum of ``cols`` within each period, indexed by period date
        (a Series when ``cols`` is a single label, a DataFrame for a list).
    """
    time_bins = pd.Grouper(key='date', freq=fr)
    binned = df.groupby(time_bins)
    selected = binned[cols]
    return selected.max()

In [None]:
# Compute the annual maximum series (AMS) of every station time-series file
# and write it to "station_data/ams/<same file name>".
for st_records in glob.glob("station_data/ts/*"):
    print(st_records)
    # basename() copes with either path separator, unlike split('/').
    name = os.path.basename(st_records)
    # "M" entries are read as NaN; the unnamed first column holds the timestamps.
    df = pd.read_csv(st_records, parse_dates=['Unnamed: 0'], na_values="M")
    df.columns = ['date', 'prcp']
    # "T" entries (presumably trace precipitation -- confirm with data source)
    # are counted as zero so that the column can be cast to float.
    df.replace("T", 0.00, inplace=True)
    # Builtin float: the np.float alias was removed in NumPy 1.24.
    df['prcp'] = df['prcp'].astype(float)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs("station_data/ams", exist_ok=True)
    annual_max = aggregate_by_frequency(df, 'A', 'prcp')
    pd.DataFrame(annual_max).to_csv(os.path.join("station_data/ams", name))

In [None]:
# Empty frame indexed by every year-end date from 1890 to 2019; each station's
# AMS is later joined onto this index as a new column.
# NOTE: the "A" alias is deprecated since pandas 2.2 (replacement: "YE"); it is
# kept here so the notebook still runs on older pandas versions.
year_ends = pd.date_range(start=pd.Timestamp("1890-12-31"),
                          end=pd.Timestamp("2019-12-31"),
                          freq="A")
df = pd.DataFrame({'date': year_ends}).set_index('date')

In [None]:
"""
This for loop is for merging all the station AMS into a single csv file
organized by rows --> years, and columns --> stations.
"""

# Join every station's AMS file onto the year-indexed frame `df`, one column
# per station, named after the file (without its extension).
# NOTE(review): files are read from "input/station_data/ams/", whereas the
# per-station AMS files are written to "station_data/ams/" -- confirm which
# location is intended.
cols = []
# sorted() makes the column order reproducible; glob's order is arbitrary.
for f in sorted(glob.glob("input/station_data/ams/*")):
    # splitext/basename instead of split('/')[-1][:-4]: works with any path
    # separator and any extension length.
    station = os.path.splitext(os.path.basename(f))[0]
    cols.append(station)
    df_join = pd.read_csv(f, parse_dates=['date'], index_col=0)
    # Each AMS file is expected to hold a single data column; name it after
    # the station before joining so columns never collide.
    df_join.columns = [station]
    df = df.join(df_join)

# Save complete AMS for all stations
df.to_csv("station_data/ams_all_stations.csv")

# Only save the historical period that we decided on (1950-2000, inclusive).
# .loc makes it explicit that this is a row (date) slice.
df.loc['1950':'2000'].to_csv("station_data/ams_all_stations_1950_2000.csv")