# Libraries and Settings

#### Libraries

In [1]:
##########==========##########==========##########==========##########==========

## general purpose libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import urllib.request as url

from datetime import datetime
from bs4 import BeautifulSoup

from os import listdir, mkdir
from os.path import isdir, isfile


## time stamp functions
## registry of checkpoint messages, keyed by checkpoint id
set_timing = {}
def time_check(id_str = 'ZZ'):
    """Record the current wall-clock time under `id_str`.

    The message 'Checkpoint <id padded to 4> = HH:MM:SS' is stored in the
    module-level `set_timing` dict (re-using an id overwrites its message).
    When `id_str` is 'ZZ' (the default), every stored message is printed.
    Returns None.
    """
    clock = datetime.now().strftime('%H:%M:%S')
    set_timing[id_str] = 'Checkpoint {0} = {1}'.format(id_str.ljust(4), clock)

    ## only the closing checkpoint triggers the report
    if id_str != 'ZZ':
        return
    for message in set_timing.values():
        print(message)

time_check('AA')

#### Settings

In [2]:
## url source of the data; the {0} slot is filled with a year
set_url = "https://www.ncei.noaa.gov/data/local-climatological-data/access/{0}/"
## years covered: 2012 through 2021 (the range end is exclusive)
set_year = range(2012, 2022)
## file names are kept only when their first two characters are listed here
set_prefix = ['69', '72', '74', '91', '99']

## cache settings
## NOTE(review): these flags are not read anywhere in the visible cells;
## presumably they switch re-use of cached results per stage -- confirm
set_dev_mode = True
set_cache = dict(
    hard_reset = False,
    testing = True,
    collection = True,
    compilation = True,
    modeling = True,
)

## defines dtype for the columns in the raw weather data files
set_col_list = dict(
    DATE = str,
    LATITUDE = float,
    LONGITUDE = float,
    ELEVATION = float,
    HourlyDryBulbTemperature = float,
    HourlyPrecipitation = float,
)

## set modeling parameters
## NOTE(review): units are not shown here; 'mid_summer' looks like a
## day-of-year and 'window' a day count -- confirm before relying on it
set_param = dict(window = 14, mid_summer = 213)

#### set up file system (if needed)

In [3]:
## make directories that script expects
def make_directories(root = None):
    """Create the directory tree the script expects, skipping what exists.

    Parameters
    ----------
    root : str or None
        Directory under which the tree is created. None (the default) keeps
        the original behaviour: paths are relative to the working directory.

    Raises
    ------
    FileExistsError
        If one of the expected paths exists but is not a directory.
    """
    ## local import keeps this cell self-contained; the top-of-file import
    ## cell only brings in listdir and mkdir from os
    from os import makedirs
    from os.path import join

    all_dirs = ['A_Input', 'B_Process', 'C_Output', 'B_Process/downloads']
    for i in all_dirs:
        target = i if root is None else join(root, i)
        ## exist_ok avoids the check-then-create race of isdir() + mkdir()
        ## and creates missing parents regardless of the list order
        makedirs(target, exist_ok = True)
        
## create any missing project directories, relative to the working directory
make_directories()

# Import Data

#### retrieve links page for the most recent year

In [4]:
def make_data_roster(the_year, the_url = set_url, valid_prefix = set_prefix):
    """Build a roster of the csv files linked from one year's index page.

    Parameters
    ----------
    the_year : value substituted into the `{0}` slot of `the_url`; its
        string form also names the boolean flag column of the result.
    the_url : str
        Url template of the index page.
    valid_prefix : list of str
        Rows are kept only when the first two characters of the file name
        are in this collection.

    Returns
    -------
    pandas.DataFrame
        One row per retained file, indexed 0..n-1, with columns
        'prefix', 'file', 'lon', 'lat', 'dist' (the last three all NaN)
        and str(the_year) (all False).
    """

    ## retrieve raw web page; the with-block closes the connection on exit
    with url.urlopen(the_url.format(the_year)) as conn:
        raw_page = conn.read()

    ## extract links to csv files
    ## the parser is named explicitly so the result does not depend on which
    ## optional parsers happen to be installed
    all_links = BeautifulSoup(raw_page, 'html.parser').find_all('a')
    ## .string is None for anchors without a single text child: skip those
    ## str() detaches the text from the parse tree it would otherwise reference
    csv_files = [
        str(i.string) for i in all_links
        if i.string is not None and '.csv' in i.string
        ]

    ## package as pandas files; limit to valid prefix
    all_files = pd.DataFrame({
        'prefix': [i[0:2] for i in csv_files],
        'file': csv_files,
        'lon': np.nan, 'lat': np.nan, 'dist': np.nan})
    all_files = all_files[all_files['prefix'].isin(valid_prefix)]
    all_files = all_files.reset_index(drop = True)
    all_files[str(the_year)] = False

    return all_files
        
## build the roster from the index page of the last year in set_year
data_roster = make_data_roster(set_year[-1])

#### determine which stations are near route cities

In [5]:
def filter_to_proximate_data(dat_rost = None):
    """Placeholder for the proximity filter; currently has no effect.

    Per the section heading this step is meant to determine which stations
    are near route cities. That logic is not implemented yet: the function
    only walks the roster index and returns None.

    Parameters
    ----------
    dat_rost : pandas.DataFrame or None
        Roster to process. None (the default) uses the module-level
        `data_roster` as it exists when the function is called.
    """
    ## resolve the default at call time: a default bound in the signature
    ## would keep pointing at the old frame after data_roster is rebuilt
    if dat_rost is None:
        dat_rost = data_roster

    ## TODO: implement the per-station proximity test
    for i in dat_rost.index:
        pass

## preview the roster as plain text
print(data_roster)

     prefix             file  lon  lat  dist   2021
0        69  69015093121.csv  NaN  NaN   NaN  False
1        72  72011053983.csv  NaN  NaN   NaN  False
2        72  72011354829.csv  NaN  NaN   NaN  False
3        72  72012063837.csv  NaN  NaN   NaN  False
4        72  72012999999.csv  NaN  NaN   NaN  False
...     ...              ...  ...  ...   ...    ...
2810     99  99999996405.csv  NaN  NaN   NaN  False
2811     99  99999996406.csv  NaN  NaN   NaN  False
2812     99  99999996407.csv  NaN  NaN   NaN  False
2813     99  99999996408.csv  NaN  NaN   NaN  False
2814     99  99999996409.csv  NaN  NaN   NaN  False

[2815 rows x 6 columns]


#### retrieve data from subsequent years for proximate stations

#### record completion time stamp

In [6]:
## checkpoint 'ID': closes the "Import Data" section
time_check('ID')

# Refine Data

In [7]:
## checkpoint 'RD': recorded in the "Refine Data" section
time_check('RD')

# Build Model

In [8]:
## checkpoint 'BM': recorded in the "Build Model" section
time_check('BM')

# Model Routes

In [9]:
## checkpoint 'MR': recorded in the "Model Routes" section
time_check('MR')

# Render Visualization

In [10]:
## checkpoint 'RV': recorded in the "Render Visualization" section
time_check('RV')

# Test Code

In [11]:
## checkpoint 'TC': recorded in the "Test Code" section
time_check('TC')

# Miscellaneous

In [12]:
## Display time statistics: 'ZZ' is the id that makes time_check print
## every checkpoint message stored in set_timing
time_check('ZZ')

Checkpoint AA   = 17:48:40
Checkpoint ID   = 17:48:42
Checkpoint RD   = 17:48:42
Checkpoint BM   = 17:48:42
Checkpoint MR   = 17:48:42
Checkpoint RV   = 17:48:42
Checkpoint TC   = 17:48:42
Checkpoint ZZ   = 17:48:42
