# Project 1 Notebook for Jonathan Owens
This notebook keeps track of the various items worked on and provides a place to share thoughts, ideas and creations with the team.


## Regional weather site code and elements for the region growing corn, wheat and soybeans
http://builder.rcc-acis.org/

- Station ID: KOMA
    - Location: Omaha Eppley Airfield
    - Region: Omaha, Nebraska
- Start date: 2000-10-26
- End date: 2020-10-28
- Elements: pcpn,avgt (precipitation, average temperature)
- Output: csv (comma separated value)


## Regional Climate Centers (RCC) - Applied Climate Information Systems (ACIS) Documentation
    
- Documentation: http://www.rcc-acis.org/docs_webservices.html

- API URL: http://data.rcc-acis.org/StnData

- API URL ELEMENTS:

    elements = {
        sid=KOMA,
        sdate=20001026,
        edate=20201028,
        elems=pcpn,avgt,
        output=csv
        }

- Missing data values are returned as "M" and traces of precipitation, snowfall or snow depth are returned as "T".


In [1]:
# Libraries
import requests
import pandas as pd
import json
from pathlib import Path
from libs import weather
import datetime as dt


In [21]:
# Request settings for the RCC-ACIS StnData web service
# Weather station to query
station_id = 'KOMA'
# Date range as yyyymmdd: oldest day first, most recent day last
start_date = '20001026'
end_date = '20201028'
# Restrict the returned station meta data to the name
meta_data = 'name'
# Daily elements to pull: precipitation and average temperature
elements = 'pcpn,avgt'
# Response format, csv or json
output_style = 'json'

# Assemble the query string from the settings, one key=value pair each,
# in the order the service parameters are listed here
query_parameters = {
    'sid': station_id,
    'sdate': start_date,
    'edate': end_date,
    'meta': meta_data,
    'elems': elements,
    'output': output_style,
}
query_string = '&'.join(
    f'{key}={value}' for key, value in query_parameters.items()
)
# API URL string
rcc_url = 'http://data.rcc-acis.org/StnData?' + query_string


In [22]:
# Fetch current data. A timeout keeps the cell from hanging forever when the
# service does not respond, and raise_for_status() fails fast on an HTTP
# error status instead of handing an error body to the JSON parsing step.
response_data = requests.get(rcc_url, timeout=30)
response_data.raise_for_status()


In [23]:
# Decode the response body as JSON; the daily records are read from its
# 'data' key when the dataframe is created
data = response_data.json()


In [24]:
# Print json.dumps data
#print(json.dumps(data, indent=4))


In [25]:
# Create a dataframe from the records stored under the 'data' key of the
# response; the columns are positional (0, 1, 2) until they are named
weather_data =  pd.DataFrame(data['data'])
# Preview the first rows
weather_data.head()


Unnamed: 0,0,1,2
0,2000-10-26,T,62.0
1,2000-10-27,0.00,54.5
2,2000-10-28,T,56.5
3,2000-10-29,0.68,56.5
4,2000-10-30,0.00,60.0


In [26]:
# Create column headings and assign
weather_data.columns = ['date', 'precipitation', 'average_temperature']
weather_data.head()

Unnamed: 0,date,precipitation,average_temperature
0,2000-10-26,T,62.0
1,2000-10-27,0.00,54.5
2,2000-10-28,T,56.5
3,2000-10-29,0.68,56.5
4,2000-10-30,0.00,60.0


In [27]:
# Drop rows where either measurement is missing ('M'). Checking both columns
# matters: a leftover 'M' in precipitation would make the later cast of that
# column to float fail.
measurement_columns = ['precipitation', 'average_temperature']
missing_mask = weather_data[measurement_columns].eq('M').any(axis=1)
index_rows = weather_data[missing_mask].index
# drop() returns a new dataframe, weather_data itself is left unchanged
clean_weather_data = weather_data.drop(index_rows)


In [28]:
# Replace trace values 'T' with 0.0001 so that a trace stays distinguishable
# from 0 (no precipitation) once the column is numeric.
# 'T' means precipitation was detected, but no amount was provided.
clean_weather_data['precipitation'] = clean_weather_data['precipitation'].replace('T', 0.0001)


In [29]:
# Change date values from object (string) to datetime so the column can be
# used as a time index
clean_weather_data['date'] = pd.to_datetime(clean_weather_data['date'])


In [30]:
# Change precipitation values from object to type float; any non-numeric
# marker still present in the column makes this cast fail
clean_weather_data['precipitation'] = clean_weather_data['precipitation'].astype(float)
# Change average_temperature values from object to type float
clean_weather_data['average_temperature'] = clean_weather_data['average_temperature'].astype(float)


In [31]:
# Set index to date, modifying the dataframe in place (the date column is
# moved into the index), then view the first rows of the clean dataframe
clean_weather_data.set_index('date', inplace=True)
clean_weather_data.head()


Unnamed: 0_level_0,precipitation,average_temperature
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-10-26,0.0001,62.0
2000-10-27,0.0,54.5
2000-10-28,0.0001,56.5
2000-10-29,0.68,56.5
2000-10-30,0.0,60.0


In [32]:
# Testing weather library and function
#returned_data = weather.get_precipitation_and_average_temperature('KOMA', '20001026', '20201028')
#returned_data.head()


### Build dataframe of three stations, 'KOMA', 'KCDR', 'KMCK'


In [33]:
# Build dataframe of three stations, 'KOMA', 'KCDR', 'KMCK'
# Grab dataframe from first station
#df_test = weather.get_precipitation_and_average_temperature('KOMA', '20001026', '20201028')
#df_test.head()

In [34]:
# Grab dataframe from second station
#df2_test = weather.get_precipitation_and_average_temperature('KCDR', '20001026', '20201028')
#df2_test.head()


In [35]:
# Grab dataframe from third station
#df3_test = weather.get_precipitation_and_average_temperature('KMCK', '20001026', '20201028')
#print(df3_test)


In [36]:
# Average the precipitations and temperatures from three stations
#print((df_test + df2_test + df3_test) / 3)


### Create function to pull weather data from multiple stations, combine data and output


In [44]:
# Function to call multiple stations in a specified state based on a timeframe with a selected output format: dataframe or csv file
def test_for_state(state_name, start_date, end_date, return_format):
    '''Calls multiple stations in a specified state based on a timeframe, retrieves 
    a daily precipitation and average temperature, if available, for each station, 
    averages the data of the three weather stations, drops the calculated values 
    returning null to now skew data, and combines values in a dataframe format.  

    Args:
        state_name (str): State postal abbreviation (Example: 'NE')
        start_date (str): Starting date range formatted as 'yyyymmdd'
        end_date (str): Ending date range formatted as 'yyyymmdd'
        return_format (str): Desired returned output is either 'csv' (comma separated
            value) or 'df' (dataframe)

    Returns:
        A clean dataframe of information including column headings titled
        date, precipitation, and average temperature if 'df' is specified or a 'csv'
        file is created in .  If arguments passed into function are 
        missing then return message to user.
    '''
    # Initialize variables
    # Dictionary of states and station codes.
    # Can create a connection to a sql db in the future to store this information for data growth
    state_station_codes = {
        'NE': ['KOMA', 'KCDR', 'KMCK'],
        'IA': ['KCID', 'KDSM', 'KALO'],
        'IL': ['KORD', 'KMDW', 'KSPI']
        }
    list_of_stations = []
    state_df = pd.DataFrame()

    # Check for valid arguments passed in by user
    if state_name == 'NE' or 'IA' or 'IL':
        # Get station ids for selected state
        list_of_stations = state_station_codes[state_name]

        # Get weather data for each station
        station_0 = weather.get_precipitation_and_average_temperature(list_of_stations[0], start_date, end_date)
        station_1 = weather.get_precipitation_and_average_temperature(list_of_stations[1], start_date, end_date)
        station_2 = weather.get_precipitation_and_average_temperature(list_of_stations[2], start_date, end_date)
        
        # Add all station data into one dataframe
        state_df = station_0 + station_1 + station_2

        # Calculate average of combine data values
        state_df = state_df / 3

        # Round values in each column
        state_df = state_df.round({'precipitation': 4, 'average_temperature': 1})

        # Drop null values to now skew data
        state_df.dropna(inplace=True)

        # Return state dateframe
        if return_format == 'df':
            return state_df

        # Output to csv file
        elif return_format == 'csv':
            # Create output path and write data to csv
            csv_output_path = Path('../data/clean_data/state_weather_data_clean.csv')
            state_df.to_csv(csv_output_path)
        else:
            # Return message that return format not found
            return 'Return format specified not found.  Pass in df or csv as a string.'
    else: 
        # Return message that state not found
        return 'State not found.  Pass in NE, IA or IL as a string.'


In [5]:
# Pull today's date as end date and calculate start date 5 years ago
def years_before(day, years):
    '''Return the date that lies `years` calendar years before `day`.

    Args:
        day (datetime.date): Date to count back from
        years (int): Number of calendar years to go back

    Returns:
        The same month and day in the earlier year.  February 29 has no
        counterpart in a non-leap year, so February 28 of the target year
        is returned in that case instead of raising ValueError.
    '''
    target_year = day.year - years
    try:
        return day.replace(year=target_year)
    except ValueError:
        # Feb 29 mapped onto a non-leap year; a target year outside the
        # supported range still raises ValueError from the call below
        return day.replace(year=target_year, day=28)


# NOTE(review): these are date objects, while test_for_state documents its
# dates as 'yyyymmdd' strings - convert before passing them on
end_date = dt.date.today()
start_date = years_before(end_date, 5)
f'{start_date} to {end_date}'

'2015-11-02 to 2020-11-02'