In [None]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

In [None]:
import numpy as np
import pandas as pd
import datetime as dt

## Reflect Tables into SQLAlchemy ORM

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

In [None]:
# Open a SQLAlchemy engine on the hawaii.sqlite database file
# (the path is relative to the notebook's working directory)
database_url = "sqlite:///Resources/hawaii.sqlite"
engine = create_engine(database_url)

In [None]:
# reflect an existing database into a new model
Base = automap_base()

# reflect the tables
# `prepare(engine, reflect=True)` is deprecated since SQLAlchemy 1.4; passing
# `autoload_with` supplies the engine and triggers reflection in one argument.
# NOTE(review): `autoload_with` requires SQLAlchemy >= 1.4 - confirm the environment.
Base.prepare(autoload_with=engine)

In [None]:
# View all of the classes that automap found
# (the later cells expect `measurement` and `station` to be among these keys)
Base.classes.keys()

In [None]:
# Keep a handle on each automapped class so queries can refer to them directly
measurement, station = Base.classes.measurement, Base.classes.station

In [None]:
# Open the ORM session that every query in this notebook runs through,
# bound to the engine created above
session = Session(bind=engine)

In [None]:
# Produce an inspection object for the given target
# (the next two cells use it to list each table's column names and types)
inspector = inspect(engine)

In [None]:
# Print every column of the `measurement` table as "<name> <type>"
for col in inspector.get_columns("measurement"):
    print(col["name"], col["type"])

In [None]:
# Print every column of the `station` table as "<name> <type>"
for col in inspector.get_columns("station"):
    col_name, col_type = col["name"], col["type"]
    print(col_name, col_type)

## Bonus Challenge Assignment: Temperature Analysis II

In [None]:
# `calc_temps` takes a start date and an end date (both '%Y-%m-%d' strings) and
# returns the minimum, average, and maximum observed temperature in that range
def calc_temps(start_date, end_date):
    """TMIN, TAVG, and TMAX over an inclusive range of dates.

    Args:
        start_date (string): First date of the range, in the format %Y-%m-%d
        end_date (string): Last date of the range, in the format %Y-%m-%d

    Returns:
        The result list of the aggregate query: one row holding
        (TMIN, TAVG, TMAX) computed over `measurement.tobs`
    """
    stats = [func.min(measurement.tobs), func.avg(measurement.tobs), func.max(measurement.tobs)]
    in_range = (measurement.date >= start_date, measurement.date <= end_date)
    return session.query(*stats).filter(*in_range).all()

# For example
print(calc_temps('2012-02-28', '2012-03-05'))

In [None]:
# Declare variables to hold vacation start and end dates
start_date = "2012-08-01"
end_date = "2012-08-07"

# `calc_temps` is already defined in the previous cell, so it is reused here
# instead of being re-declared (a second identical `def` only shadows the first).
# Display minimum, average, and max temps for the trip dates
print(calc_temps(start_date, end_date))

In [None]:
# Plot the results from your previous query as a bar chart.
# Use "Trip Avg Temp" as your Title
# Use the average temperature for bar height (y value)
# Use the peak-to-peak (tmax-tmin) value as the y error bar (yerr)

# Run the query once and unpack its single (tmin, tavg, tmax) row, rather than
# issuing the same database query three times
tmin, tavg, tmax = calc_temps(start_date, end_date)[0]
tavg = round(tavg)

# Declare variable to hold y error bar value
yError = tmax - tmin

# Define bar plot size and remove x axis gridlines
fig, ax = plt.subplots(figsize=(3, 10))
ax.xaxis.grid(False)

# Pass values into bar plot, and define color and transparency
ax.bar(2.5, height=tavg, width=3, ec="k", yerr=yError, color="darkorange", alpha=0.5)

# Define bar plot title and y label (and their font sizes); the single bar
# needs no x ticks or tick labels
ax.set_title("Trip Avg Temp (°F)", fontsize=18)
ax.set_ylabel("Avg Temp (°F)", fontsize=14)
ax.set_ylim(0, 110)
ax.set_xlim(0, 5)
ax.tick_params(bottom=False, labelbottom=False)

# Print pyplot bar plot to image file
# plt.savefig("../Images/Trip_Avg_Bar_Plot.jpg")

plt.show()

### Daily Rainfall Average

In [None]:
# Calculate the total amount of rainfall per weather station for your trip dates using the previous year's
# matching dates.
# Sort this in descending order by precipitation amount and list the station, name, latitude, longitude, and elevation

# The two tables must be tied together on the station identifier; without this
# condition the query returns every measurement paired with every station
# (a cartesian product), which multiplies the per-station rainfall totals.
# NOTE(review): assumes the `station` table exposes the identifier as a
# `station` column matching `measurement.station` - confirm against the
# column listing printed earlier in the notebook.
precipitationData = session.query(measurement.date, measurement.station, measurement.prcp, station.name,
                                  station.latitude, station.longitude, station.elevation).\
                    filter(measurement.station == station.station).\
                    filter((measurement.date <= end_date) & (measurement.date >= start_date)).all()
precipitationData

In [None]:
# Load the query rows into a DataFrame. The column names are given explicitly,
# in the same order as the columns selected in the query, so the frame carries
# the expected labels regardless of how pandas treats the SQLAlchemy row
# objects (and even when the query returns no rows).
precipitation_columns = ["date", "station", "prcp", "name", "latitude", "longitude", "elevation"]
precipitation_df = pd.DataFrame(precipitationData, columns=precipitation_columns)
precipitation_df

In [None]:
# Discard every row that has a missing value in any column
# (the defaults of `dropna` are axis=0 and how="any")
precipitation_df = precipitation_df.dropna()
precipitation_df

In [None]:
# Total the precipitation values per station
stationPrcp = precipitation_df.groupby("station")
stationRain = stationPrcp["prcp"].agg("sum")

In [None]:
# Promote the per-station totals to a one-column DataFrame named "Precipitation"
stationRain_df = stationRain.to_frame(name="Precipitation")
stationRain_df

In [None]:
# Turn the `station` index into a regular column. The frame is rebuilt from
# `stationRain` instead of resetting `stationRain_df` onto itself, so running
# this cell more than once does not add a stray `index` column.
stationRain_df = stationRain.to_frame(name="Precipitation").reset_index()
stationRain_df

In [None]:
# Attach each station's total precipitation to the trip measurement rows
# (which carry the station name, latitude, longitude, and elevation), then
# sort in descending order by that total as the task statement asks.
# A stable sort keeps the original row order within each station.
mergedStations_df = (
    pd.merge(stationRain_df, precipitation_df, how="left", on="station")
    .sort_values("Precipitation", ascending=False, kind="mergesort")
    .reset_index(drop=True)
)
mergedStations_df

In [None]:
# `daily_normals` computes the daily normals for one calendar day
# (i.e. tmin, tavg, and tmax over all historic data matching that month and day)

def daily_normals(date):
    """Daily normals for a single month-day.

    Args:
        date (str): The month and day to match, in the format '%m-%d'

    Returns:
        The result list of the aggregate query: one row holding
        (tmin, tavg, tmax) over every measurement whose date has that month-day
    """
    month_day = func.strftime("%m-%d", measurement.date)
    query = session.query(
        func.min(measurement.tobs),
        func.avg(measurement.tobs),
        func.max(measurement.tobs),
    )
    return query.filter(month_day == date).all()

# For example
daily_normals("01-01")

In [None]:
# calculate the daily normals for your trip
# push each tuple of calculations into a list called `normals`

# Set the start and end date of the trip
# start = "2011-08-01"
# end = "2011-08-07"

# # Use the start and end date to create a range of dates
# startingDate = (dt.datetime.strptime(start, "%Y-%m-%d")).date()
# endingDate = (dt.datetime.strptime(end, "%Y-%m-%d")).date()

# numdays = 7
# dateList = []
# for x in range (0, numdays):
#     dateList.append(endingDate - dt.timedelta(days = x))
# print(dateList)

In [None]:
# Strip off the year and save a list of strings in the format %m-%d

# def daily_normals(date):
#     """Daily Normals.
    
#     Args:
#         date (str): A date string in the format '%m-%d'
        
#     Returns:
#         A list of tuples containing the daily normals, tmin, tavg, and tmax
    
#     """
    
#     sel = [func.min(measurement.tobs), func.avg(measurement.tobs), func.max(measurement.tobs)]
#     return session.query(*sel).filter(func.strftime("%m-%d", measurement.date) == date).all()

# daily_normals(dateList)

# Use the `daily_normals` function to calculate the normals for each date string 
# and append the results to a list called `normals`.

In [None]:
# Trip window (inclusive) used for the daily temperature query
start = "2011-08-01"
end = "2011-08-07"

# Fetch the date, station, and observed temperature (tobs) of every
# measurement that falls inside the trip window
trip_columns = (measurement.date, measurement.station, measurement.tobs)
tempsData = session.query(*trip_columns).filter(measurement.date <= end, measurement.date >= start).all()
tempsData

In [None]:
# Load the previous query results into a Pandas DataFrame. The column names are
# given explicitly, in the order they were selected, so the later cells can
# rely on `date` and `tobs` regardless of how pandas treats the SQLAlchemy row
# objects (and even when the query returns no rows).
temps_df = pd.DataFrame(tempsData, columns=["date", "station", "tobs"])
temps_df.head()

In [None]:
# Parse the `date` strings into datetime values, then show the resulting dtypes
temps_df = temps_df.assign(date=pd.to_datetime(temps_df["date"]))
temps_df.dtypes

In [None]:
# Preview the first rows again after the `date` column was converted to datetime
temps_df.head()

In [None]:
# Group the observations by date and take the lowest, mean, and highest
# observed temperature (tobs) for each date
dateTemp = temps_df.groupby("date")
dailyTobs = dateTemp["tobs"]

dailyMin_Temp, dailyAvg_Temp, dailyMax_Temp = dailyTobs.min(), dailyTobs.mean(), dailyTobs.max()

In [None]:
# Place the three per-date series side by side, one labelled column each
dailyTemps_df = pd.concat(
    [dailyMin_Temp, dailyAvg_Temp, dailyMax_Temp],
    axis=1,
    keys=["Daily Minimum Temp", "Daily Average Temp", "Daily Maximum Temp"],
)
dailyTemps_df

In [None]:
# Plot the daily normals as an area plot with `stacked=False`
# (pandas returns the Axes it drew on, which is then labelled explicitly)
ax = dailyTemps_df.plot.area(figsize=(18, 10), stacked=False, alpha=0.25, rot=45, fontsize=12)

ax.set_title(f"Daily Temperature Normals from {start} to {end}", fontsize=18)
ax.set_xlabel("Date", fontsize=14)
# Unit written as "°F" to match the labels on the trip-average bar chart
ax.set_ylabel("Temperature (°F)", fontsize=14)
ax.set_ylim(0, 110)

# plt.savefig("Images/Daily_Temperature_Normals.png")
plt.show()

## Close Session

In [None]:
# Close Session
# (ends the SQLAlchemy session created near the top of the notebook)
session.close()