# Bonus: Temperature Analysis II

In [None]:
# Dependencies and setup
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

In [None]:
# Build the SQLAlchemy engine for the SQLite file under Resources/
database_url = "sqlite:///Resources/hawaii.sqlite"
engine = create_engine(database_url)

In [None]:
# Reflect an existing database into a new model
Base = automap_base()

# Reflect the tables.
# The `reflect` flag is a deprecated legacy parameter of `prepare`; passing the
# engine as `autoload_with` is the documented replacement. Releases that predate
# that keyword raise TypeError for it, so fall back to the legacy call there.
try:
    Base.prepare(autoload_with=engine)
except TypeError:
    Base.prepare(engine, reflect=True)

In [None]:
# View all of the classes that automap found
Base.classes.keys()

In [None]:
# Save references to each table
measurement = Base.classes.measurement
station = Base.classes.station

In [None]:
# Create our session (link) from Python to the DB
session = Session(engine)

In [None]:
# Produce an inspection object for the given target
inspector = inspect(engine)

In [None]:
# Use inspector to print the column names and types from measurement table
m = inspector.get_columns("measurement")
for column in m:
    print(column["name"], column["type"])

In [None]:
# Use inspector to print the column names and types from stations table
s = inspector.get_columns("station")
for column in s:
    print(column["name"], column["type"])

### Trip Average Temperatures

In [None]:
# Vacation window (start and end dates) used by the trip analyses
start_date, end_date = "2012-08-01", "2012-08-07"


# 'calc_temps' reports the lowest, mean, and highest observed temperature for a date range
def calc_temps(start_date, end_date):
    """Return TMIN, TAVG, and TMAX for an inclusive range of dates.

    Args:
        start_date (string): First date of the range, formatted %Y-%m-%d
        end_date (string): Last date of the range, formatted %Y-%m-%d

    Returns:
        The query result as a list; its first row holds TMIN, TAVG, and TMAX
    """
    temps = measurement.tobs
    aggregates = [func.min(temps), func.avg(temps), func.max(temps)]
    # Several criteria passed to one filter() call are combined with AND
    in_range = session.query(*aggregates).filter(
        measurement.date >= start_date,
        measurement.date <= end_date,
    )
    return in_range.all()


# Show the minimum, average, and maximum temperatures for the trip
print(calc_temps(start_date, end_date))

In [None]:
# Plot the results from your previous query as a bar chart
# Use "Trip Avg Temp" as your Title
# Use the average temperature for bar height (y value)
# Use the peak-to-peak (tmax-tmin) value as the y error bar (yerr)

# Run the query once and unpack the first row (tmin, tavg, tmax) instead of
# querying the database separately for each of the three values
tmin, tavg, tmax = calc_temps(start_date, end_date)[0]
tavg = round(tavg)

# Declare variable to hold y error bar value
yError = tmax - tmin

# Define bar plot size and remove x axis gridlines
plt.figure(figsize=(3, 10))
plt.gca().xaxis.grid(False)

# Pass values into bar plot, and define color and transparency
plt.bar(2.5, height=tavg, width=3, ec="k", yerr=yError, color="darkorange", alpha=0.5)

# Define bar plot title and y label (and their font sizes), fix the axis
# limits, and hide the x ticks since the single bar has no meaningful x value
plt.title("Trip Avg Temp (°F)", fontsize=18)
plt.ylabel("Avg Temp (°F)", fontsize=14)
plt.ylim(0, 110)
plt.xlim(0, 5)
plt.tick_params(bottom=False, labelbottom=False)

# Print bar plot to image file
plt.savefig("Images/Trip_Avg_Bar_Plot.jpg")

# Display bar plot
plt.show()

### Daily Rainfall Average

In [None]:
# Calculate the total amount of rainfall per weather station for your trip dates using the previous year's matching dates
# Sort this in descending order by precipitation amount and list the station, name, latitude, longitude, and elevation

# Pair each measurement with the metadata of its own station. Selecting from
# both tables without a condition linking them yields a cross join, which
# repeats every measurement once per station and inflates any totals.
# NOTE(review): assumes the station table exposes its ID as `station.station`,
# holding the same values as `measurement.station` — confirm against the
# inspector output for the station table.
precipitationData = session.query(measurement.date, measurement.station, measurement.prcp, station.name,
                                  station.latitude, station.longitude, station.elevation).\
                    filter(measurement.station == station.station).\
                    filter((measurement.date <= end_date) & (measurement.date >= start_date)).all()
precipitationData

In [None]:
# Save the query results as a pandas DataFrame
precipitation_df = pd.DataFrame(precipitationData)
precipitation_df

In [None]:
# Check total precipitation for date range for all stations
precipitation_df["prcp"].sum()

In [None]:
# Drop rows with NaN values
precipitation_df.dropna(axis = 0, how = "any", inplace = True)
precipitation_df

In [None]:
# Group by station ID and calculate total precipitation
stationPrcp = precipitation_df.groupby(["station"])
stationRain = stationPrcp["prcp"].sum()

In [None]:
# Create DataFrame with grouped by station precipitation totals
stationRain_df = pd.DataFrame({"Precipitation":(stationRain)})
stationRain_df

In [None]:
# Reset index
stationRain_df = stationRain_df.reset_index()
stationRain_df

In [None]:
# Look up each station's descriptive columns straight from the station table.
# NOTE(review): assumes the station table exposes its ID as `station.station`,
# holding the same values as the "station" column of stationRain_df — confirm
# against the inspector output for the station table.
stationInfo_df = pd.DataFrame(
    session.query(station.station, station.name, station.latitude,
                  station.longitude, station.elevation).all(),
    columns=["station", "name", "latitude", "longitude", "elevation"])

# Match totals to metadata on the station ID. stationRain_df has one row per
# station while precipitation_df has one row per measurement, so combining
# their columns by index label pairs each total with an unrelated row.
stationTotals_df = stationRain_df.merge(stationInfo_df, on="station", how="left")

# Declare variables to hold series
precipAmt = stationTotals_df["Precipitation"]
stationID = stationTotals_df["station"]
precipName = stationTotals_df["name"]
precipLat = stationTotals_df["latitude"]
precipLng = stationTotals_df["longitude"]
precipElv = stationTotals_df["elevation"]

# Assemble resulting series into a new DataFrame
dailyRain_df = pd.DataFrame({"Precipitation": precipAmt,
                             "Station ID": stationID,
                             "Station Name": precipName,
                             "Latitude": precipLat,
                             "Longitude": precipLng,
                             "Elevation": precipElv})
dailyRain_df

In [None]:
# Drop rows containing NaN values
dailyRain_df.dropna(axis = 0, how = "any", inplace = True)
dailyRain_df

In [None]:
# Reconcile total precipitation for grouped by station DataFrame against dated range DataFrame
dailyRain_df["Precipitation"].sum()

In [None]:
# Sort this in descending order by precipitation amount
dailyRain_df.sort_values("Precipitation", ascending = False, inplace = True)
dailyRain_df

In [None]:
# Reset index and drop resulting 'index' column
dailyRain_df.reset_index(inplace = True, drop = True)
dailyRain_df

### Daily Temperature Range and Average

In [None]:
# 'daily_normals' calculates the daily normals
# (i.e. the tmin, tavg, and tmax over all historic data matching a specific month and day)
def daily_normals(date):
    """Return the daily normals for one calendar day.

    Args:
        date (str): Month and day to match, formatted '%m-%d'

    Returns:
        A list of tuples holding tmin, tavg, and tmax over every
        measurement whose date has that month and day
    """
    temps = measurement.tobs
    # Reduce each stored date to its month-day part so all years are compared
    month_day = func.strftime("%m-%d", measurement.date)
    query = session.query(func.min(temps), func.avg(temps), func.max(temps))
    return query.filter(month_day == date).all()


# For example
daily_normals("01-01")

In [None]:
# Vacation start and end dates as date objects
start = dt.date(2011, 8, 1)
end = dt.date(2011, 8, 7)

# Empty list that will receive the trip dates
dateList = list()

In [None]:
# Loop through range of specified dates and append to dateList with %m-%d format
while start <= end:
    month = start.strftime("%m")
    day = start.strftime("%d")
    dateList.append(f"{month}-{day}")
    start += dt.timedelta(days = 1)
    
# Reset start date to original    
start = (dt.datetime.strptime("2011-08-01", "%Y-%m-%d")).date()

In [None]:
# Check to see if list was populated with formatted dates
dateList

In [None]:
# Create empty list to hold daily normals
normals = []

In [None]:
# Loop through dateList and append temperatures to daily normals list
for x in range(len(dateList)):
    dailyTemps = daily_normals(dateList[x])
    normals.append(dailyTemps[0])

In [None]:
# Check to see if list was populated with tuples of temperatures
normals

In [None]:
# Put the daily normals in a pandas DataFrame indexed by the trip's month-day labels
temperatureColumns = ["Daily Min Temp", "Daily Avg Temp", "Daily Max Temp"]
normals_df = pd.DataFrame(normals, columns=temperatureColumns).assign(Date=dateList).set_index("Date")
normals_df

In [None]:
# Plot the daily normals as an area plot with 'stacked = False'
normals_df.plot.area(figsize=(18, 10), stacked=False, alpha=0.25, rot=45, fontsize=12)

# Define area plot title, x label and y label (and their font sizes), and y axis limits.
# The unit is written "°F" to agree with the labels on the trip average chart.
plt.title(f"Daily Temperature Normals from {start} to {end}", fontsize=18)
plt.xlabel("Date", fontsize=14)
plt.ylabel("Temperature (°F)", fontsize=14)
plt.ylim(0, 110)

# Print area plot to image file
plt.savefig("Images/Daily_Temperature_Normals.jpg")

# Display area plot
plt.show()

## Close Session

In [None]:
# Close the session now that all queries in this notebook have been run
session.close()