In [23]:
# Dependencies
import numpy as np
import pandas as pd

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

In [24]:
engine = create_engine("sqlite:///hawaii.sqlite")

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

# Save references to each table
Measurement = Base.classes.measurement
Station = Base.classes.station

In [25]:
# Create our session (link) from Python to the DB
session = Session(engine)

## D1: Determine the Summary Statistics for June

In [26]:
# 1. Import the sqlalchemy extract function.
from sqlalchemy import extract

# 2. Write a query that filters the Measurement table to retrieve the temperatures for the month of June. 
June = session.query(Measurement.date, Measurement.tobs).filter(extract('month', Measurement.date)==7).all()


In [27]:
#  3. Convert the June temperatures to a list.
June_list = list(June)
print(June_list)

[('2010-07-01', 75.0), ('2010-07-02', 77.0), ('2010-07-03', 77.0), ('2010-07-04', 78.0), ('2010-07-05', 78.0), ('2010-07-06', 76.0), ('2010-07-07', 74.0), ('2010-07-08', 76.0), ('2010-07-09', 74.0), ('2010-07-10', 76.0), ('2010-07-11', 78.0), ('2010-07-12', 77.0), ('2010-07-13', 80.0), ('2010-07-16', 78.0), ('2010-07-17', 76.0), ('2010-07-18', 77.0), ('2010-07-19', 77.0), ('2010-07-21', 80.0), ('2010-07-22', 78.0), ('2010-07-24', 80.0), ('2010-07-25', 79.0), ('2010-07-26', 78.0), ('2010-07-27', 79.0), ('2010-07-28', 78.0), ('2010-07-29', 79.0), ('2010-07-30', 78.0), ('2010-07-31', 77.0), ('2011-07-01', 77.0), ('2011-07-02', 78.0), ('2011-07-03', 78.0), ('2011-07-04', 78.0), ('2011-07-05', 78.0), ('2011-07-06', 76.0), ('2011-07-07', 76.0), ('2011-07-08', 77.0), ('2011-07-09', 77.0), ('2011-07-10', 77.0), ('2011-07-11', 78.0), ('2011-07-12', 77.0), ('2011-07-13', 73.0), ('2011-07-14', 77.0), ('2011-07-15', 79.0), ('2011-07-16', 78.0), ('2011-07-17', 77.0), ('2011-07-18', 78.0), ('2011-07

In [32]:
# 4. Create a DataFrame from the list of temperatures for the month of June. 
df = pd.DataFrame(June_list, columns=['Date','Temperature'])
df.set_index(df['Date'], inplace=True)
df = df.sort_index()
print(df.to_string(index=False))

      Date  Temperature
2010-07-01         75.0
2010-07-01         71.0
2010-07-01         73.0
2010-07-01         73.0
2010-07-01         74.0
2010-07-01         73.0
2010-07-01         70.0
2010-07-01         73.0
2010-07-02         71.0
2010-07-02         69.0
2010-07-02         69.0
2010-07-02         74.0
2010-07-02         72.0
2010-07-02         77.0
2010-07-02         70.0
2010-07-02         74.0
2010-07-03         73.0
2010-07-03         78.0
2010-07-03         72.0
2010-07-03         73.0
2010-07-03         69.0
2010-07-03         76.0
2010-07-03         77.0
2010-07-03         75.0
2010-07-04         79.0
2010-07-04         76.0
2010-07-04         78.0
2010-07-04         73.0
2010-07-04         78.0
2010-07-04         72.0
2010-07-04         75.0
2010-07-04         72.0
2010-07-05         75.0
2010-07-05         78.0
2010-07-05         74.0
2010-07-05         78.0
2010-07-05         72.0
2010-07-05         76.0
2010-07-05         72.0
2010-07-05         79.0
2010-07-06      

In [33]:
# 5. Calculate and print out the summary statistics for the June temperature DataFrame.
df.describe()

Unnamed: 0,Temperature
count,1711.0
mean,76.082408
std,3.356532
min,61.0
25%,74.0
50%,76.0
75%,78.0
max,87.0


## D2: Determine the Summary Statistics for December

In [34]:
# 6. Write a query that filters the Measurement table to retrieve the temperatures for the month of December.
Dec = session.query(Measurement.date, Measurement.tobs).filter(extract('month', Measurement.date)==12).all()

In [35]:
# 7. Convert the December temperatures to a list.
Dec_list = list(Dec)
print(Dec_list)

[('2010-12-01', 76.0), ('2010-12-03', 74.0), ('2010-12-04', 74.0), ('2010-12-06', 64.0), ('2010-12-07', 64.0), ('2010-12-08', 67.0), ('2010-12-09', 77.0), ('2010-12-10', 66.0), ('2010-12-11', 69.0), ('2010-12-12', 68.0), ('2010-12-13', 68.0), ('2010-12-14', 71.0), ('2010-12-15', 74.0), ('2010-12-16', 67.0), ('2010-12-17', 66.0), ('2010-12-18', 69.0), ('2010-12-19', 71.0), ('2010-12-23', 70.0), ('2010-12-24', 70.0), ('2010-12-26', 74.0), ('2010-12-27', 74.0), ('2010-12-28', 71.0), ('2010-12-29', 75.0), ('2010-12-30', 75.0), ('2010-12-31', 72.0), ('2011-12-01', 69.0), ('2011-12-02', 68.0), ('2011-12-03', 73.0), ('2011-12-04', 74.0), ('2011-12-05', 73.0), ('2011-12-06', 73.0), ('2011-12-07', 73.0), ('2011-12-08', 73.0), ('2011-12-09', 71.0), ('2011-12-10', 73.0), ('2011-12-11', 73.0), ('2011-12-12', 77.0), ('2011-12-13', 72.0), ('2011-12-14', 71.0), ('2011-12-15', 73.0), ('2011-12-16', 70.0), ('2011-12-17', 73.0), ('2011-12-18', 69.0), ('2011-12-19', 74.0), ('2011-12-20', 72.0), ('2011-12

In [36]:
# 8. Create a DataFrame from the list of temperatures for the month of December. 
df = pd.DataFrame(Dec_list, columns=['Date','Temperature'])
df.set_index(df['Date'], inplace=True)
df = df.sort_index()
print(df.to_string(index=False))

      Date  Temperature
2010-12-01         76.0
2010-12-01         73.0
2010-12-01         72.0
2010-12-01         78.0
2010-12-01         72.0
2010-12-01         70.0
2010-12-01         71.0
2010-12-01         73.0
2010-12-02         71.0
2010-12-02         71.0
2010-12-02         73.0
2010-12-02         75.0
2010-12-02         74.0
2010-12-02         72.0
2010-12-02         74.0
2010-12-02         70.0
2010-12-03         67.0
2010-12-03         74.0
2010-12-03         70.0
2010-12-03         74.0
2010-12-03         71.0
2010-12-03         74.0
2010-12-03         72.0
2010-12-03         73.0
2010-12-04         75.0
2010-12-04         72.0
2010-12-04         77.0
2010-12-04         74.0
2010-12-04         74.0
2010-12-04         78.0
2010-12-04         74.0
2010-12-05         66.0
2010-12-05         69.0
2010-12-05         71.0
2010-12-05         78.0
2010-12-05         73.0
2010-12-05         73.0
2010-12-05         69.0
2010-12-06         65.0
2010-12-06         61.0
2010-12-06      

In [37]:
# 9. Calculate and print out the summary statistics for the Decemeber temperature DataFrame.
df.describe()

Unnamed: 0,Temperature
count,1517.0
mean,71.041529
std,3.74592
min,56.0
25%,69.0
50%,71.0
75%,74.0
max,83.0
