# Bonus: Temperature Analysis I

In [1]:
import pandas as pd
from datetime import datetime as dt
from scipy.stats import ttest_ind, ttest_rel

# Using Pandas

In [2]:
# Load the raw measurements CSV. The "tobs" column is "temperature observations".
df = pd.read_csv('Resources/hawaii_measurements.csv')
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Check the data types; at this point `date` is still a string (object) column
df.dtypes

station     object
date        object
prcp       float64
tobs         int64
dtype: object

In [4]:
# Convert the date column format from string to datetime.
# pd.to_datetime is used instead of .astype("datetime64") because the
# unit-less "datetime64" dtype is rejected with a TypeError by pandas 2.x.
df['date'] = pd.to_datetime(df['date'])

In [5]:
# Re-check the data types to confirm `date` is now datetime64[ns]
df.dtypes

station            object
date       datetime64[ns]
prcp              float64
tobs                int64
dtype: object

In [6]:
# Set the date column as the DataFrame index. set_index drops the date
# column from the regular columns by default (drop=True), so no separate
# drop step is needed.
df_dateindex = df.set_index("date")
df_dateindex.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.0,63
2010-01-03,USC00519397,0.0,74
2010-01-04,USC00519397,0.0,76
2010-01-06,USC00519397,,73


In [7]:
# Check the dtypes of the remaining columns now that date is the index
df_dateindex.dtypes

station     object
prcp       float64
tobs         int64
dtype: object

# Using SQLAlchemy ORM 

In [8]:
# Import dependencies
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

import numpy as np
import pandas as pd
import datetime as dt

In [9]:
# Create engine to hawaii.sqlite.
# The three-slash "sqlite:///" form is a relative path, so the database file
# is resolved against the current working directory.
engine = create_engine("sqlite:///Resources/hawaii.sqlite")

In [10]:
# Reflect an existing database into a new model
Base = automap_base()

# Reflect the tables.
# `autoload_with=` replaces the legacy `Base.prepare(engine, reflect=True)`
# call, which is deprecated in SQLAlchemy 1.4 and removed in 2.0.
# Requires SQLAlchemy >= 1.4.
Base.prepare(autoload_with=engine)

In [11]:
# Save references to the automap-generated class for each reflected table
Measurement = Base.classes.measurement
# NOTE(review): Station is not referenced by any later cell visible here
Station = Base.classes.station

In [12]:
# Create our session (link) from Python to the DB
# NOTE(review): the session is never closed in the visible cells; consider
# calling session.close() once the queries are finished.
session = Session(engine)

In [13]:
# Read the whole measurement table into a DataFrame.
# The connection is opened in a `with` block so it is closed once the read
# finishes; the original inline engine.connect() was never closed.
with engine.connect() as conn:
    measurement_df = pd.read_sql('select * from measurement', conn)
measurement_df.head()

Unnamed: 0,id,station,date,prcp,tobs
0,1,USC00519397,2010-01-01,0.08,65.0
1,2,USC00519397,2010-01-02,0.0,63.0
2,3,USC00519397,2010-01-03,0.0,74.0
3,4,USC00519397,2010-01-04,0.0,76.0
4,5,USC00519397,2010-01-06,,73.0


In [14]:
# Convert the date column format from string to datetime, then move it into
# the index (set_index drops the date column from the regular columns).
# pd.to_datetime is used instead of .astype('datetime64') because the
# unit-less 'datetime64' dtype is rejected with a TypeError by pandas 2.x.
measurement_df['date'] = pd.to_datetime(measurement_df['date'])
measurement = measurement_df.set_index("date")
measurement.head()

Unnamed: 0_level_0,id,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01,1,USC00519397,0.08,65.0
2010-01-02,2,USC00519397,0.0,63.0
2010-01-03,3,USC00519397,0.0,74.0
2010-01-04,4,USC00519397,0.0,76.0
2010-01-06,5,USC00519397,,73.0


### Compare June and December data across all years 

In [15]:
from scipy import stats

In [16]:
# Select (date, tobs) for every measurement whose month is June ("06"),
# across all years in the table
june = (
    session.query(Measurement.date, Measurement.tobs)
    .filter(func.strftime("%m", Measurement.date) == "06")
    .all()
)

# Load the June rows into a DataFrame
june_df = pd.DataFrame(data=june, columns=["date", "tobs"])
june_df.head()

Unnamed: 0,date,tobs
0,2010-06-01,78.0
1,2010-06-02,76.0
2,2010-06-03,78.0
3,2010-06-04,76.0
4,2010-06-05,77.0


In [17]:
# Select (date, tobs) for every measurement whose month is December ("12"),
# across all years in the table
december = (
    session.query(Measurement.date, Measurement.tobs)
    .filter(func.strftime("%m", Measurement.date) == "12")
    .all()
)

# Load the December rows into a DataFrame
december_df = pd.DataFrame(data=december, columns=["date", "tobs"])
december_df.head()

Unnamed: 0,date,tobs
0,2010-12-01,76.0
1,2010-12-03,74.0
2,2010-12-04,74.0
3,2010-12-06,64.0
4,2010-12-07,64.0


In [18]:
# Average of all June temperature observations, computed in the database
ave_temp_june = (
    session.query(func.avg(Measurement.tobs))
    .filter(func.strftime("%m", Measurement.date) == "06")
    .all()
)
ave_temp_june

[(74.94411764705882)]

In [19]:
# Average of all December temperature observations, computed in the database
ave_temp_december = (
    session.query(func.avg(Measurement.tobs))
    .filter(func.strftime("%m", Measurement.date) == "12")
    .all()
)
ave_temp_december

[(71.04152933421226)]

In [20]:
# Independent (unpaired) two-sample t-test on the June and December
# temperature observations
ttest_results = ttest_ind(
    june_df["tobs"],
    december_df["tobs"],
)
ttest_results

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)

### T-test Analysis

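We used an unpaired (independent two-sample) t-test to compare the mean June temperature with the mean December temperature, treating the June and December observations as two separate samples rather than matched pairs.  The **p-value** is **3.90e-191**, which is *far below the 0.05 level of significance*, so we reject the null hypothesis that the two means are equal: there is strong evidence of a statistically significant difference between the two months' tobs--June and December. The average June temperature (about 74.9) is roughly 3.9 degrees higher than the average December temperature (about 71.0).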