# Bonus: Temperature Analysis I

In [1]:
import pandas as pd
from datetime import datetime as dt


In [2]:
# "tobs" is "temperature observations"
df = pd.read_csv('hawaii_measurements.csv')
df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,01/01/2010,0.08,65
1,USC00519397,02/01/2010,0.0,63
2,USC00519397,03/01/2010,0.0,74
3,USC00519397,04/01/2010,0.0,76
4,USC00519397,06/01/2010,,73


In [3]:
# Convert the date column format from string to datetime
df = pd.DataFrame({'date':[2010-10-15]})
print(df)
df.info()

   date
0  1985
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   date    1 non-null      int64
dtypes: int64(1)
memory usage: 136.0 bytes


In [4]:
# Set the date column as the DataFrame index
df.set_index('date')

1985


In [5]:
# Drop the date column
df.drop(['date'], axis=1)

0


### Compare June and December data across all years 

In [6]:
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.types import Date
from sqlalchemy.orm import Session
from sqlalchemy import func
from sqlalchemy.sql.expression import extract
import pandas as pd 
from scipy.stats import ttest_ind
import numpy as np

In [7]:
# set up Base

Base = declarative_base()

In [8]:
#create class for measurement table

class Measurement(Base):
    __tablename__ = "measurement"
    
    id = Column(Integer, primary_key=True)
    station = Column(String)
    date = Column(Date)
    prcp = Column(Float)
    tobs = Column(Float)

In [9]:
#create class for station table

class Station(Base):
    __tablename__ = "station"
    
    id = Column(Integer, primary_key=True)
    station = Column(String)
    name = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation =  Column(Float)

In [10]:
 #create engine and session to link to the database

engine = create_engine("sqlite:///Resources/hawaii.sqlite")
conn = engine.connect()
session = Session(bind=engine) # If something breaks, check here <-



In [11]:
# get all temperatures during June at all stations in all years

june_df = session.query(Measurement.date, Measurement.tobs)\
    .filter(extract("month", Measurement.date) == "6").all()



In [12]:
# convert to dataframe
june_df = pd.DataFrame(june_df, columns = ["Date", "Temperature"])

june_df

Unnamed: 0,Date,Temperature
0,2010-06-01,78.0
1,2010-06-02,76.0
2,2010-06-03,78.0
3,2010-06-04,76.0
4,2010-06-05,77.0
...,...,...
1695,2017-06-26,79.0
1696,2017-06-27,74.0
1697,2017-06-28,74.0
1698,2017-06-29,76.0


In [13]:
# get all temperatures during December at all stations in all years

december_df = session.query(Measurement.date, Measurement.tobs)\
    .filter(extract("month", Measurement.date) == "12").all()

In [14]:
# convert to dataframe
december_df = pd.DataFrame(december_df, columns = ["Date", "Temperature"])

december_df

Unnamed: 0,Date,Temperature
0,2010-12-01,76.0
1,2010-12-03,74.0
2,2010-12-04,74.0
3,2010-12-06,64.0
4,2010-12-07,64.0
...,...,...
1512,2016-12-27,71.0
1513,2016-12-28,71.0
1514,2016-12-29,69.0
1515,2016-12-30,65.0


In [15]:
# get average June temperature
june_avg = june_df["Temperature"].mean()
june_avg

74.94411764705882

In [16]:
# get average December temperature
december_avg = december_df["Temperature"].mean()
december_avg

71.04152933421226

In [17]:
# Create collections of temperature data

collections_temp_data = [june_df, december_df]

In [18]:
# Run paired t-test

stats.ttest_ind(june_df['Temperature'], december_df['Temperature'])

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)

### Analysis

In [None]:
#Paired t-test was used to calculate the two different independent samples: 
#temperature for June vs temperature for December across all available years in the dataset.
#Results: t = 31.60, p = 3.90
#Concluion: There is not sufficient evidence to conclude that there is a significant difference in means between 
#June and December temperatures across all years available.