# Temperature Analysis

In [2]:
import pandas as pd
from datetime import datetime as dt

In [3]:
# "tobs" is "temperature observations"
df = pd.read_csv('Resources/hawaii_measurements.csv')
df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [4]:
# convert the date column format from string to datetime
df['date'] = pd.to_datetime(df['date'])

# set date as index
df.set_index('date', inplace=True)

df.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.0,63
2010-01-03,USC00519397,0.0,74
2010-01-04,USC00519397,0.0,76
2010-01-06,USC00519397,,73


### Compare June and December data across all years 

In [6]:
from scipy import stats

In [7]:
# Filter data for desired months

df_june = df[pd.DatetimeIndex(df.index).month == 6]
df_dec = df[pd.DatetimeIndex(df.index).month == 12]

df_june.head(), df_dec.head()

(                station  prcp  tobs
 date                               
 2010-06-01  USC00519397  0.00    78
 2010-06-02  USC00519397  0.01    76
 2010-06-03  USC00519397  0.00    78
 2010-06-04  USC00519397  0.00    76
 2010-06-05  USC00519397  0.00    77,
                 station  prcp  tobs
 date                               
 2010-12-01  USC00519397  0.04    76
 2010-12-03  USC00519397  0.00    74
 2010-12-04  USC00519397  0.00    74
 2010-12-06  USC00519397  0.00    64
 2010-12-07  USC00519397  0.00    64)

In [8]:
# Identify the average temperature for June
june_avg = df_june['tobs'].mean()

june_avg

74.94411764705882

In [9]:
# Identify the average temperature for December
dec_avg = df_dec['tobs'].mean()

dec_avg

71.04152933421226

In [36]:
# Create collections of temperature data
rand_state = 42

a = list(df_june['tobs'].sample(n=50, random_state=rand_state)) #take 50 samples from june
b = list(df_dec['tobs'].sample(n=50, random_state=rand_state)) #take 50 samples from dec
     

In [42]:
# Run un-paired t-test

tStat, pValue = stats.ttest_ind(a, b, equal_var = False) #run independent sample T-Test
tStat, pValue

(4.4833328532586645, 2.0144292854007394e-05)

In [43]:
# Run paired t-test
tStat, pValue =  stats.ttest_rel(a, b)
tStat, pValue

(4.9718941000089725, 8.51846416693701e-06)

### Analysis

The difference in temperatures in Hawaii in June and December is stastically significant. The t-test shows a very low p-value.