In [145]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

In [95]:
# "tobs" is "temperature observations"
# Load the raw station measurements from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='hawaii_measurements.csv')
df.head(n=5)

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [96]:
# Convert the date column from string to datetime.
# The dates are dash-separated ("2010-01-01"), so the format must use dashes too.
# A slash format ('%Y/%m/%d') was only accepted by older pandas' lenient ISO
# fast path; strict format matching rejects it with a ValueError.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['date'].head()

0   2010-01-01
1   2010-01-02
2   2010-01-03
3   2010-01-04
4   2010-01-06
Name: date, dtype: datetime64[ns]

In [97]:
# Build a date-indexed frame from a fresh read of the CSV.
# Because the file is re-read without date parsing, the index holds the raw
# date strings rather than datetime values.
date_df = pd.read_csv('hawaii_measurements.csv').set_index("date")
date_df.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.0,63
2010-01-03,USC00519397,0.0,74
2010-01-04,USC00519397,0.0,76
2010-01-06,USC00519397,,73


In [98]:
# Build a frame without the date column, starting from a fresh read of the CSV.
drop_df = pd.read_csv('hawaii_measurements.csv').drop(columns="date")
drop_df.head()

Unnamed: 0,station,prcp,tobs
0,USC00519397,0.08,65
1,USC00519397,0.0,63
2,USC00519397,0.0,74
3,USC00519397,0.0,76
4,USC00519397,,73


# Compare June and December data across all years

In [99]:
from scipy import stats
from scipy.stats import ttest_ind

In [100]:
# Keep only the rows whose date falls in June (month 6), pooling every year.
# Relies on df["date"] already being a datetime column so that `.dt` is available.
june_filt = df.loc[df["date"].dt.month.eq(6)]
june_filt.head()

Unnamed: 0,station,date,prcp,tobs
133,USC00519397,2010-06-01,0.0,78
134,USC00519397,2010-06-02,0.01,76
135,USC00519397,2010-06-03,0.0,78
136,USC00519397,2010-06-04,0.0,76
137,USC00519397,2010-06-05,0.0,77


In [101]:
# Keep only the rows whose date falls in December (month 12), pooling every year.
# Relies on df['date'] already being a datetime column so that `.dt` is available.
dec_filt = df.loc[df['date'].dt.month.eq(12)]
dec_filt.head()

Unnamed: 0,station,date,prcp,tobs
305,USC00519397,2010-12-01,0.04,76
306,USC00519397,2010-12-03,0.0,74
307,USC00519397,2010-12-04,0.0,74
308,USC00519397,2010-12-06,0.0,64
309,USC00519397,2010-12-07,0.0,64


In [147]:
# Mean of all June temperature observations ("tobs"), pooled across years.
june_avg = june_filt.loc[:, "tobs"].mean()

In [146]:
# Display the June mean temperature as the cell output.
june_filt["tobs"].mean(axis=0)

74.94411764705882

In [149]:
# Mean of all December temperature observations ("tobs"), pooled across years.
dec_avg = dec_filt.loc[:, "tobs"].mean()

In [148]:
# Display the December mean temperature as the cell output.
dec_filt["tobs"].mean(axis=0)

71.04152933421226

In [137]:
# Collect the two temperature samples (June first, then December) in a list.
new = [month_frame["tobs"] for month_frame in (june_filt, dec_filt)]

In [139]:
# Run an unpaired (independent two-sample) t-test on June vs. December temperatures.
# ttest_ind is the independent test, not the paired one (that would be ttest_rel,
# which needs one-to-one matched observations in both samples).
# By default ttest_ind assumes equal population variances (equal_var=True).
ttest_ind(june_filt["tobs"], dec_filt["tobs"])

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)