## Overview

In [1]:
# For each of the following questions, formulate a null and alternative hypothesis 
# (be as specific as you can be), then give an example of what a true positive, true negative,
# type I and type II errors would look like. Note that some of the questions are 
# intentionally phrased in a vague way. It is your job to reword these as more 
# precise questions that could be tested.

# Has the network latency gone up since we switched internet service providers?

# Null - The network latency has not changed since we changed internet service providers.
# Alternative - The network latency has gotten worse since we changed internet service providers.
# True Positive - The null hypothesis is not true and we rejected it
# True Negative - The null hypothesis is true and we accepted it.
# Type I - The null hypothesis was rejected but it's true.
# Type II - The null hypothesis was not rejected but it's false.

# Is the website redesign any good?
# Is the website redesign attracting more users?

# Null - The website redesign is not changing the amount of users.
# Alternative - The website redesign is increasing the amount of users.
# True Positive - The null hypothesis is not true and we rejected it
# True Negative - The null hypothesis is true and we accepted it.
# Type I - The null hypothesis was rejected but it's true.
# Type II - The null hypothesis was not rejected but it's false. 

# Is our television ad driving more sales?

# Null - The television add has not changed the sales.
# Alternative - The television add has increased the sales.
# True Positive - The null hypothesis is not true and we rejected it
# True Negative - The null hypothesis is true and we accepted it.
# Type I - The null hypothesis was rejected but it's true.
# Type II - The null hypothesis was not rejected but it's false.


## T-Test

In [70]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from pydataset import data
import seaborn as sns
alpha = .05

In [71]:
# Ace Realty wants to determine whether the average time it takes to sell homes is different 
# for its two offices. A sample of 40 sales from office #1 revealed a mean of 90 days 
# and a standard deviation of 15 days. A sample of 50 sales from office #2 revealed a mean 
# of 100 days and a standard deviation of 20 days. Use a .05 level of significance.

# Null - There is no difference in the average time it takes to sell homes in the different offices.

office1 = stats.norm(90, 15)
avg_time1 = office1.pdf(40)

office2 = stats.norm(100, 20)
avg_time2 = office1.pdf(50)

office1_rand = stats.norm.rvs(90, 15, size=100)
office2_rand = stats.norm.rvs(100, 20, size=100)
f, p = stats.f_oneway(office1_rand, office2_rand)
if p < alpha:
    print("We reject $H_{0}$")
else:
    print("We fail to reject $H_{0}$")


We fail to reject $H_{0}$


In [84]:
f, p = stats.ttest_ind_from_stats(90,15,40,100,20,50,equal_var=False)
if p < alpha:
    print("We reject $H_{0}$")
else:
    print("We fail to reject $H_{0}$")

We reject $H_{0}$


In [72]:
# Load the mpg dataset and use it to answer the following questions:

mpg = data('mpg')

# Is there a difference in fuel-efficiency in cars from 2008 vs 1999?

mpg['average_mileage'] = ((mpg.cty + mpg.hwy)/2)
mpg_2008 = mpg.loc[(mpg['year'] == 2008)]
mpg_1999 = mpg.loc[(mpg['year'] == 1999)]
fuel_eff_2008 = mpg_2008.average_mileage.mean()
fuel_eff_1999 = mpg_1999.average_mileage.mean()
print(mpg_2008.average_mileage.var())
print(mpg_1999.average_mileage.var())
f, p = stats.f_oneway(mpg_2008.average_mileage, mpg_1999.average_mileage)
if p < alpha:
    print("We reject $H_{0}$")
else:
    print("We fail to reject $H_{0}$")

24.097480106100797
27.122605363984682
We fail to reject $H_{0}$


In [73]:
# Are compact cars more fuel-efficient than the average car?

compact = ['compact']
mpg_avg_car = mpg[~mpg['class'].isin(compact)]
mpg_compact = mpg.loc[(mpg['class'] == 'compact')]
t, p = stats.ttest_ind(mpg_compact.average_mileage, mpg_avg_car.average_mileage, equal_var=False)
print("is p/2 < alpha? ", p / 2 < alpha)
print("is t > 0? ", t > 0)
if p / 2 > alpha:
    print("We fail to reject $H_{0}$")
elif t < 0:
    print("We fail to reject $H_{0}$")
else:
    print("We reject $H_{0}$")

is p/2 < alpha?  True
is t > 0?  True
We reject $H_{0}$


In [76]:
# Do manual cars get better gas mileage than automatic cars?

mpg['is_automatic'] = mpg.trans.str.startswith('a')
mpg_auto = mpg.loc[(mpg['is_automatic'] == True)]
mpg_manual = mpg.loc[(mpg['is_automatic'] == False)]
print(mpg_auto.average_mileage.var())
print(mpg_manual.average_mileage.var())
t, p = stats.ttest_ind(mpg_manual.average_mileage, mpg_auto.average_mileage, equal_var=False)
print("is p/2 < alpha? ", p / 2 < alpha)
print("is t > 0? ", t > 0)
if p / 2 > alpha:
    print("We fail to reject $H_{0}$")
elif t < 0:
    print("We fail to reject $H_{0}$")
else:
    print("We reject $H_{0}$")

21.942777233382337
26.635167464114826
is p/2 < alpha?  True
is t > 0?  True
We reject $H_{0}$


In [75]:
mpg

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class,average_mileage,is_automatic
1,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact,23.5,True
2,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact,25.0,False
3,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact,25.5,False
4,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact,25.5,True
5,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact,21.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,volkswagen,passat,2.0,2008,4,auto(s6),f,19,28,p,midsize,23.5,True
231,volkswagen,passat,2.0,2008,4,manual(m6),f,21,29,p,midsize,25.0,False
232,volkswagen,passat,2.8,1999,6,auto(l5),f,16,26,p,midsize,21.0,True
233,volkswagen,passat,2.8,1999,6,manual(m5),f,18,26,p,midsize,22.0,False
