# Hypothesis Testing with One Sample

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

df = pd.read_csv('../data/wine.csv')

In [2]:
print('H0 : mean of pH is 3.29')
print('H1 : mean of pH > 3.29')
print('alpha = 0.05')

print('Critical Region : z > 1.645')

z = (df['pH'].mean() - 3.29) / (df['pH'].std() / np.sqrt(len(df['pH'])))
print('z =', z)

# convert to p-value (right-tailed)
p = scipy.stats.norm.sf(abs(z))
print('p =', p)

if p > 0.05:
    print('H0 failed to reject')
    print('Mean of pH = 3.29')
else:
    print('H0 rejected')
    print('Mean of pH > 3.29')

print('Reality:', df['pH'].mean())

H0 : mean of pH is 3.29
H1 : mean of pH > 3.29
alpha = 0.05
Critical Region : z > 1.645
z = 4.1037807933651145
p = 2.0322630043302333e-05
H0 rejected
Mean of pH > 3.29
Reality: 3.30361


In [3]:
print('H0 : mean of residual sugar is 2.5')
print('H1 : mean of residual sugar != 2.5')
print('alpha = 0.05')

print('Critical Region : z < -1.645 or z > 1.645')

z = (df['residual sugar'].mean() - 2.5) / (df['residual sugar'].std() / np.sqrt(len(df['residual sugar'])))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 failed to reject')
    print('Mean of residual sugar = 2.5')
else:
    print('H0 rejected')
    print('Mean of residual sugar != 2.5')

print('Reality:', df['residual sugar'].mean())

H0 : mean of residual sugar is 2.5
H1 : mean of residual sugar != 2.5
alpha = 0.05
Critical Region : z < -1.645 or z > 1.645
z = 2.1479619435539523
p = 0.031716778818727434
H0 rejected
Mean of residual sugar != 2.5
Reality: 2.5671036825067595


In [4]:
print('H0 : mean of sulphates = 0.65')
print('H1 : mean of sulphates != 0.65')
print('alpha = 0.05')

print('Critical Region : z < -1.645 or z > 1.645')

z = (df['sulphates'].iloc[:150].mean() - 0.65) / (df['sulphates'].iloc[:150].std() / np.sqrt(len(df['sulphates'].iloc[:150])))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 failed to reject')
    print('Mean of sulphates = 0.65')
else:
    print('H0 rejected')
    print('Mean of sulphates != 0.65')

print('Reality: ', df['sulphates'].iloc[:150].mean())

H0 : mean of sulphates = 0.65
H1 : mean of sulphates != 0.65
alpha = 0.05
Critical Region : z < -1.645 or z > 1.645
z = -4.964843393315918
p = 6.875652918327357e-07
H0 rejected
Mean of sulphates != 0.65
Reality:  0.6058666666666667


In [5]:
print('H0 : mean of total sulfur dioxide = 35')
print('H1 : mean of total sulfur dioxide < 35')
print('alpha = 0.05')

print('Critical Region : z < -1.645')

z = (df['total sulfur dioxide'].mean() - 35) / (df['total sulfur dioxide'].std() / np.sqrt(len(df['total sulfur dioxide'])))
print('z =', z)

# convert to p-value (left-tailed)
p = scipy.stats.norm.sf(abs(z))
print('p =', p)

if p > 0.05:
    print('H0 failed to reject')
    print('Mean of total sulfur dioxide = 35')
else:
    print('H0 rejected')
    print('Mean of total sulfur dioxide < 35')

print('Reality: ', df['total sulfur dioxide'].mean())

H0 : mean of total sulfur dioxide = 35
H1 : mean of total sulfur dioxide < 35
alpha = 0.05
Critical Region : z < -1.645
z = 16.786387372296744
p = 1.5348545974086137e-63
H0 rejected
Mean of total sulfur dioxide < 35
Reality:  40.290150000000004


In [6]:
print('H0 : proportion of total sulfur dioxide > 40 = 50%')
print('H1 : proportion of total sulfur dioxide > 40 != 50%')
print('alpha = 0.05')

print('Critical Region : z < -1.645 or z > 1.645')

z = (df[df['total sulfur dioxide'] > 40].shape[0] / df.shape[0] - 0.5) / (np.sqrt(0.5 * 0.5 / df.shape[0]))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 failed to reject')
    print('Proportion of total sulfur dioxide > 40 = 50%')
else:
    print('H0 rejected')
    print('Proportion of total sulfur dioxide > 40 != 50%')

print('Reality: ', df[df['total sulfur dioxide'] > 40].shape[0] / df.shape[0])

H0 : proportion of total sulfur dioxide > 40 = 50%
H1 : proportion of total sulfur dioxide > 40 != 50%
alpha = 0.05
Critical Region : z < -1.645 or z > 1.645
z = 0.7589466384404118
p = 0.4478844782641115
H0 failed to reject
Proportion of total sulfur dioxide > 40 = 50%
Reality:  0.512
