# Hypothesis Testing with One Sample

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

In [2]:
# Read in the data
df = pd.read_csv('../data/wine.csv')

In [3]:
# Nilai rata-rata pH di atas 3.29?
print('H0 : rata-rata kualitas anggur adalah 3.29')
print('H1 : rata-rata kualitas anggur > 3.29')
print('alpha = 0.05')

# Daerah Kritis : z > z_alpha
# z_alpha = 1.645
print('Daerah Kritis : z > 1.645')

z = (df['pH'].mean() - 3.29) / (df['pH'].std() / np.sqrt(len(df['pH'])))
print('z =', z)

# convert to p-value (right-tailed)
p = scipy.stats.norm.sf(abs(z))
print('p =', p)

if p > 0.05:
    print('H0 diterima')
    print('H1 ditolak')
    print('Rata-rata kualitas anggur = 3.29')
else:
    print('H0 ditolak')
    print('H1 diterima')
    print('Rata-rata kualitas anggur > 3.29')

print('Kenyataan :', df['pH'].mean())

H0 : rata-rata kualitas anggur adalah 3.29
H1 : rata-rata kualitas anggur > 3.29
alpha = 0.05
Daerah Kritis : z > 1.645
z = 4.103780793366056
p = 2.0322630043219585e-05
H0 ditolak
H1 diterima
Rata-rata kualitas anggur > 3.29
Kenyataan : 3.303610000000003


In [4]:
# Nilai rata-rata residual sugar tidak sama dengan 2.5?
print('H0 : rata-rata residual sugar adalah 2.5')
print('H1 : rata-rata residual sugar != 2.5')
print('alpha = 0.05')

# Daerah Kritis : z < -z_alpha atau z > z_alpha
# z_alpha = 1.645
print('Daerah Kritis : z < -1.645 atau z > 1.645')

z = (df['residual sugar'].mean() - 2.5) / (df['residual sugar'].std() / np.sqrt(len(df['residual sugar'])))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 diterima')
    print('H1 ditolak')
    print('Rata-rata residual sugar = 2.5')
else:
    print('H0 ditolak')
    print('H1 diterima')
    print('Rata-rata residual sugar != 2.5')

print('Kenyataan :', df['residual sugar'].mean())

H0 : rata-rata residual sugar adalah 2.5
H1 : rata-rata residual sugar != 2.5
alpha = 0.05
Daerah Kritis : z < -1.645 atau z > 1.645
z = 2.147961943553881
p = 0.03171677881873312
H0 ditolak
H1 diterima
Rata-rata residual sugar != 2.5
Kenyataan : 2.5671036825067572


In [8]:
# Nilai rata-rata 150 baris pertama kolom sulphates sama dengan 0.65?
print('H0 : rata-rata sulphates = 0.65')
print('H1 : rata-rata sulphates != 0.65')
print('alpha = 0.05')

# Daerah Kritis : z < -z_alpha atau z > z_alpha
# z_alpha = 1.645
print('Daerah Kritis : z < -1.645 atau z > 1.645')

z = (df['sulphates'].iloc[:150].mean() - 0.65) / (df['sulphates'].iloc[:150].std() / np.sqrt(len(df['sulphates'].iloc[:150])))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 diterima')
    print('H1 ditolak')
    print('Rata-rata sulphates = 0.65')
else:
    print('H0 ditolak')
    print('H1 diterima')
    print('Rata-rata sulphates != 0.65')

print('Kenyataan: ', df['sulphates'].iloc[:150].mean())

H0 : rata-rata sulphates = 0.65
H1 : rata-rata sulphates != 0.65
alpha = 0.05
Daerah Kritis : z < -1.645 atau z > 1.645
z = -4.964843393315906
p = 6.875652918327764e-07
H0 ditolak
H1 diterima
Rata-rata sulphates != 0.65
Kenyataan:  0.6058666666666668


In [6]:
# Nilai rata-rata total sulfur dioxide di bawah 35?
print('H0 : rata-rata total sulfur dioxide = 35')
print('H1 : rata-rata total sulfur dioxide < 35')
print('alpha = 0.05')

# Daerah Kritis : z < -z_alpha
# z_alpha = 1.645
print('Daerah Kritis : z < -1.645')

z = (df['total sulfur dioxide'].mean() - 35) / (df['total sulfur dioxide'].std() / np.sqrt(len(df['total sulfur dioxide'])))
print('z =', z)

# convert to p-value (left-tailed)
p = scipy.stats.norm.sf(abs(z))
print('p =', p)

if p > 0.05:
    print('H0 diterima')
    print('H1 ditolak')
    print('Rata-rata total sulfur dioxide = 35')
else:
    print('H0 ditolak')
    print('H1 diterima')
    print('Rata-rata total sulfur dioxide < 35')

print('Kenyataan: ', df['total sulfur dioxide'].mean())

H0 : rata-rata total sulfur dioxide = 35
H1 : rata-rata total sulfur dioxide < 35
alpha = 0.05
Daerah Kritis : z < -1.645
z = 16.786387372296968
p = 1.5348545974028345e-63
H0 ditolak
H1 diterima
Rata-rata total sulfur dioxide < 35
Kenyataan:  40.290150000000075


In [7]:
# Proporsi nilai total Sulfat Dioxide yang lebih dari 40, adalah tidak sama dengan 50%?
print('H0 : proporsi nilai total sulfur dioxide > 40 = 50%')
print('H1 : proporsi nilai total sulfur dioxide > 40 != 50%')
print('alpha = 0.05')

# Daerah Kritis : z < -z_alpha atau z > z_alpha
# z_alpha = 1.645
print('Daerah Kritis : z < -1.645 atau z > 1.645')

z = (df[df['total sulfur dioxide'] > 40].shape[0] / df.shape[0] - 0.5) / (np.sqrt(0.5 * 0.5 / df.shape[0]))
print('z =', z)

# convert to p-value (two-tailed)
p = scipy.stats.norm.sf(abs(z)) * 2
print('p =', p)

if p > 0.05:
    print('H0 diterima')
    print('H1 ditolak')
    print('Proporsi nilai total sulfur dioxide > 40 = 50%')
else:
    print('H0 ditolak')
    print('H1 diterima')
    print('Proporsi nilai total sulfur dioxide > 40 != 50%')

print('Kenyataan: ', df[df['total sulfur dioxide'] > 40].shape[0] / df.shape[0])

H0 : proporsi nilai total sulfur dioxide > 40 = 50%
H1 : proporsi nilai total sulfur dioxide > 40 != 50%
alpha = 0.05
Daerah Kritis : z < -1.645 atau z > 1.645
z = 0.7589466384404118
p = 0.4478844782641115
H0 diterima
H1 ditolak
Proporsi nilai total sulfur dioxide > 40 = 50%
Kenyataan:  0.512
