In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import scipy.stats as stats
%matplotlib inline

### Chi Square test for variance

**Assumptions:** 

* Continuous data
* Normally distributed population - All the samples should be greater than 30, Central Limit Theorem states that the distribution of sample means will be normal.
* Random sampling from the population

**Example 1:** A factory claims the variance in the lifespan of its light bulbs is 1000 hours. A sample of 30 light bulbs has a sample variance of 1200 hours. Test if the sample variance significantly differs from the population variance at a 5% significance level.

Testing the null hypothesis

>$H_0:\sigma^2≤1000$

against the alternate hyothesis

>$H_1:\sigma^2> 1000$

In [2]:
#Data
pop_variance=1000
sample_variance=1200
alpha=0.05
n=30
def chi2_squ(pop,samp,n):
    test_static=(n-1)*sample_variance/pop_variance
    p_value=1-stats.chi2.cdf(test_static,n-1)
    return (test_static,p_value)
print(chi2_squ(pop_variance,sample_variance,n))   

(34.8, 0.21118233136574438)


since the p value is greater than alpha we fail to reject the null hypothesis and hence we can conclude that we dont have enough statistical evidence to say the population variance is greater than the sample variance

In [3]:
#testing the hypothesis using critical value
alpha=0.05
critical_value=stats.chi2.ppf(1-alpha,n-1)
critical_value


42.55696780429269

since t_stat is less than the critical value we fail to reject the null hypothesis

**Example 2:** A manufacturer claims that the variance in the diameter of a part is 25 mm². A sample of 40 parts has a sample variance of 30 mm². Test the hypothesis at the 1% significance level.

Testing the null hypothesis

>$H_0:\sigma^2≤25$

against the alternate hypothesis

>$H_1:\sigma^2>25$

In [4]:
#Data
pop_var=25
sam_var=30
alpha=0.01
n=40
df=n-1
#calculate the hypothesis using p value
def chi2_Squ(pop_var,sam_pop,n):
    test_stat=(n-1)*sam_var/pop_var
    p_value=stats.chi2.cdf(test_stat,df)
    return(test_stat,p_value)
print(chi2_squ(pop_var,sam_var,n))

(46.8, 0.18280344197784837)


since p value is greater than alpha we fail to reject the null hypothesis hence we can conclude that we dont have enough statistical evidence to say the population variance is greater than the sample variance

In [5]:
#calculate the hypothesis using the critical value
critical_value=stats.chi2.ppf(1-alpha,df)
critical_value

62.4281210161849

since test_stat is less than the critical value we fail to reject the null hypothesis

**Example 3:** An educational institution wants to check if the variance in scores on a standardized test among its students is less than the national variance of 20 points². A sample of 30 students from the institution has a variance of 15 points². At a 1% significance level, test if the variance is less than the national variance.

Testing the null hypothesis

>$H_0:\sigma^2≥20$

against the alternate hypothesis

>$H_1:\sigma^2<20$

In [6]:
#Data
n = 30
pop_var = 20
sample_var = 15
alpha = 0.01

In [9]:
#checking the hypothesis using p value
# Calculate test statistic
df = n - 1
test_stat = (df * sample_var) / pop_var
p_value = stats.chi2.cdf(test_stat, df)
critical_value = stats.chi2.ppf(alpha, df)


print(chi2_squ(pop_var,sam_var,n))

(34.8, 0.21118233136574438)


since p value is greater than alpha we fail to reject the null hypothesis

In [10]:
#testing the hypothesis using critical value
critical_value=stats.chi2.ppf(alpha,df)
critical_value

14.256454576274688

In [11]:
test_stat

21.75

since test_stat is greater than critical value we fail to reject the null hypothesis

**Example 4:** A production line manager claims that the variance in the defect rate per batch should be at least 1.2% squared. A sample of 25 batches has a variance of 0.9% squared. Test at a 1% significance level if the variance is less than the standard variance.

Testing the null hypothesis

>$H_0:\sigma^2≥1.2$

against the alternate hypothesis

>$H_1:\sigma^2<1.25$

In [12]:
# Given data
n = 25
pop_var = 1.2
sample_var = 0.9
alpha = 0.01

In [16]:
#testing the hypothesis using p value
def chi_var_left_tailed(pop_var, sample_var, n, alpha):
    df = n - 1
    test_stat = (df * sample_var) / pop_var
    p_value = stats.chi2.cdf(test_stat, df)  
    critical_value = stats.chi2.ppf(alpha, df)
    return test_stat, p_value, critical_value

In [17]:
test_stat, p_value, critical_value = chi_var_left_tailed(pop_var, sample_var, n, alpha)


In [18]:
p_value

0.19699161747065777

since p value is greater than alpha we fail to reject the null hypothesis

In [19]:
test_stat

18.000000000000004

In [20]:
critical_value

10.85636147553228

since test stat is greater than critical value we fail to reject the null hypothesis