## Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [45]:
import scipy.stats as stat
import numpy as np

# making func which takes 2 arrays
def f_test(array1, array2):
    # f-value by dividing varienc of both arrays
    f = np.var(array1, ddof=1) / np.var(array2, ddof=1)
    
    # degree of freedom of both arrays
    dof1 = array1.size -1
    dof2 = array2.size -1
    
    # p-value is (1- cdf)
    p_val = 1- stat.f.cdf(f, dof1, dof2)
    
    print(f"F-Value = {f:.4f} \nP-Value = {p_val:.4f}")
    
# sample arrays
a = np.linspace(9.5, 11, 15)
b = np.linspace(10, 11, 14)

# testing func
f_test(a,b)


F-Value = 2.2172 
P-Value = 0.0802


## Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [46]:
import scipy.stats as stat

# creating a func which takes degree of freedom of numerator as num and denominator as deno
def f_val(num , den):
    
    # given
    alpha = 0.05
    # f
    f_critical = stat.f.ppf(1- alpha/2, num, den)
    
    return f'f-value = {f_critical:.4f}'

num = 5
den = 6

f_val(num, den)

'f-value = 5.9876'

## Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F- value, degrees of freedom, and p-value for the test.

In [85]:
import numpy as np
from scipy.stats import f

# known variance, rest all assumed
n1 = 20
n2 = 25
mean1 = 10
mean2 = 11
variance1 = 9
variance2 = 12

# generating samples
np.random.seed(1)
sample1 = np.random.normal(mean1, np.sqrt(variance1), n1)
sample2 = np.random.normal(mean2, np.sqrt(variance2), n2)

# calculating Degree of freedom
dof1 = sample1.size - 1
dof2 = sample2.size - 1

# calculating f- value
f_val = np.var(sample1)/np.var(sample2)

# calculating p- value
p_val = 1 - stat.f.cdf(f_val, dof1, dof2)

# output
print(f'F-value = {f_val:.4f} \nP-value = {p_val:.4f} \nDegree of Freedom 1= {dof1} \nDegree of Freedom 2= {dof2}')

F-value = 1.3184 
P-value = 0.2583 
Degree of Freedom 1= 19 
Degree of Freedom 2= 24


## The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [95]:
import scipy.stats as stat

# Null hypothesis:
## variances are equal 

# Alternate hypothesis :
## variances are significantly different

# Given
var1= 10
var2= 15
n1= 12
n2= 12
alpha = 0.05

# calculating f-value
f_val = var1/var2

# calculating f-critical
f_critical = stat.f.ppf((1-alpha) , (n1-1), (n2-2)) 

# decision making
if f_val < f_critical:
    print('Fail to reject null hypothesis, variances are equal')
else:
    print('Reject the null hypothesis, variances are significantly different')

Fail to reject null hypothesis, variances are equal


## A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [119]:
import scipy.stats as stat

# Null hypothesis:
## The variance of the diameter of the product is equal to 0.005.

# Alternative hypothesis:
## The variance of the diameter of the product is greater than 0.005.

# given
var1 = 0.006
var2 = 0.005
n1 = 25
alpha = 0.1

# degree of freedom of numerator
dof1 = n1- 1

# calculating f-value
f_val = var1/var2

# calculating p-value, dof of denominator = 10000(considering population size to be a big number)
p_val = 1 -  stat.f.cdf(f_val, dof1, 10000)

if p_val< alpha:
    print('Reject the null hypothesis, The variance of the diameter of the product is greater than 0.005.')
else: 
    print("Fail to reject the null hypothesis, The variance of the diameter of the product is equal to 0.005.")
    
print(f'p-value = {p_val:.4f}')

Fail to reject the null hypothesis, The variance of the diameter of the product is equal to 0.005.
p-value = 0.2281


## Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [129]:
# dfn = degree of freedom of numerator
# dfd = degree of freedom of denominator

# creating a function for calculating mean and variance 
# takes 2 arguments 
def fMeanVariacne(dfn, dfd):
    
    # raise error if any of the two values is negative or zero
    if dfn <= 0 or dfd <= 0:
        raise TypeError('Values can not be zero or less than zero')
    
    # raise error if 1st aurgument is odd 
    if dfn % 2 != 0:
        raise TypeError('Degree of freedom of numerator has to be even')
        
    
    # calculations for mean and variance     
    mean = dfd / (dfd - 2)
    variance = (2 * dfd ** 2 * (dfn + dfd - 2)) / (dfn * (dfd - 2) ** 2 * (dfd - 4))

    return mean, variance

# testing output
fMeanVariacne(6, 10)
        


(1.25, 1.2152777777777777)

## A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [132]:
import scipy.stats as stat

# Null hypothesis :  variances are not significantly different
# Alternate hypothesis :  variances are significantly different

# given
n1 = 10
var1 = 25
n2 = 15
var2 = 20
alpha = 0.1

# calculating degree of freedom
dfn = n1 -1
dfd = n2- 1

# calculating the f-value of the distributions
f_val = var1 / var2

# calcuclating the p-value
p_val = 1 - stat.f.cdf(f_val, dfn, dfd)

# decision making
if p_val < alpha:
    print('Reject the null hypothesis,  variances are significantly different')
else:
    print('Fail to reject the null hypothesis,  variances are not significantly different')

Fail to reject the null hypothesis,  variances are not significantly different


## The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [137]:
import scipy.stats as stat

# Null hypothesis :  variances are not significantly different
# Alternate hypothesis :  variances are significantly different

# given
A = [24, 25, 28, 23, 22, 20, 27]
B = [31, 33, 35, 30, 32, 36]
alpha = 0.05

# length of samples
n1 = len(A)
n2 = len(B)

# variances of samples
var1 = np.var(A)
var2 = np.var(B)

# calculating degree of freedom
dfn = n1 -1           # numerator
dfd = n2- 1           # denominator

# calculating the f-value of the distributions
f_val = var1 / var2

# calcuclating the p-value
p_val = 1 - stat.f.cdf(f_val, dfn, dfd)

# decision making
if p_val < alpha:
    print('Reject the null hypothesis,  variances are significantly different')
else:
    print('Fail to reject the null hypothesis,  variances are not significantly different')

Fail to reject the null hypothesis,  variances are not significantly different


## The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [139]:
import scipy.stats as stat

# Null hypothesis :  variances are not significantly different
# Alternate hypothesis :  variances are significantly different

# given
A = [80, 85, 90, 92, 87, 83]
B = [75, 78, 82, 79, 81, 84]
alpha = 0.05

# length of samples
n1 = len(A)
n2 = len(B)

# variances of samples
var1 = np.var(A)
var2 = np.var(B)

# calculating degree of freedom
dfn = n1 -1           # numerator
dfd = n2- 1           # denominator

# calculating the f-value of the distributions
f_val = var1 / var2

# calcuclating the p-value
p_val = 1 - stat.f.cdf(f_val, dfn, dfd)

# decision making
if p_val < alpha:
    print('Reject the null hypothesis,  variances are significantly different')
else:
    print('Fail to reject the null hypothesis,  variances are not significantly different')

Fail to reject the null hypothesis,  variances are not significantly different
