# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
import numpy as np
from scipy.stats import f

def variance_ratio_test(x, y):
    """
    Calculates the F-value and corresponding p-value for a variance ratio test.
    
    Parameters:
    x (array): The first array of data.
    y (array): The second array of data.
    
    Returns:
    f_value (float): The F-value for the variance ratio test.
    p_value (float): The corresponding p-value for the test.
    """
    n_x = len(x)
    n_y = len(y)
    var_x = np.var(x, ddof=1)
    var_y = np.var(y, ddof=1)
    if var_x > var_y:
        f_value = var_x / var_y
        df1 = n_x - 1
        df2 = n_y - 1
    else:
        f_value = var_y / var_x
        df1 = n_y - 1
        df2 = n_x - 1
    p_value = f.sf(f_value, df1, df2)
    return f_value, p_value


In [2]:
x = [2, 4, 6, 8, 10]
y = [1, 3, 5, 7, 9]
f_value, p_value = variance_ratio_test(x, y)
print(f"F-value: {f_value:.2f}")
print(f"p-value: {p_value:.4f}")


F-value: 1.00
p-value: 0.5000


# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [3]:
from scipy.stats import f

def critical_f_value(dfn, dfd):
    """
    Calculates the critical F-value for a two-tailed test given a significance level of 0.05
    and the degrees of freedom for the numerator and denominator of an F-distribution.
    
    Parameters:
    dfn (int): The degrees of freedom for the numerator.
    dfd (int): The degrees of freedom for the denominator.
    
    
    Returns:
    f_crit (float): The critical F-value for a two-tailed test.
    """
    alpha = 0.05
    f_crit = f.ppf(1 - alpha/2, dfn, dfd)
    return f_crit


In [4]:
dfn = 5
dfd = 10
f_crit = critical_f_value(dfn, dfd)
print(f"Critical F-value: {f_crit:.4f}")


Critical F-value: 4.2361


# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F-value, degrees of freedom, and p-value for the test.

In [7]:
import numpy as np
from scipy.stats import f

# Set seed for reproducibility
np.random.seed(123)

# Generate random samples from two normal distributions with known variances
var1 = 4
var2 = 4
n1 = 20
n2 = 20
sample1 = np.random.normal(loc=0, scale=np.sqrt(var1), size=n1)
sample2 = np.random.normal(loc=0, scale=np.sqrt(var2), size=n2)

# Perform F-test for equal variances
f_value = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)
dfn = n1 - 1
dfd = n2 - 1
p_value = f.sf(f_value, dfn, dfd) * 2  # two-tailed test

# Print results
print(f"F-value: {f_value:.4f}")
print(f"Degrees of freedom (numerator, denominator): ({dfn}, {dfd})")
print(f"p-value: {p_value:.4f}")


F-value: 1.3181
Degrees of freedom (numerator, denominator): (19, 19)
p-value: 0.5531


# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from beach population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [8]:
from scipy.stats import f

# Define the sample sizes and known variances
n1 = 12
n2 = 12
var1 = 10
var2 = 15

# Calculate the F-statistic
F = var1 / var2

# Calculate the p-value
p = 2 * (1 - f.cdf(F, n1 - 1, n2 - 1))

# Define the level of significance
alpha = 0.05

# Determine if we reject or fail to reject the null hypothesis
if p < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# Print the F-statistic, degrees of freedom, and p-value
print("F-statistic:", F)
print("Degrees of freedom:", n1 - 1, n2 - 1)
print("p-value:", p)


Fail to reject the null hypothesis.
F-statistic: 0.6666666666666666
Degrees of freedom: 11 11
p-value: 1.4876102012642


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [9]:
from scipy.stats import f

# Define the sample size, known variance, and sample variance
n = 25
var = 0.005
sample_var = 0.006

# Calculate the F-statistic
F = sample_var / var

# Calculate the p-value
p = 1 - f.cdf(F, n - 1, n - 1)

# Define the level of significance
alpha = 0.01

# Determine if we reject or fail to reject the null hypothesis
if p < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# Print the F-statistic, degrees of freedom, and p-value
print("F-statistic:", F)
print("Degrees of freedom:", n - 1, n - 1)
print("p-value:", p)


Fail to reject the null hypothesis.
F-statistic: 1.2
Degrees of freedom: 24 24
p-value: 0.3293654682817244


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [10]:
def f_distribution_mean_var(numerator_df, denominator_df):
    mean = denominator_df / (denominator_df - 2)
    variance = (2 * (denominator_df ** 2) * (numerator_df + denominator_df - 2)) / ((numerator_df * (denominator_df - 2) ** 2) * (denominator_df - 4))
    return mean, variance


In [11]:
# Calculate the mean and variance of an F-distribution with 5 and 10 degrees of freedom
mean, variance = f_distribution_mean_var(5, 10)

# Print the mean and variance
print("Mean:", mean)
print("Variance:", variance)


Mean: 1.25
Variance: 1.3541666666666667


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [12]:
from scipy.stats import f

# Define the sample sizes and sample variances
n1 = 10
n2 = 15
s1_sq = 25
s2_sq = 20

# Calculate the F-statistic
F = s1_sq / s2_sq

# Calculate the p-value
p = 2 * min(f.cdf(F, n1 - 1, n2 - 1), 1 - f.cdf(F, n1 - 1, n2 - 1))

# Define the level of significance
alpha = 0.1

# Determine if we reject or fail to reject the null hypothesis
if p < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# Print the F-statistic, degrees of freedom, and p-value
print("F-statistic:", F)
print("Degrees of freedom:", n1 - 1, n2 - 1)
print("p-value:", p)


Fail to reject the null hypothesis.
F-statistic: 1.25
Degrees of freedom: 9 14
p-value: 0.6832194382585952


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [13]:
from scipy.stats import f

# Define the waiting times for each restaurant
a = [24, 25, 28, 23, 22, 20, 27]
b = [31, 33, 35, 30, 32, 36]

# Calculate the sample variances for each restaurant
var_a = sum((x - sum(a)/len(a))**2 for x in a) / (len(a) - 1)
var_b = sum((x - sum(b)/len(b))**2 for x in b) / (len(b) - 1)

# Calculate the F-statistic
F = var_a / var_b

# Calculate the p-value
p = 2 * min(f.cdf(F, len(a)-1, len(b)-1), 1 - f.cdf(F, len(a)-1, len(b)-1))

# Define the level of significance
alpha = 0.05

# Determine if we reject or fail to reject the null hypothesis
if p < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# Print the F-statistic, degrees of freedom, and p-value
print("F-statistic:", F)
print("Degrees of freedom:", len(a) - 1, len(b) - 1)
print("p-value:", p)


Fail to reject the null hypothesis.
F-statistic: 1.4551907719609583
Degrees of freedom: 6 5
p-value: 0.6974815747937484


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [14]:
from scipy.stats import f

# Define the test scores for each group
a = [80, 85, 90, 92, 87, 83]
b = [75, 78, 82, 79, 81, 84]

# Calculate the sample variances for each group
var_a = sum((x - sum(a)/len(a))**2 for x in a) / (len(a) - 1)
var_b = sum((x - sum(b)/len(b))**2 for x in b) / (len(b) - 1)

# Calculate the F-statistic
F = var_a / var_b

# Calculate the p-value
p = 2 * min(f.cdf(F, len(a)-1, len(b)-1), 1 - f.cdf(F, len(a)-1, len(b)-1))

# Define the level of significance
alpha = 0.01

# Determine if we reject or fail to reject the null hypothesis
if p < alpha:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")

# Print the F-statistic, degrees of freedom, and p-value
print("F-statistic:", F)
print("Degrees of freedom:", len(a) - 1, len(b) - 1)
print("p-value:", p)


Fail to reject the null hypothesis.
F-statistic: 1.9442622950819677
Degrees of freedom: 5 5
p-value: 0.4831043549070688
