In [2]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio 
# test. The function should return the F-value and the corresponding p-value for the test.

In [3]:
import numpy as np
from scipy.stats import f

def variance_ratio_test(data1, data2):
    n1 = len(data1)
    n2 = len(data2)
    df1 = n1 - 1
    df2 = n2 - 1
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    f_value = var1 / var2
    p_value = f.cdf(f_value, df1, df2)
    return f_value, p_value


In [4]:
import numpy as np

data1 = np.array([2, 3, 4, 5, 6])
data2 = np.array([3, 4, 5, 6, 7, 8])

f_stat, p_value = variance_ratio_test(data1, data2)

print("F-value:", f_stat)
print("p-value:", p_value)


F-value: 0.7142857142857143
p-value: 0.383277210940066


In [5]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an 
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [6]:
from scipy.stats import f

def critical_f_value(dfn, dfd, alpha=0.05):
    return f.ppf(alpha / 2, dfn, dfd), f.ppf(1 - alpha / 2, dfn, dfd)


In [7]:
critical_f_value(3, 16)

(0.07026656300420833, 4.07682306196248)

In [8]:
# This means that if the calculated F-value falls below 0.0702 or above 4.077,
# we would reject the null hypothesis at the 0.05 level of significance for a two-tailed test.

In [9]:
# Q3. Write a Python program that generates random samples from two normal distributions with known 
# variances and uses an F-test to determine if the variances are equal. The program should output the F value, 
# degrees of freedom, and p-value for the test.

In [10]:
import numpy as np
from scipy.stats import f

# Set the random seed for reproducibility
np.random.seed(42)

# Generate random samples from two normal distributions with known variances
n = 20
m = 25
var_x = 3
var_y = 5
x = np.random.normal(loc=0, scale=np.sqrt(var_x), size=n)
y = np.random.normal(loc=0, scale=np.sqrt(var_y), size=m)

# Calculate the F-value, degrees of freedom, and p-value for the variance ratio test
dfn = n - 1
dfd = m - 1
f_stat = var_x / var_y if var_x >= var_y else var_y / var_x
p_value = 1 - f.cdf(f_stat, dfn, dfd)

# Output the results
print("Sample size of x:", n)
print("Sample size of y:", m)
print("Variance of x:", var_x)
print("Variance of y:", var_y)
print("F-value:", f_stat)
print("Degrees of freedom (numerator, denominator):", dfn, dfd)
print("p-value:", p_value)


Sample size of x: 20
Sample size of y: 25
Variance of x: 3
Variance of y: 5
F-value: 1.6666666666666667
Degrees of freedom (numerator, denominator): 19 24
p-value: 0.11804857371993138


In [11]:
# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from 
# each population. Conduct an F-test at the 5% significance level to determine if the variances are 
# significantly different.

In [12]:
# To conduct an F-test to determine if the variances of two populations are significantly different at a 5% significance level, 
# we need to follow the steps:

# State the null and alternative hypotheses:

# H0: The variances of the two populations are equal (σ1^2 = σ2^2).
# Ha: The variances of the two populations are significantly different (σ1^2 ≠ σ2^2).
# Determine the level of significance α. In this case, α = 0.05.

# Calculate the F-statistic using the formula:

# F = S1^2 / S2^2

# where S1^2 and S2^2 are the sample variances of the two populations.

# Determine the degrees of freedom for the numerator and denominator of the F-statistic. For two populations with sample sizes n1 and n2, 
# the degrees of freedom are df1 = n1 - 1 and df2 = n2 - 1, respectively.

# Calculate the p-value associated with the F-statistic using a cumulative distribution function (CDF) of the F-distribution 
# with df1 and df2 degrees of freedom.

# Compare the p-value to the level of significance. If the p-value is less than the level of significance (p-value < α),
# reject the null hypothesis and conclude that the variances of the two populations are significantly different. Otherwise, 
# fail to reject the null hypothesis.

## ******************************
# Note:- Generally, if the p-value is less than the significance level (usually set at 0.05), 
#        the null hypothesis is rejected, and the alternative hypothesis is accepted. Conversely, 
#       if the p-value is greater than the significance level, the null hypothesis is not rejected,
#       and the alternative hypothesis is not accepted.

## XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#Note:- Generally, if the F-statics (which we calculate var.1/var.2) is less than the F-value (which we calculate from F distribution table), 
#        we fail to reject  null hypothesis , and the alternative hypothesis is rejected. Conversely, 
#       if the F-statics is greater than the F-value, the null hypothesis is rejected,
#       and the alternative hypothesis is  accepted.

## ******************************
# Now let's apply these steps to the problem:

# The null and alternative hypotheses are:

# H0: σ1^2 = σ2^2

# Ha: σ1^2 ≠ σ2^2

# The level of significance is α = 0.05.

# The F-statistic is:

# F = S1^2 / S2^2

# We don't have the sample data, but we can use the known population variances to estimate the pooled variance:

# Sp^2 = [(n1 - 1)S1^2 + (n2 - 1)S2^2] / (n1 + n2 - 2)

# Substituting the values, we get:

# Sp^2 = [(12 - 1)10 + (12 - 1)15] / (12 + 12 - 2) = 12.5

# Then, the F-statistic is:

# F = S1^2 / S2^2 = 10 / 15 = 0.6667 ~= (p value = 0.79) 

# The degrees of freedom for the numerator and denominator are df1 = 11 and df2 = 11, respectively.

# We can use the cumulative distribution function (CDF) of the F-distribution with df1 and df2 degrees of freedom to find 
# the p-value associated with the F-statistic. Using a statistical software or a F-distribution table, we find that 
# the p-value is approximately 0.79.

# Finally, we compare the p-value to the level of significance. Since the p-value is greater than the level of significance (p-value > α), 
# we fail to reject the null hypothesis. Therefore, we conclude that there is not enough evidence to suggest that the variances of the two 
# populations are significantly different at the 5% significance level.

In [13]:
# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance 
# level to determine if the claim is justified.

In [14]:
# To test if the variance of the diameter of a certain product is equal to a claimed value, we can use an F-test. 
# The null and alternative hypotheses for the F-test are as follows:

# H0: σ^2 = σ0^2 (the variance is equal to the claimed value)
# Ha: σ^2 ≠ σ0^2 (the variance is significantly different from the claimed value)
# where σ^2 is the population variance and σ0^2 is the claimed variance.

# Given that the manufacturer claims that the variance is 0.005 and a sample of 25 products is taken with a sample variance of 0.006, 
# we can calculate the F-statistic as:

# F = S^2 / σ0^2

# where S^2 is the sample variance. Substituting the values, we get:

# F = 0.006 / 0.005 = 1.2

# The degrees of freedom for the numerator and denominator are df1 = 24 and df2 = ∞ (since we are assuming a known population variance).

# Using a statistical software or an F-distribution table, we can find the p-value associated with the F-statistic. At the 1% significance level, 
# the critical F-value for df1 = 24 and df2 = ∞ is approximately 1.38318.

# Since the observed F-statistic (F = 1.2) is less than the critical F-value (1.38318), we fail to reject the null hypothesis. 
# Therefore, we conclude that there is not enough evidence to suggest that the claimed variance is significantly different 
# from the sample variance at the 1% significance level.

In [15]:
# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an 
# F-distribution and calculates the mean and variance of the distribution. The function should return the 
# mean and variance as a tuple.

In [16]:
def f_distribution_mean_and_variance(df1, df2):
    if df1 <= 0 or df2 <= 0:
        raise ValueError("Degrees of freedom must be greater than zero")
    
    mean = df2 / (df2 - 2)
    variance = (2 * (df2**2) * (df1 + df2 - 2)) / (df1 * (df2 - 2)**2 * (df2 - 4))
    
    return mean, variance


In [17]:
# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The 
# sample variance is found to be 25. Another random sample of 15 measurements is taken from another 
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test 
# at the 10% significance level to determine if the variances are significantly different.

In [18]:
# To determine if the variances of two normal populations are significantly different, we can use an F-test. 
# The null hypothesis is that the variances are equal, while the alternative hypothesis is that they are not equal.

# In this case, we have a sample of 10 measurements with a sample variance of 25, and another sample of 15 measurements 
# with a sample variance of 20.

# We can calculate the test statistic as the ratio of the two sample variances:

# F = s1^2 / s2^2

# where s1^2 and s2^2 are the sample variances of the first and second samples, respectively.

# We can then compare this test statistic to the F-distribution with degrees of freedom equal to n1 - 1 and n2 - 1,
# where n1 and n2 are the sample sizes of the two populations.

# At a 10% significance level, and with 9 and 14 degrees of freedom respectively, the critical value of the F-distribution is:

# f_critical = 2.12195

# If the calculated F statistic is greater than the critical value, we reject the null hypothesis and conclude that 
# the variances are significantly different. Otherwise, we fail to reject the null hypothesis and conclude that 
# the variances are not significantly different.

# Let's calculate the F statistic for the given samples:

# F = 25 / 20 = 1.25

# Since F = 1.25 < f_critical = 2.12195, we fail to reject the null hypothesis and conclude 
# that the variances are not significantly different at the 10% significance level.

# Therefore, we can say that there is not enough evidence to suggest that the variances of the two populations are significantly different.

In [19]:
# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday 
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% 
# significance level to determine if the variances are significantly different.

In [20]:
# To determine if the variances of two populations are significantly different, we can use an F-test. The null hypothesis is that the variances are equal, 
# while the alternative hypothesis is that they are not equal.

# In this case, we have waiting time data for two different restaurants on a Saturday night:

# Restaurant A: 24, 25, 28, 23, 22, 20, 27
# Restaurant B: 31, 33, 35, 30, 32, 36

# We can calculate the sample variances for each restaurant, and then calculate the F-statistic as the ratio of the larger sample variance to 
# the smaller sample variance:

# s1^2 = 10.81
# s2^2 = 5.6
# F = s1^2 / s2^2 = 1.932

# Next, we can compare this F-statistic to the F-distribution with degrees of freedom equal to n1-1 and n2-1, where n1 and n2 are 
# the sample sizes of the two populations.

# We have n1 = 7 and n2 = 6, so the degrees of freedom are 6 and 5 respectively.

# At a 5% significance level, the critical value of the F-distribution with 6 and 5 degrees of freedom is:

# f_critical = 4.2067

# Since F = 1.932 < f_critical = 4.2067, we fail to reject the null hypothesis and conclude that the variances are not significantly different at 
# the 5% significance level.

# Therefore, we can say that there is not enough evidence to suggest that the variances of the waiting times at the two restaurants are significantly different.

In [21]:
# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; 
# Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances 
# are significantly different.

In [None]:
# To determine if the variances of two populations are significantly different, we can use an F-test. The null hypothesis is that the variances are equal, 
# while the alternative hypothesis is that they are not equal.

# In this case, we have test score data for two groups of students:

# Group A: 80, 85, 90, 92, 87, 83
# Group B: 75, 78, 82, 79, 81, 84

# We can calculate the sample variances for each group, and then calculate the F-statistic as the ratio of the larger sample variance to the smaller sample variance:

# s1^2 = 23.2
# s2^2 = 6.8
# F = s1^2 / s2^2 = 3.412

# Next, we can compare this F-statistic to the F-distribution with degrees of freedom equal to n1-1 and n2-1, where n1 and n2 are the sample sizes of 
# the two populations.

# We have n1 = 6 and n2 = 6, so the degrees of freedom are 5 and 5 respectively.

# At a 1% significance level, the critical value of the F-distribution with 5 and 5 degrees of freedom is:

# f_critical = 4.2839

# Since F = 3.412 < f_critical = 4.2839, we fail to reject the null hypothesis and conclude that the variances are not significantly different at 
# the 1% significance level.

# Therefore, we can say that there is not enough evidence to suggest that the variances of the test scores of the two groups of students are significantly different.