Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
from scipy.stats import f

def variance_ratio_test(data1, data2):
    """
    Calculate the F-value for a variance ratio test between two arrays of data.
    
    Parameters:
        data1 (array-like): First array of data.
        data2 (array-like): Second array of data.
        
    Returns:
        f_value (float): F-value for the variance ratio test.
        p_value (float): Corresponding p-value for the test.
    """
    # Calculate the sample variances
    var1 = np.var(data1, ddof=1)  # Use ddof=1 for unbiased estimation of sample variance
    var2 = np.var(data2, ddof=1)
    
    # Calculate the F-value
    f_value = var1 / var2
    
    # Calculate the degrees of freedom
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    
    # Calculate the p-value
    p_value = f.cdf(f_value, df1, df2)
    
    return f_value, p_value

# Example usage:
data1 = [10, 12, 15, 14, 11]
data2 = [8, 9, 11, 10, 12]
f_value, p_value = variance_ratio_test(data1, data2)
print("F-value:", f_value)
print("p-value:", p_value)

NameError: name 'np' is not defined

Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [None]:
from scipy.stats import f

def critical_f_value(significance_level, dfn, dfd):
    """
    Calculate the critical F-value for a two-tailed test given a significance level
    and degrees of freedom for the numerator and denominator of an F-distribution.
    
    Parameters:
        significance_level (float): Significance level (alpha).
        dfn (int): Degrees of freedom for the numerator.
        dfd (int): Degrees of freedom for the denominator.
        
    Returns:
        crit_f_value (float): Critical F-value.
    """
    crit_f_value = f.ppf(1 - significance_level / 2, dfn, dfd)
    return crit_f_value

# Example usage:
significance_level = 0.05
dfn = 3  # Degrees of freedom for the numerator
dfd = 20  # Degrees of freedom for the denominator
crit_f = critical_f_value(significance_level, dfn, dfd)
print("Critical F-value:", crit_f)

Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F-value, degrees of freedom, and p-value for the test.

In [None]:
import numpy as np
from scipy.stats import f

def variance_ratio_test(data1, data2):
    """
    Perform an F-test to determine if the variances of two datasets are equal.
    
    Parameters:
        data1 (array-like): First dataset.
        data2 (array-like): Second dataset.
        
    Returns:
        f_value (float): F-value for the variance ratio test.
        df1 (int): Degrees of freedom for the numerator.
        df2 (int): Degrees of freedom for the denominator.
        p_value (float): Corresponding p-value for the test.
    """
    # Calculate sample variances
    var1 = np.var(data1, ddof=1)  # Use ddof=1 for unbiased estimation of sample variance
    var2 = np.var(data2, ddof=1)
    
    # Calculate degrees of freedom
    n1 = len(data1)
    n2 = len(data2)
    df1 = n1 - 1
    df2 = n2 - 1
    
    # Calculate F-value
    f_value = var1 / var2 if var1 > var2 else var2 / var1
    
    # Calculate p-value
    p_value = f.cdf(f_value, df1, df2)
    
    return f_value, df1, df2, p_value

# Generate random samples from two normal distributions
np.random.seed(0)  # Set random seed for reproducibility
data1 = np.random.normal(loc=0, scale=1, size=30)  # Sample from N(0, 1)
data2 = np.random.normal(loc=0, scale=1.5, size=30)  # Sample from N(0, 1.5)

# Perform F-test
f_value, df1, df2, p_value = variance_ratio_test(data1, data2)

# Output results
print("Results of F-test for equality of variances:")
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [None]:
from scipy.stats import f

# Known variances
variance1 = 10
variance2 = 15

# Sample sizes
n1 = 12
n2 = 12

# Calculate the F-value
f_value = variance1 / variance2

# Calculate degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# Calculate the p-value
p_value = f.cdf(f_value, df1, df2)

# Significance level
alpha = 0.05

# Critical F-value
crit_f_value = f.ppf(1 - alpha / 2, df1, df2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)
print("Critical F-value:", crit_f_value)

# Compare F-value to critical F-value
if f_value > crit_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to suggest that the variances are significantly different.")

Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [None]:
from scipy.stats import f

# Claimed variance
claimed_variance = 0.005

# Sample variance
sample_variance = 0.006

# Sample size
n = 25

# Calculate the F-value
f_value = sample_variance / claimed_variance

# Calculate degrees of freedom
df1 = n - 1
df2 = 1  # Degrees of freedom for the denominator

# Calculate the p-value
p_value = f.cdf(f_value, df1, df2)

# Significance level
alpha = 0.01

# Critical F-value
crit_f_value = f.ppf(1 - alpha, df1, df2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)
print("Critical F-value:", crit_f_value)

# Compare F-value to critical F-value
if f_value > crit_f_value:
    print("Reject the null hypothesis. The claimed variance is not justified.")
else:
    print("Fail to reject the null hypothesis. The claimed variance is justified.")

Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [None]:
def f_distribution_mean_and_variance(dfn, dfd):
    """
    Calculate the mean and variance of an F-distribution given the degrees of freedom
    for the numerator and denominator.
    
    Parameters:
        dfn (int): Degrees of freedom for the numerator.
        dfd (int): Degrees of freedom for the denominator.
        
    Returns:
        mean (float): Mean of the F-distribution.
        variance (float): Variance of the F-distribution.
    """
    # Mean of F-distribution
    mean = dfd / (dfd - 2) if dfd > 2 else float('inf')  # Mean is undefined for dfd <= 2
    
    # Variance of F-distribution
    if dfd > 4:
        variance = (2 * dfd ** 2 * (dfn + dfd - 2)) / (dfn * (dfd - 2) ** 2 * (dfd - 4))
    else:
        variance = float('inf')  # Variance is undefined for dfd <= 4
    
    return mean, variance

# Example usage:
dfn = 5  # Degrees of freedom for the numerator
dfd = 10  # Degrees of freedom for the denominator
mean, variance = f_distribution_mean_and_variance(dfn, dfd)
print("Mean of F-distribution:", mean)
print("Variance of F-distribution:", variance)

Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [None]:
from scipy.stats import f

# Sample variances
sample_variance1 = 25
sample_variance2 = 20

# Sample sizes
n1 = 10
n2 = 15

# Calculate the F-value
f_value = sample_variance1 / sample_variance2

# Calculate degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# Calculate the p-value
p_value = f.cdf(f_value, df1, df2)

# Significance level
alpha = 0.10

# Critical F-value
crit_f_value = f.ppf(1 - alpha, df1, df2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)
print("Critical F-value:", crit_f_value)

# Compare F-value to critical F-value
if f_value > crit_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to suggest that the variances are significantly different.")

Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [None]:
from scipy.stats import f
import numpy as np

# Waiting times data
waiting_times_restaurant_A = np.array([24, 25, 28, 23, 22, 20, 27])
waiting_times_restaurant_B = np.array([31, 33, 35, 30, 32, 36])

# Sample variances
sample_variance_A = np.var(waiting_times_restaurant_A, ddof=1)
sample_variance_B = np.var(waiting_times_restaurant_B, ddof=1)

# Sample sizes
n_A = len(waiting_times_restaurant_A)
n_B = len(waiting_times_restaurant_B)

# Calculate the F-value
f_value = sample_variance_A / sample_variance_B

# Calculate degrees of freedom
df1 = n_A - 1
df2 = n_B - 1

# Calculate the p-value
p_value = f.cdf(f_value, df1, df2)

# Significance level
alpha = 0.05

# Critical F-value
crit_f_value = f.ppf(1 - alpha, df1, df2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)
print("Critical F-value:", crit_f_value)

# Compare F-value to critical F-value
if f_value > crit_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to suggest that the variances are significantly different.")

Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [None]:
from scipy.stats import f
import numpy as np

# Test scores data
test_scores_group_A = np.array([80, 85, 90, 92, 87, 83])
test_scores_group_B = np.array([75, 78, 82, 79, 81, 84])

# Sample variances
sample_variance_A = np.var(test_scores_group_A, ddof=1)
sample_variance_B = np.var(test_scores_group_B, ddof=1)

# Sample sizes
n_A = len(test_scores_group_A)
n_B = len(test_scores_group_B)

# Calculate the F-value
f_value = sample_variance_A / sample_variance_B

# Calculate degrees of freedom
df1 = n_A - 1
df2 = n_B - 1

# Calculate the p-value
p_value = f.cdf(f_value, df1, df2)

# Significance level
alpha = 0.01

# Critical F-value
crit_f_value = f.ppf(1 - alpha, df1, df2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)
print("Critical F-value:", crit_f_value)

# Compare F-value to critical F-value
if f_value > crit_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to suggest that the variances are significantly different.")