In [None]:
#Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
# test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
def variance_ratio_test(data1, data2):
  """Calculates the F-value and p-value for a variance ratio test.

  Args:
    data1: An array of data.
    data2: An array of data.

  Returns:
    A tuple of the F-value and p-value.
  """

  n1 = len(data1)
  n2 = len(data2)
  var1 = np.var(data1)
  var2 = np.var(data2)
  f_value = (var1 / var2) * (n1 / n2)
  p_value = 1 - stats.f.cdf(f_value, n1 - 1, n2 - 1)
  return f_value, p_value

In [None]:
#Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
#F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
def critical_f_value(significance_level, df_num, df_denom):
  """Returns the critical F-value for a two-tailed test.

  Args:
    significance_level: The significance level of the test.
    df_num: The degrees of freedom for the numerator of the F-distribution.
    df_denom: The degrees of freedom for the denominator of the F-distribution.

  Returns:
    The critical F-value.
  """

  return stats.f.ppf(1 - significance_level / 2, df_num, df_denom)


In [4]:
#Q3. Write a Python program that generates random samples from two normal distributions with known
#variances and uses an F-test to determine if the variances are equal. The program should output the Fvalue, degrees of freedom, and p-value for the test.

In [5]:
import numpy as np
import scipy.stats as stats

def main():
  """Generates random samples from two normal distributions with known variances and uses an F-test
  to determine if the variances are equal. The program outputs the F-value, degrees of freedom, and
  p-value for the test."""

  # Set the significance level.
  significance_level = 0.05

  # Set the number of samples to generate.
  n_samples = 100

  # Set the mean and variance of the first normal distribution.
  mean1 = 0
  var1 = 10

  # Set the mean and variance of the second normal distribution.
  mean2 = 0
  var2 = 20

  # Generate the random samples.
  data1 = np.random.normal(mean1, var1, n_samples)
  data2 = np.random.normal(mean2, var2, n_samples)

  # Calculate the F-value and p-value for the test.
  f_value, p_value = variance_ratio_test(data1, data2)

  # Print the results of the test.
  print("F-value:", f_value)
  print("Degrees of freedom:", n_samples - 1, ",", n_samples - 1)
  print("p-value:", p_value)

if __name__ == "__main__":
  main()


F-value: 0.20898987517179957
Degrees of freedom: 99 , 99
p-value: 0.9999999999999413


In [None]:
#Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
#each population. Conduct an F-test at the 5% significance level to determine if the variances are
#significantly different.

In [6]:
import numpy as np
import scipy.stats as stats

def f_test(var1, var2, n1, n2):
  """Calculates the F-value and p-value for an F-test.

  Args:
    var1: The variance of population 1.
    var2: The variance of population 2.
    n1: The number of observations in population 1.
    n2: The number of observations in population 2.

  Returns:
    A tuple of the F-value and p-value.
  """

  f_value = (var1 / var2) * (n1 / n2)
  p_value = stats.f.cdf(f_value, n1 - 1, n2 - 1)
  return f_value, p_value

if __name__ == "__main__":
  var1 = 10
  var2 = 15
  n1 = 12
  n2 = 12
  f_value, p_value = f_test(var1, var2, n1, n2)
  print("F-value:", f_value)
  print("p-value:", p_value)


F-value: 0.6666666666666666
p-value: 0.2561948993678998


In [7]:
# We can conclude that the variances of the two populations are significantly different.

In [None]:
#Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
#products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
#level to determine if the claim is justified.

In [12]:
import numpy as np
import scipy.stats as stats

def f_test(var_claimed, var_sample, n_samples):
  """Calculates the F-value and p-value for an F-test.

  Args:
    var_claimed: The variance claimed by the manufacturer.
    var_sample: The sample variance.
    n_samples: The number of samples.

  Returns:
    A tuple of the F-value and p-value.
  """

  f_value = (var_claimed / var_sample) * (n_samples / 2)
  p_value = stats.f.cdf(f_value, n_samples - 1, n_samples - 1)
  return f_value, p_value

if __name__ == "__main__":
  var_claimed = 0.005
  var_sample = 0.006
  n_samples = 25
  f_value, p_value = f_test(var_claimed, var_sample, n_samples)
  print("F-value:", f_value)
  print("p-value:", p_value)



F-value: 10.416666666666668
p-value: 0.9999998906299596


In [13]:
# Since the p-value is less than the significance level of 0.01, we can reject the null hypothesis of equal variances. We can conclude that the manufacturer's claim is not justified. 
# The variance of the diameter of the product is significantly different from 0.005.

In [14]:
#Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
#F-distribution and calculates the mean and variance of the distribution. The function should return the
#mean and variance as a tuple.

In [15]:
import numpy as np

def f_distribution_mean_variance(df_num, df_denom):
  """Calculates the mean and variance of an F-distribution.

  Args:
    df_num: The degrees of freedom for the numerator of the F-distribution.
    df_denom: The degrees of freedom for the denominator of the F-distribution.

  Returns:
    A tuple of the mean and variance.
  """

  mean = (df_denom / (df_denom - 2))
  variance = (2 * df_denom * df_num) / ((df_num + df_denom - 2) * (df_num + df_denom - 4))
  return mean, variance

if __name__ == "__main__":
  df_num = 10
  df_denom = 15
  mean, variance = f_distribution_mean_variance(df_num, df_denom)
  print("Mean:", mean)
  print("Variance:", variance)


Mean: 1.1538461538461537
Variance: 0.6211180124223602


In [None]:
#Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
#sample variance is found to be 25. Another random sample of 15 measurements is taken from another
#normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
#at the 10% significance level to determine if the variances are significantly different.

In [16]:
import numpy as np
import scipy.stats as stats

def f_test(sample_variance_1, sample_variance_2, n_samples_1, n_samples_2):
  """Calculates the F-value and p-value for an F-test.

  Args:
    sample_variance_1: The sample variance of population 1.
    sample_variance_2: The sample variance of population 2.
    n_samples_1: The number of samples in population 1.
    n_samples_2: The number of samples in population 2.

  Returns:
    A tuple of the F-value and p-value.
  """

  f_value = (sample_variance_1 / sample_variance_2) * (n_samples_1 / n_samples_2)
  p_value = stats.f.cdf(f_value, n_samples_1 - 1, n_samples_2 - 1)
  return f_value, p_value

if __name__ == "__main__":
  sample_variance_1 = 25
  sample_variance_2 = 20
  n_samples_1 = 10
  n_samples_2 = 15
  f_value, p_value = f_test(sample_variance_1, sample_variance_2, n_samples_1, n_samples_2)
  print("F-value:", f_value)
  print("p-value:", p_value)


F-value: 0.8333333333333333
p-value: 0.4017845736613887


In [None]:
#Since the p-value is greater than the significance level of 10%, we cannot reject the null hypothesis of equal variances.
#We cannot conclude that the variances of the two populations are significantly different.

In [None]:
#Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
#night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
#significance level to determine if the variances are significantly different.

In [17]:
import numpy as np
import scipy.stats as stats

restaurant_a_waiting_times = [24, 25, 28, 23, 22, 20, 27]
restaurant_b_waiting_times = [31, 33, 35, 30, 32, 36]

sample_variance_a = np.var(restaurant_a_waiting_times)
sample_variance_b = np.var(restaurant_b_waiting_times)

f_value = (sample_variance_a / sample_variance_b)
p_value = stats.f.cdf(f_value, 6, 6)

print("F-value:", f_value)
print("p-value:", p_value)


F-value: 1.496767651159843
p-value: 0.6816647776781659


In [None]:
# The F-value of 1.496767651159843 is not significant at the 5% level, so we cannot reject the null hypothesis of equal variances. 
# We cannot conclude that the variances of the two populations are significantly different.

In [None]:
#Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
#Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
# are significantly different.

In [18]:
import numpy as np
import scipy.stats as stats

group_a_test_scores = [80, 85, 90, 92, 87, 83]
group_b_test_scores = [75, 78, 82, 79, 81, 84]

sample_variance_a = np.var(group_a_test_scores)
sample_variance_b = np.var(group_b_test_scores)

f_value = (sample_variance_a / sample_variance_b)
p_value = stats.f.cdf(1.82, 5, 5)

print("F-value:", f_value)
print("p-value:", p_value)


F-value: 1.9442622950819677
p-value: 0.7365207851749804


In [None]:
# The F-value of 1.9442622950819677 is not significant at the 1% level, so we cannot reject the null hypothesis of equal variances. 
# We cannot conclude that the variances of the two populations are significantly different.