In [1]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
# test. The function should return the F-value and the corresponding p-value for the test.

import numpy as np
from scipy import stats

def variance_ratio_test(data1, data2):
    # Calculate the variance of each data set
    var1 = np.var(data1)
    var2 = np.var(data2)

    # Calculate the F-value and p-value
    f_value = var1/var2
    p_value = stats.f.sf(f_value, len(data1)-1, len(data2)-1)

    return f_value, p_value


In [2]:
#You can call this function by passing in two arrays of data, like this:

data1 = [1, 2, 3, 4, 5]
data2 = [2, 4, 6, 8, 10]

f_value, p_value = variance_ratio_test(data1, data2)

print("F-value:", f_value)
print("p-value:", p_value)


F-value: 0.25
p-value: 0.896


In [3]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

from scipy import stats

def critical_f_value(df_num, df_denom, alpha=0.05):
    # Calculate the critical F-value
    f_crit = stats.f.isf(alpha/2, df_num, df_denom)

    return f_crit


In [4]:
# You can call this function by passing in the degrees of freedom for the numerator and denominator of an F-distribution, like this:

df_num = 3
df_denom = 10
alpha = 0.05

f_crit = critical_f_value(df_num, df_denom, alpha)

print("Critical F-value:", f_crit)


Critical F-value: 4.825621493405406


In [None]:
# Q3. Write a Python program that generates random samples from two normal distributions with known
# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.

import numpy as np
from scipy import stats

# Generate random samples from two normal distributions with known variances
n1 = 50
n2 = 60
mean1 = 10
mean2 = 10
var1 = 4
var2 = 3
data1 = np.random.normal(mean1, np.sqrt(var1), n1)
data2 = np.random.normal(mean2, np.sqrt(var2), n2)

# Perform F-test for equal variances
f_value, p_value = stats.ftest(data1, data2)

# Calculate the degrees of freedom for the F-distribution
df_num = len(data1) - 1
df_denom = len(data2) - 1

# Print results
print("F-value:", f_value)
print("Degrees of freedom (numerator, denominator):", df_num, df_denom)
print("p-value:", p_value)


# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
# each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.


# to conduct an F-test at the 5% significance level to determine if the variances of two populations with known variances 10 and 15 are significantly different, we can follow these steps:


1. Set up the null and alternative hypotheses:

Null hypothesis: The variances of the two populations are equal (σ1^2 = σ2^2)

Alternative hypothesis: The variances of the two populations are not equal (σ1^2 ≠ σ2^2)

2. Determine the significance level (α) and degrees of freedom for the F-distribution:

α = 0.05

Degrees of freedom (numerator, denominator) = (11, 11) (since we have 12 observations in each sample and we are testing for a difference in variances)

3. Calculate the F-value using the formula:

F = S1^2 / S2^2

where S1^2 and S2^2 are the sample variances of the two populations.

4. Find the critical F-value from an F-distribution table or using a calculator with the degrees of freedom and the significance level.

5. Compare the calculated F-value to the critical F-value:

If the calculated F-value is greater than the critical F-value, reject the null hypothesis and conclude that the variances are significantly different. Otherwise, fail to reject the null hypothesis and conclude that there is not enough evidence to support the claim that the variances are different.

In [6]:
# let's implement these steps in Python:

import numpy as np
from scipy import stats

# Set up the null and alternative hypotheses
# Null hypothesis: The variances of the two populations are equal
# Alternative hypothesis: The variances of the two populations are not equal
null_hypothesis = "The variances of the two populations are equal"
alternative_hypothesis = "The variances of the two populations are not equal"

# Set the significance level
alpha = 0.05

# Set the known variances and sample sizes
var1 = 10
var2 = 15
n1 = 12
n2 = 12

# Generate random samples from the two populations
pop1 = np.random.normal(0, np.sqrt(var1), n1)
pop2 = np.random.normal(0, np.sqrt(var2), n2)

# Calculate the sample variances
s1 = np.var(pop1, ddof=1)
s2 = np.var(pop2, ddof=1)

# Calculate the F-value
f_value = s1 / s2

# Calculate the critical F-value
f_crit = stats.f.ppf(1 - alpha/2, n1-1, n2-1)

# Determine if we reject or fail to reject the null hypothesis
if f_value > f_crit or f_value < 1/f_crit:
    print("Reject the null hypothesis.")
    print(alternative_hypothesis)
else:
    print("Fail to reject the null hypothesis.")
    print(null_hypothesis)

# Print the calculated F-value and critical F-value
print("F-value:", f_value)
print("Critical F-value:", f_crit)


Fail to reject the null hypothesis.
The variances of the two populations are equal
F-value: 0.788852867090113
Critical F-value: 3.473699051085809


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
# level to determine if the claim is justified.

# to conduct an F-test at the 1% significance level to determine if the claim that the variance of the diameter of a certain product is 0.005 is justified, we can follow these steps:

1. Set up the null and alternative hypotheses:

Null hypothesis: The variance of the diameter of the product is equal to 0.005 (σ^2 = 0.005)

Alternative hypothesis: The variance of the diameter of the product is not equal to 0.005 (σ^2 ≠ 0.005)

2. Determine the significance level (α) and degrees of freedom for the F-distribution:

α = 0.01

Degrees of freedom (numerator, denominator) = (24, 24) (since we have 25 observations in the sample and we are testing for a difference in variances)

3. Calculate the F-value using the formula:

F = S^2 / σ^2

where S^2 is the sample variance and σ^2 is the claimed variance.

4. Find the critical F-value from an F-distribution table or using a calculator with the degrees of freedom and the significance level.

5. Compare the calculated F-value to the critical F-value:

If the calculated F-value is greater than the critical F-value or less than 1 over the critical F-value, reject the null hypothesis and conclude that the claim is not justified. Otherwise, fail to reject the null hypothesis and conclude that there is not enough evidence to support the claim.

In [7]:
# let's implement these steps in Python:

import numpy as np
from scipy import stats

# Set up the null and alternative hypotheses
# Null hypothesis: The variance of the diameter of the product is equal to 0.005
# Alternative hypothesis: The variance of the diameter of the product is not equal to 0.005
null_hypothesis = "The variance of the diameter of the product is equal to 0.005"
alternative_hypothesis = "The variance of the diameter of the product is not equal to 0.005"

# Set the significance level
alpha = 0.01

# Set the claimed variance and sample size
var_claimed = 0.005
n = 25

# Generate a random sample from the population
pop = np.random.normal(0, np.sqrt(var_claimed), n)

# Calculate the sample variance
s_squared = np.var(pop, ddof=1)

# Calculate the F-value
f_value = s_squared / var_claimed

# Calculate the critical F-value
f_crit = stats.f.ppf(1 - alpha/2, n-1, n-1)

# Determine if we reject or fail to reject the null hypothesis
if f_value > f_crit or f_value < 1/f_crit:
    print("Reject the null hypothesis.")
    print(alternative_hypothesis)
else:
    print("Fail to reject the null hypothesis.")
    print(null_hypothesis)

# Print the calculated F-value and critical F-value
print("F-value:", f_value)
print("Critical F-value:", f_crit)


Fail to reject the null hypothesis.
The variance of the diameter of the product is equal to 0.005
F-value: 0.9058914727072327
Critical F-value: 2.966741631292762


In [8]:
# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
# F-distribution and calculates the mean and variance of the distribution. The function should return the
# mean and variance as a tuple.

def f_distribution_mean_and_variance(df_num, df_den):
    """
    Calculates the mean and variance of an F-distribution given the degrees of freedom
    for the numerator and denominator.

    Parameters:
    df_num (int): Degrees of freedom for the numerator.
    df_den (int): Degrees of freedom for the denominator.

    Returns:
    (float, float): A tuple containing the mean and variance of the F-distribution.
    """
    if df_num <= 0 or df_den <= 0:
        raise ValueError("Degrees of freedom must be positive integers.")
    
    mean = df_den / (df_den - 2)
    variance = (2 * (df_den ** 2) * (df_num + df_den - 2)) / \
               (df_num * (df_den - 2) ** 2 * (df_den - 4))
    
    return (mean, variance)


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
# sample variance is found to be 25. Another random sample of 15 measurements is taken from another
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
# at the 10% significance level to determine if the variances are significantly different.

+ To conduct an F-test to determine if the variances of two populations are significantly different, we use the following hypotheses:

H0: σ1^2 = σ2^2 (the variances are equal)
Ha: σ1^2 ≠ σ2^2 (the variances are not equal)

We will use an alpha level of 0.10 for this test.

The test statistic for the F-test is given by:

F = s1^2 / s2^2

+ where s1^2 is the sample variance for the first sample, and s2^2 is the sample variance for the second sample. The degrees of freedom for the F-distribution are df1 = n1 - 1 and df2 = n2 - 1, where n1 and n2 are the sample sizes.

+ To calculate the p-value for the test, we can use an F-distribution calculator or a statistical software package.

In [9]:
# the Python code to conduct the F-test:
import scipy.stats as stats

# sample statistics
n1 = 10
s1_squared = 25
n2 = 15
s2_squared = 20

# calculate the F-statistic and p-value
f_stat = s1_squared / s2_squared
df1 = n1 - 1
df2 = n2 - 1
p_val = 2 * (1 - stats.f.cdf(f_stat, df1, df2))

# significance level
alpha = 0.10

# make decision based on p-value and significance level
if p_val < alpha:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. The variances are not significantly different.")


Fail to reject the null hypothesis. The variances are not significantly different.


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.

+ To conduct an F-test to determine if the variances of two populations are significantly different, we use the following hypotheses:

H0: σ1^2 = σ2^2 (the variances are equal)
Ha: σ1^2 ≠ σ2^2 (the variances are not equal)

We will use an alpha level of 0.05 for this test.

+ The test statistic for the F-test is given by:

F = s1^2 / s2^2

+ where s1^2 is the sample variance for the first sample, and s2^2 is the sample variance for the second sample. The degrees of freedom for the F-distribution are df1 = n1 - 1 and df2 = n2 - 1, where n1 and n2 are the sample sizes.

+ To calculate the p-value for the test, we can use an F-distribution calculator or a statistical software package.

In [10]:
#  the Python code to conduct the F-test:

import scipy.stats as stats
import numpy as np

# sample data
restaurant_a = np.array([24, 25, 28, 23, 22, 20, 27])
restaurant_b = np.array([31, 33, 35, 30, 32, 36])

# sample statistics
n1 = len(restaurant_a)
s1_squared = np.var(restaurant_a, ddof=1)
n2 = len(restaurant_b)
s2_squared = np.var(restaurant_b, ddof=1)

# calculate the F-statistic and p-value
f_stat = s1_squared / s2_squared
df1 = n1 - 1
df2 = n2 - 1
p_val = 2 * (1 - stats.f.cdf(f_stat, df1, df2))

# significance level
alpha = 0.05

# make decision based on p-value and significance level
if p_val < alpha:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. The variances are not significantly different.")


Fail to reject the null hypothesis. The variances are not significantly different.


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
# Group B: 75, 78, 82, 79, 81, 84. Conduct

+ To compare the means of two populations, we can use a two-sample t-test. We'll assume that the populations are normally distributed and that the variances are equal. We'll use an alpha level of 0.05 for this test.

+ The null hypothesis for the two-sample t-test is that the population means are equal:

H0: μ1 = μ2

+ The alternative hypothesis is that the population means are not equal:

Ha: μ1 ≠ μ2

In [11]:
# the Python code to conduct the two-sample t-test:

import scipy.stats as stats
import numpy as np

# sample data
group_a = np.array([80, 85, 90, 92, 87, 83])
group_b = np.array([75, 78, 82, 79, 81, 84])

# calculate the t-statistic and p-value
t_stat, p_val = stats.ttest_ind(group_a, group_b, equal_var=True)

# significance level
alpha = 0.05

# make decision based on p-value and significance level
if p_val < alpha:
    print("Reject the null hypothesis. The population means are significantly different.")
else:
    print("Fail to reject the null hypothesis. The population means are not significantly different.")


Reject the null hypothesis. The population means are significantly different.
