In [None]:
"""
Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
test. The function should return the F-value and the corresponding p-value for the test.
"""

import numpy as np
from scipy.stats import f

def variance_ratio_test(data1, data2):
    """
    Perform a two-tailed F-test comparing the variances of two samples.

    The statistic is the larger unbiased sample variance divided by the
    smaller one, so it is always >= 1; the degrees of freedom are ordered
    to match (numerator sample first).

    Parameters:
        data1 (array-like): First sample (at least two observations)
        data2 (array-like): Second sample (at least two observations)

    Returns:
        F_value: Calculated F statistic (larger variance / smaller variance).
            ``inf`` when only the smaller variance is zero, ``nan`` when
            both variances are zero.
        p_value: Corresponding two-tailed p-value, capped at 1
            (``0.0`` / ``nan`` in the two degenerate cases above).

    Raises:
        ValueError: If either sample has fewer than two observations, since
            an unbiased variance cannot be computed from it.
    """
    # Flatten so the observation count and the variance always refer to the
    # same set of values (len() of a nested input would only count rows).
    sample1 = np.asarray(data1, dtype=float).ravel()
    sample2 = np.asarray(data2, dtype=float).ravel()

    n1, n2 = sample1.size, sample2.size
    if n1 < 2 or n2 < 2:
        raise ValueError("Each sample must contain at least two observations")

    # Unbiased sample variances (ddof=1)
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Larger variance goes in the numerator; degrees of freedom follow it
    if var1 > var2:
        larger, smaller = var1, var2
        df1, df2 = n1 - 1, n2 - 1
    else:
        larger, smaller = var2, var1
        df1, df2 = n2 - 1, n1 - 1

    # Degenerate samples: avoid a division-by-zero warning and return the
    # limiting values explicitly.
    if smaller == 0:
        if larger == 0:
            return float("nan"), float("nan")
        return float("inf"), 0.0

    F_value = larger / smaller

    # Survival function instead of 1 - cdf: 1 - cdf cancels to exactly 0
    # for large F, whereas sf keeps the small tail probability.
    p_value = min(2 * f.sf(F_value, df1, df2), 1.0)  # never exceed 1

    return float(F_value), float(p_value)


In [None]:
"""
Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

"""
from scipy.stats import f

def critical_f_value_two_tailed(alpha, df1, df2):
    """
    Compute both critical values of an F(df1, df2) distribution for a
    two-tailed test, placing alpha / 2 of the probability in each tail.

    Parameters:
        alpha (float): Total significance level of the test (e.g., 0.05)
        df1 (int): Numerator degrees of freedom
        df2 (int): Denominator degrees of freedom

    Returns:
        (F_lower, F_upper): The alpha/2 quantile and the 1 - alpha/2
        quantile of the distribution, in that order
    """
    # Probability mass assigned to each rejection tail
    tail_area = alpha / 2

    # Quantiles (inverse CDF) at the lower and upper cut-off probabilities
    lower_cut, upper_cut = (
        f.ppf(prob, df1, df2) for prob in (tail_area, 1 - tail_area)
    )

    return lower_cut, upper_cut


In [None]:
# Example for Q2: critical values of F(5, 10) for a two-tailed test at the
# 5% level, using critical_f_value_two_tailed defined in the cell above.
alpha = 0.05  # total significance level (the function splits it across both tails)
df1 = 5   # numerator degrees of freedom
df2 = 10  # denominator degrees of freedom

# Returned as (lower, upper); unpack in that order
F_low, F_up = critical_f_value_two_tailed(alpha, df1, df2)

print("Lower critical F-value:", F_low)
print("Upper critical F-value:", F_up)


In [None]:
"""
Q3. Write a Python program that generates random samples from two normal distributions with known
variances and uses an F-test to determine if the variances are equal. The program should output the
F-value, degrees of freedom, and p-value for the test.

"""
import numpy as np
from scipy.stats import f

# Step 1: Generate random samples
np.random.seed(42)  # for reproducibility

# Known population parameters (np.random.normal takes the standard
# deviation, hence the square roots below)
mean1, var1 = 10, 4      # variance = 4 → std = 2
mean2, var2 = 12, 9      # variance = 9 → std = 3

n1 = 30
n2 = 35

# Generate samples
sample1 = np.random.normal(mean1, np.sqrt(var1), n1)
sample2 = np.random.normal(mean2, np.sqrt(var2), n2)

# Step 2: Compute unbiased sample variances (ddof=1)
s1_sq = np.var(sample1, ddof=1)
s2_sq = np.var(sample2, ddof=1)

# Step 3: Calculate F-value (larger variance / smaller variance), keeping
# the degrees of freedom in the same numerator/denominator order
if s1_sq > s2_sq:
    F_value = s1_sq / s2_sq
    df1, df2 = n1 - 1, n2 - 1
else:
    F_value = s2_sq / s1_sq
    df1, df2 = n2 - 1, n1 - 1

# Step 4: Compute two-tailed p-value.
# The survival function is used instead of 1 - cdf because 1 - cdf loses
# all precision (rounds to 0) when the upper-tail probability is tiny.
p_value = 2 * f.sf(F_value, df1, df2)
p_value = min(p_value, 1)  # ensure p ≤ 1

# Step 5: Output results
print("Sample Variance 1:", s1_sq)
print("Sample Variance 2:", s2_sq)
print("F-value:", F_value)
print("Degrees of freedom:", df1, df2)
print("p-value:", p_value)


In [None]:
"""
Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
each population. Conduct an F-test at the 5% significance level to determine if the variances are
significantly different.

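I'll conduct an F-test to determine if the two population variances are significantly different. Because an F-test compares variances estimated from samples, the stated values 10 and 15 are treated below as the variances observed in the two samples of 12 observations.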

## Given Information
- Population 1 variance: σ₁² = 10
- Population 2 variance: σ₂² = 15
- Sample size from population 1: n₁ = 12
- Sample size from population 2: n₂ = 12
- Significance level: α = 0.05

## F-Test Procedure

**Step 1: State the Hypotheses**
- H₀: σ₁² = σ₂² (the variances are equal)
- H₁: σ₁² ≠ σ₂² (the variances are different)

This is a two-tailed test.

**Step 2: Calculate the F-statistic**

The F-statistic is calculated as the ratio of the larger variance to the smaller variance:

F = (larger variance)/(smaller variance) = σ₂²/σ₁² = 15/10 = **1.5**

**Step 3: Determine the degrees of freedom**
- df₁ = n₂ - 1 = 12 - 1 = 11 (numerator degrees of freedom)
- df₂ = n₁ - 1 = 12 - 1 = 11 (denominator degrees of freedom)

**Step 4: Find the critical value**

For a two-tailed test at α = 0.05, we split the significance level:
- Upper critical value: F₀.₀₂₅(11, 11) ≈ 3.47
- Lower critical value: F₀.₉₇₅(11, 11) = 1/F₀.₀₂₅(11, 11) ≈ 1/3.47 ≈ 0.29

**Step 5: Decision Rule**

Reject H₀ if F > 3.47 or F < 0.29

**Step 6: Conclusion**

Our calculated F-statistic is **F = 1.5**

Since 0.29 < 1.5 < 3.47, the F-statistic falls within the non-rejection region.

## Result

**We fail to reject the null hypothesis** at the 5% significance level.

**Conclusion:** There is insufficient evidence to conclude that the two population variances are significantly different. The observed difference between the variances (10 and 15) could reasonably be due to chance variation.

"""

In [None]:
"""
Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
level to determine if the claim is justified.

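I'll test the manufacturer's claim about the variance. Although the question asks for an F-test, an F-test compares two sample variances; a claim about a single population variance is tested with the chi-square test for a variance, which is what is used below.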

## Given Information
- Claimed population variance: σ² = 0.005
- Sample size: n = 25
- Sample variance: s² = 0.006
- Significance level: α = 0.01

## Hypotheses

**H₀**: σ² = 0.005 (the manufacturer's claim is correct)
**H₁**: σ² ≠ 0.005 (the manufacturer's claim is not correct)

This is a two-tailed test.

## Test Statistic

For testing a single variance, we use the chi-square statistic:

χ² = (n - 1)s² / σ₀²

where σ₀² is the claimed variance.

χ² = (25 - 1) × 0.006 / 0.005
χ² = 24 × 0.006 / 0.005
χ² = 0.144 / 0.005
χ² = **28.8**

## Degrees of Freedom

df = n - 1 = 25 - 1 = 24

## Critical Values

For a two-tailed test at α = 0.01 with df = 24:
- Lower critical value: χ²₀.₀₀₅,₂₄ ≈ 9.886
- Upper critical value: χ²₀.₉₉₅,₂₄ ≈ 45.559

**Rejection region**: Reject H₀ if χ² < 9.886 or χ² > 45.559

## Decision

Our calculated test statistic χ² = 28.8 falls within the non-rejection region (9.886 < 28.8 < 45.559).

**Decision**: We fail to reject H₀ at the 1% significance level.

## Conclusion

At the 1% significance level, there is **insufficient evidence** to reject the manufacturer's claim that the variance of the diameter is 0.005. The observed sample variance of 0.006 is not significantly different from the claimed variance of 0.005.

**Note**: Based on this test, the data are consistent with the manufacturer's claim. Failing to reject H₀ does not prove the claim; it only means that the difference between the sample variance (0.006) and the claimed variance (0.005) can reasonably be attributed to sampling variation.

"""


In [None]:
"""
Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
F-distribution and calculates the mean and variance of the distribution. The function should return the
mean and variance as a tuple.

"""

def f_distribution_stats(df1, df2):
    """
    Return the mean and variance of an F(df1, df2) distribution.

    Parameters:
    -----------
    df1 : int or float
        Numerator degrees of freedom (d1)
    df2 : int or float
        Denominator degrees of freedom (d2)

    Returns:
    --------
    tuple
        (mean, variance). An entry is None when that moment does not
        exist; (None, None) is returned when either argument is not
        positive.

    Notes:
    ------
    - A message is printed whenever an input is rejected or a moment is
      undefined.
    - The mean requires df2 > 2:  mean = df2 / (df2 - 2)
    - The variance requires df2 > 4:
      variance = 2 * df2^2 * (df1 + df2 - 2) / [df1 * (df2 - 2)^2 * (df2 - 4)]
    """
    # Reject non-positive degrees of freedom up front
    if df1 <= 0 or df2 <= 0:
        print("Error: Degrees of freedom must be positive")
        return (None, None)

    # Existence conditions for the first two moments
    has_mean = df2 > 2
    has_variance = df2 > 4

    # First moment
    mean = df2 / (df2 - 2) if has_mean else None
    if not has_mean:
        print(f"Warning: Mean is undefined for df2 <= 2 (df2 = {df2})")

    # Second central moment
    variance = (
        (2 * (df2 ** 2) * (df1 + df2 - 2))
        / (df1 * ((df2 - 2) ** 2) * (df2 - 4))
        if has_variance
        else None
    )
    if not has_variance:
        print(f"Warning: Variance is undefined for df2 <= 4 (df2 = {df2})")

    return (mean, variance)


# Example usage
# Example usage
if __name__ == "__main__":
    print("F-Distribution Statistics Calculator\n")

    # (df1, df2) pairs to demonstrate:
    #   (5, 10), (10, 20) - both moments exist
    #   (5, 3)            - mean exists, variance does not (df2 <= 4)
    #   (5, 2)            - neither moment exists (df2 <= 2)
    demo_cases = [(5, 10), (10, 20), (5, 3), (5, 2)]

    for index, (df1, df2) in enumerate(demo_cases):
        if index:
            print()  # blank line between cases, none after the last one
        mean, var = f_distribution_stats(df1, df2)
        print(f"F({df1}, {df2}):")
        # Compare against None explicitly: a legitimate value of 0.0 must
        # not be reported as "undefined" through truthiness.
        print(f"  Mean = {mean:.4f}" if mean is not None else "  Mean = undefined")
        print(f"  Variance = {var:.4f}" if var is not None else "  Variance = undefined")


In [None]:
"""
Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
sample variance is found to be 25. Another random sample of 15 measurements is taken from another
normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
at the 10% significance level to determine if the variances are significantly different.

I'll conduct an F-test to determine if the population variances are significantly different.

## Given Information
- Sample 1: n₁ = 10, s₁² = 25
- Sample 2: n₂ = 15, s₂² = 20
- Significance level: α = 0.10

## Step 1: Set up the Hypotheses

**H₀:** σ₁² = σ₂² (the population variances are equal)

**H₁:** σ₁² ≠ σ₂² (the population variances are different)

This is a two-tailed test.

## Step 2: Calculate the F-statistic

The F-statistic is the ratio of the larger sample variance to the smaller sample variance:

**F = s₁²/s₂² = 25/20 = 1.25**

## Step 3: Determine the Critical Values

For a two-tailed test at α = 0.10:
- Each tail has α/2 = 0.05
- Degrees of freedom: df₁ = n₁ - 1 = 9, df₂ = n₂ - 1 = 14

**Upper critical value:** F₀.₀₅(9, 14) ≈ 2.65

**Lower critical value:** F₀.₉₅(9, 14) = 1/F₀.₀₅(14, 9) ≈ 1/3.03 ≈ 0.33

The rejection region is: F < 0.33 or F > 2.65

## Step 4: Decision

Since F = 1.25 falls within the acceptance region (0.33 < 1.25 < 2.65), we **fail to reject H₀**.

## Conclusion

At the 10% significance level, there is **insufficient evidence** to conclude that the population variances are significantly different. The observed difference in sample variances can be attributed to random sampling variation.

The data are therefore consistent with the two populations having equal variances, although failing to reject H₀ does not prove that the variances are equal.

"""

In [None]:
"""
Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
significance level to determine if the variances are significantly different.

I'll conduct an F-test to determine if the variances of waiting times at the two restaurants are significantly different.

## Step 1: State the Hypotheses

- **H₀**: σ²ᴀ = σ²ʙ (variances are equal)
- **H₁**: σ²ᴀ ≠ σ²ʙ (variances are different)
- **Significance level**: α = 0.05

## Step 2: Calculate Sample Statistics

**Restaurant A:** 24, 25, 28, 23, 22, 20, 27
- n₁ = 7
- Mean (x̄ᴀ) = (24 + 25 + 28 + 23 + 22 + 20 + 27) ÷ 7 = 169 ÷ 7 = 24.14

Calculating variance:
- Σ(xᵢ - x̄)² = (24-24.14)² + (25-24.14)² + (28-24.14)² + (23-24.14)² + (22-24.14)² + (20-24.14)² + (27-24.14)²
- = 0.02 + 0.74 + 14.90 + 1.30 + 4.58 + 17.16 + 8.18 = 46.88
- s²ᴀ = 46.88 ÷ (7-1) = 46.88 ÷ 6 = **7.813**

**Restaurant B:** 31, 33, 35, 30, 32, 36
- n₂ = 6
- Mean (x̄ʙ) = (31 + 33 + 35 + 30 + 32 + 36) ÷ 6 = 197 ÷ 6 = 32.83

Calculating variance:
- Σ(xᵢ - x̄)² = (31-32.83)² + (33-32.83)² + (35-32.83)² + (30-32.83)² + (32-32.83)² + (36-32.83)²
- = 3.35 + 0.03 + 4.71 + 8.01 + 0.69 + 10.05 = 26.84
- s²ʙ = 26.84 ÷ (6-1) = 26.84 ÷ 5 = **5.368**

## Step 3: Calculate the F-statistic

For a two-tailed test, we place the larger variance in the numerator:

F = s²ᴀ / s²ʙ = 7.813 / 5.368 = **1.455**

## Step 4: Determine Critical Values

- Degrees of freedom: df₁ = n₁ - 1 = 6, df₂ = n₂ - 1 = 5
- For a two-tailed test at α = 0.05, we use α/2 = 0.025 in each tail
- **Critical values**: F₀.₀₂₅(6,5) and F₀.₉₇₅(6,5)
- F₀.₀₂₅(6,5) ≈ **6.98**
- F₀.₉₇₅(6,5) = 1/F₀.₀₂₅(5,6) ≈ 1/5.99 ≈ **0.167**

## Step 5: Decision

**Rejection region**: F < 0.167 or F > 6.98

Our calculated F-statistic = 1.455

Since 0.167 < 1.455 < 6.98, the F-statistic falls within the non-rejection region.

## Conclusion

**We fail to reject the null hypothesis** at the 5% significance level. There is insufficient evidence to conclude that the variances of waiting times at the two restaurants are significantly different. The variability in waiting times appears to be similar for both restaurants.

"""

In [None]:
"""
Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
are significantly different.

I'll conduct an F-test to determine if the variances of the two groups are significantly different at the 1% significance level.

## Step 1: State the Hypotheses

- **H₀ (Null Hypothesis)**: σ₁² = σ₂² (The variances are equal)
- **H₁ (Alternative Hypothesis)**: σ₁² ≠ σ₂² (The variances are different)

This is a two-tailed test at α = 0.01.

## Step 2: Calculate the Sample Variances

**Group A: 80, 85, 90, 92, 87, 83**

Mean of Group A: x̄ₐ = (80 + 85 + 90 + 92 + 87 + 83) / 6 = 517 / 6 = 86.17

Variance of Group A:
- (80 - 86.17)² = 38.07
- (85 - 86.17)² = 1.37
- (90 - 86.17)² = 14.67
- (92 - 86.17)² = 34.00
- (87 - 86.17)² = 0.69
- (83 - 86.17)² = 10.05

s₁² = (38.07 + 1.37 + 14.67 + 34.00 + 0.69 + 10.05) / (6 - 1) = 98.85 / 5 = **19.77**

**Group B: 75, 78, 82, 79, 81, 84**

Mean of Group B: x̄ᵦ = (75 + 78 + 82 + 79 + 81 + 84) / 6 = 479 / 6 = 79.83

Variance of Group B:
- (75 - 79.83)² = 23.33
- (78 - 79.83)² = 3.35
- (82 - 79.83)² = 4.71
- (79 - 79.83)² = 0.69
- (81 - 79.83)² = 1.37
- (84 - 79.83)² = 17.39

s₂² = (23.33 + 3.35 + 4.71 + 0.69 + 1.37 + 17.39) / (6 - 1) = 50.84 / 5 = **10.17**

## Step 3: Calculate the F-statistic

The F-statistic is the ratio of the larger variance to the smaller variance:

F = s₁² / s₂² = 19.77 / 10.17 = **1.944**

## Step 4: Determine the Critical Value

For a two-tailed test at α = 0.01:
- Degrees of freedom: df₁ = n₁ - 1 = 5, df₂ = n₂ - 1 = 5
- Critical values: F₀.₀₀₅(5, 5) and F₀.₉₉₅(5, 5)
- Upper critical value: F₀.₀₀₅(5, 5) ≈ **14.94**
- Lower critical value: F₀.₉₉₅(5, 5) = 1/F₀.₀₀₅(5, 5) ≈ 1/14.94 ≈ **0.067**

## Step 5: Decision

The calculated F-statistic is **1.944**.

Since 0.067 < 1.944 < 14.94, the F-statistic falls within the non-rejection region.

## Conclusion

**We fail to reject the null hypothesis** at the 1% significance level. There is insufficient evidence to conclude that the variances of the two groups are significantly different. The variances can be considered equal at the 1% significance level.

"""