In [26]:
import math
from scipy.stats import norm, stats, chisquare, chi2, chi2_contingency
import numpy as np

In [1]:
# Problem Statement 1
# Define each hypothesis scenario and validate if it's correctly stated
def validate_hypothesis():
    hypotheses = [
        {"H0": "μ = 25", "H1": "μ ≠ 25", "parameter": "μ", "correct": True},
        {"H0": "σ > 10", "H1": "σ = 10", "parameter": "σ", "correct": False},
        {"H0": "x = 50", "H1": "x ≠ 50", "parameter": "x", "correct": False},
        {"H0": "p = 0.1", "H1": "p = 0.5", "parameter": "p", "correct": False},
        {"H0": "s = 30", "H1": "s > 30", "parameter": "s", "correct": False},
    ]

    results = []
    for i, hypo in enumerate(hypotheses, 1):
        parameter = hypo["parameter"]
        correct = hypo["correct"]

        if correct:
            reason = f"Correctly stated: Hypothesis is about the population parameter {parameter}."
        else:
            if parameter == "σ" and hypo["H0"] == "σ > 10":
                reason = "Incorrect: The null hypothesis should typically include equality (e.g., H0: σ = 10)."
            elif parameter == "x":
                reason = "Incorrect: Hypothesis testing should involve population parameters, not sample statistics."
            elif parameter == "p":
                reason = "Incorrect: The null and alternative hypotheses should refer to the same parameter with different assumed values."
            elif parameter == "s":
                reason = "Incorrect: Hypothesis testing should involve the population parameter (σ), not the sample standard deviation (s)."

        results.append(f"Problem {i}: {reason}")

    return results

# Display validation results
for result in validate_hypothesis():
    print(result)


Problem 1: Correctly stated: Hypothesis is about the population parameter μ.
Problem 2: Incorrect: The null hypothesis should typically include equality (e.g., H0: σ = 10).
Problem 3: Incorrect: Hypothesis testing should involve population parameters, not sample statistics.
Problem 4: Incorrect: The null and alternative hypotheses should refer to the same parameter with different assumed values.
Problem 5: Incorrect: Hypothesis testing should involve the population parameter (σ), not the sample standard deviation (s).


In [3]:
# Problem Statement 2
mu_0 = 52            # Population mean
sigma = 4.50         # Population standard deviation
n = 100              # Sample size
x_bar = 52.80        # Sample mean
alpha = 0.05         # Significance level

# Step 1: Calculate the z-score
z = (x_bar - mu_0) / (sigma / math.sqrt(n))

# Step 2: Determine the critical value for a one-tailed test
z_critical = norm.ppf(1 - alpha)

# Step 3: Make a decision
decision = "Reject the null hypothesis" if z > z_critical else "Fail to reject the null hypothesis"

# Results
print(f"Test statistic (z): {z:.2f}")
print(f"Critical value (z_critical): {z_critical:.2f}")
print(f"Decision: {decision}")


Test statistic (z): 1.78
Critical value (z_critical): 1.64
Decision: Reject the null hypothesis


In [4]:
# Problem Statement 3
mu_0 = 34             # Population mean
sigma = 8             # Population standard deviation
n = 50                # Sample size
x_bar = 32.5          # Sample mean
alpha = 0.01          # Significance level

# Step 1: Calculate the z-score
z = (x_bar - mu_0) / (sigma / math.sqrt(n))

# Step 2: Determine the critical value for a one-tailed test
z_critical = norm.ppf(alpha)  # Since it's a left-tailed test

# Step 3: Make a decision
decision = "Reject the null hypothesis" if z < z_critical else "Fail to reject the null hypothesis"

# Results
print(f"Test statistic (z): {z:.2f}")
print(f"Critical value (z_critical): {z_critical:.2f}")
print(f"Decision: {decision}")


Test statistic (z): -1.33
Critical value (z_critical): -2.33
Decision: Fail to reject the null hypothesis


In [9]:
# Problem Statement 4
data = np.array([
    1008, 812, 1117, 1323, 1308, 1415, 831, 1021, 1287,
    851, 930, 730, 699, 872, 913, 944, 954, 987, 1695,
    995, 1003, 994
])

# Constants
mu_0 = 1135  # Population mean
alpha = 0.05  # Significance level
n = len(data)  # Sample size

# Step 1: Calculate sample mean and sample standard deviation
x_bar = np.mean(data)
s = np.std(data, ddof=1)  # Sample standard deviation

# Step 2: Calculate the t-statistic
t_statistic = (x_bar - mu_0) / (s / np.sqrt(n))

# Step 3: Determine the critical value for a two-tailed test
t_critical = stats.t.ppf(1 - alpha/2, df=n-1)

# Step 4: Make a decision
decision = "Reject the null hypothesis" if abs(t_statistic) > t_critical else "Fail to reject the null hypothesis"

# Results
print(f"Sample Mean (x_bar): {x_bar:.2f}")
print(f"Sample Standard Deviation (s): {s:.2f}")
print(f"Test Statistic (t): {t_statistic:.2f}")
print(f"Critical Value (t_critical): {t_critical:.2f}")
print(f"Decision: {decision}")


Sample Mean (x_bar): 1031.32
Sample Standard Deviation (s): 240.37
Test Statistic (t): -2.02
Critical Value (t_critical): 2.08
Decision: Fail to reject the null hypothesis


In [10]:
# Problem Statement 5
mu_0 = 48432  # Null hypothesis mean
x_bar = 48574  # Sample mean
s = 2000  # Sample standard deviation
n = 400  # Sample size
alpha = 0.05  # Significance level

# Step 4: Calculate the Z statistic
z_statistic = (x_bar - mu_0) / (s / np.sqrt(n))

# Step 5: Calculate the critical Z value for a two-tailed test
z_critical = stats.norm.ppf(1 - alpha/2)

# Step 6: Make a decision
decision = "Reject the null hypothesis" if abs(z_statistic) > z_critical else "Fail to reject the null hypothesis"

# Results
print(f"Sample Mean (x_bar): {x_bar}")
print(f"Population Mean (mu_0): {mu_0}")
print(f"Z Statistic: {z_statistic:.4f}")
print(f"Critical Z Value: {z_critical:.4f}")
print(f"Decision: {decision}")


Sample Mean (x_bar): 48574
Population Mean (mu_0): 48432
Z Statistic: 1.4200
Critical Z Value: 1.9600
Decision: Fail to reject the null hypothesis


In [11]:
# Problem Statement 6
mu_0 = 32.28  # Null hypothesis mean
x_bar = 31.67  # Sample mean
s = 1.29  # Sample standard deviation
n = 19  # Sample size
alpha = 0.05  # Significance level

# Step 4: Calculate the t statistic
t_statistic = (x_bar - mu_0) / (s / np.sqrt(n))

# Step 5: Calculate the critical t value for a two-tailed test
t_critical = stats.t.ppf(1 - alpha/2, df=n-1)

# Step 6: Make a decision
decision = "Reject the null hypothesis" if abs(t_statistic) > t_critical else "Fail to reject the null hypothesis"

# Results
print(f"Sample Mean (x_bar): {x_bar}")
print(f"Population Mean (mu_0): {mu_0}")
print(f"T Statistic: {t_statistic:.4f}")
print(f"Critical T Value: {t_critical:.4f}")
print(f"Decision: {decision}")


Sample Mean (x_bar): 31.67
Population Mean (mu_0): 32.28
T Statistic: -2.0612
Critical T Value: 2.1009
Decision: Fail to reject the null hypothesis


In [12]:
# Problem Statement 8
n = 16  # Sample size
x_bar = 12  # Sample mean
mu = 10  # Population mean
s = 1.5  # Sample standard deviation

# Calculate standard error
SE = s / np.sqrt(n)

# Calculate t-score
t_score = (x_bar - mu) / SE

t_score


5.333333333333333

In [13]:
# Problem Statement 9
n = 16
# Degrees of freedom
df = n - 1

# Get the t-score for 99% (0.99) for df = 15
t_score_99 = stats.t.ppf(0.99, df)

t_score_99


2.602480294995493

In [14]:
# Problem Statement 10

# Degrees of freedom
df = 24

# Find the t-scores for the middle 95%
t_lower = stats.t.ppf(0.025, df)  # lower bound (2.5%)
t_upper = stats.t.ppf(0.975, df)  # upper bound (97.5%)

t_lower, t_upper


(-2.063898561628021, 2.0638985616280205)

In [15]:
# Problem Statement 11

# Population 1 (Bangalore to Chennai)
n1 = 1200
x1 = 452
s1 = 212

# Population 2 (Bangalore to Hosur)
n2 = 800
x2 = 523
s2 = 185

# Calculate the standard error
SE = np.sqrt((s1**2 / n1) + (s2**2 / n2))

# Calculate the test statistic
z = (x1 - x2) / SE

# Determine the critical z-values for a two-tailed test at alpha = 0.05
alpha = 0.05
critical_value = stats.norm.ppf(1 - alpha / 2)

z, critical_value


(-7.926428526759299, 1.959963984540054)

In [16]:
# Problem Statement 12

# Population 1 (Duracell)
n1 = 100
x1 = 308
s1 = 84

# Population 2 (Energizer)
n2 = 100
x2 = 254
s2 = 67

# Calculate the standard error
SE = np.sqrt((s1**2 / n1) + (s2**2 / n2))

# Calculate the test statistic
z = (x1 - x2) / SE

# Determine the critical z-values for a two-tailed test at alpha = 0.05
alpha = 0.05
critical_value = stats.norm.ppf(1 - alpha / 2)

# Display results
z, critical_value


(5.025702668336442, 1.959963984540054)

In [17]:
# Problem Statement 13

# Population 1 (Price of sugar = Rs. 27.50)
n1 = 14
x1 = 0.317
s1 = 0.12

# Population 2 (Price of sugar = Rs. 20.00)
n2 = 9
x2 = 0.21
s2 = 0.11

# Step 1: Calculate the pooled variance
sp_squared = ((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2)

# Step 2: Calculate the standard error
SE = np.sqrt(sp_squared * (1/n1 + 1/n2))

# Step 3: Calculate the test statistic
t = (x1 - x2) / SE

# Step 4: Determine degrees of freedom
df = n1 + n2 - 2

# Step 5: Determine the critical t-value for alpha = 0.05 (two-tailed)
alpha = 0.05
critical_value = stats.t.ppf(1 - alpha / 2, df)

# Display results
print("Test statistic (t):", t)
print("Critical t-value for alpha = 0.05:", critical_value)

# Decision
if abs(t) > critical_value:
    print("Reject the null hypothesis: There is a significant difference in the average percentage increase in the price of sugar.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in the average percentage increase in the price of sugar.")


Test statistic (t): 2.15355322387416
Critical t-value for alpha = 0.05: 2.079613844727662
Reject the null hypothesis: There is a significant difference in the average percentage increase in the price of sugar.


In [18]:
# Problem Statement 14

# Population 1 (Before reduction)
n1 = 15
x1 = 6598
s1 = 844

# Population 2 (After reduction)
n2 = 12
x2 = 6870
s2 = 669

# Step 1: Calculate the pooled variance
sp_squared = ((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2)

# Step 2: Calculate the standard error
SE = np.sqrt(sp_squared * (1/n1 + 1/n2))

# Step 3: Calculate the test statistic
t = (x1 - x2) / SE

# Step 4: Determine degrees of freedom
df = n1 + n2 - 2

# Step 5: Determine the critical t-value for alpha = 0.05 (two-tailed)
alpha = 0.05
critical_value = stats.t.ppf(1 - alpha / 2, df)

# Display results
print("Test statistic (t):", t)
print("Critical t-value for alpha = 0.05:", critical_value)

# Decision
if abs(t) > critical_value:
    print("Reject the null hypothesis: There is evidence that the price reduction increased sales.")
else:
    print("Fail to reject the null hypothesis: There is no evidence that the price reduction increased sales.")


Test statistic (t): -0.9098300343990461
Critical t-value for alpha = 0.05: 2.059538552753294
Fail to reject the null hypothesis: There is no evidence that the price reduction increased sales.


In [19]:
# Problem Statement 15

# Population 1 (1980)
n1 = 1000
x1 = 53
p1 = x1 / n1

# Population 2 (1995)
n2 = 100
x2 = 43
p2 = x2 / n2

# Step 1: Calculate the pooled proportion
p = (x1 + x2) / (n1 + n2)

# Step 2: Calculate the standard error
SE = np.sqrt(p * (1 - p) * (1/n1 + 1/n2))

# Step 3: Calculate the test statistic (Z)
Z = (p1 - p2) / SE

# Step 4: Determine the critical Z-value for alpha = 0.05 (two-tailed)
alpha = 0.05
critical_value = stats.norm.ppf(1 - alpha / 2)

# Display results
print("Test statistic (Z):", Z)
print("Critical Z-value for alpha = 0.05:", critical_value)

# Decision
if abs(Z) > critical_value:
    print("Reject the null hypothesis: There is evidence that the banks’ share of the car loan market is different in 1980 and 1995.")
else:
    print("Fail to reject the null hypothesis: There is no evidence that the banks’ share of the car loan market is different in 1980 and 1995.")


Test statistic (Z): -12.736063621349617
Critical Z-value for alpha = 0.05: 1.959963984540054
Reject the null hypothesis: There is evidence that the banks’ share of the car loan market is different in 1980 and 1995.


In [20]:
# Problem Statement 16

# Population 1 (With Sweepstakes)
n1 = 300
x1 = 120
p1 = x1 / n1  # Sample proportion with sweepstakes

# Population 2 (No Sweepstakes)
n2 = 700
x2 = 140
p2 = x2 / n2  # Sample proportion without sweepstakes

# Step 1: Calculate the pooled proportion
p = (x1 + x2) / (n1 + n2)

# Step 2: Calculate the standard error (SE)
SE = np.sqrt(p * (1 - p) * (1/n1 + 1/n2))

# Step 3: Calculate the test statistic (Z)
Z = (p1 - p2 - 0.10) / SE

# Step 4: Determine the critical Z-value for alpha = 0.05 (one-tailed)
alpha = 0.05
critical_value = stats.norm.ppf(1 - alpha)

# Display results
print("Test statistic (Z):", Z)
print("Critical Z-value for alpha = 0.05:", critical_value)

# Decision
if Z > critical_value:
    print("Reject the null hypothesis: There is evidence that the proportion of buyers with sweepstakes is at least 10% higher than without.")
else:
    print("Fail to reject the null hypothesis: There is no evidence that the proportion of buyers with sweepstakes is at least 10% higher than without.")


Test statistic (Z): 3.303749523611152
Critical Z-value for alpha = 0.05: 1.6448536269514722
Reject the null hypothesis: There is evidence that the proportion of buyers with sweepstakes is at least 10% higher than without.


In [25]:
# Problem Statement 17

# Observed frequencies for the die rolls
observed_frequencies = np.array([16, 20, 25, 14, 29, 28])

# Expected frequencies (assuming a fair die)
total_rolls = 132
expected_frequency = total_rolls / 6  # 22 for each side
expected_frequencies = np.array([expected_frequency] * 6)

# Perform Chi-Square goodness-of-fit test
chi2_statistic, p_value = chisquare(observed_frequencies, expected_frequencies)

# Degrees of freedom
degrees_of_freedom = len(observed_frequencies) - 1

# Display results
print(f"Chi-Square Statistic: {chi2_statistic:.4f}")
print(f"P-Value: {p_value:.4f}")
print(f"Degrees of Freedom: {degrees_of_freedom}")

# Critical value for alpha = 0.05
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)

print(f"Critical Value (alpha = {alpha}): {critical_value:.4f}")

# Decision based on significance level
if chi2_statistic > critical_value:
    print("Reject the null hypothesis: The die is biased.")
else:
    print("Fail to reject the null hypothesis: The die is unbiased.")



Chi-Square Statistic: 9.0000
P-Value: 0.1091
Degrees of Freedom: 5
Critical Value (alpha = 0.05): 11.0705
Fail to reject the null hypothesis: The die is unbiased.


In [27]:
# Problem Statement 18

# Create the contingency table
contingency_table = np.array([[2792, 1486],
                               [3591, 2131]])

# Perform the chi-square test
chi2_statistic, p_value, dof, expected = chi2_contingency(contingency_table)

# Output the results
print(f"Chi-Square Statistic: {chi2_statistic:.4f}")
print(f"P-Value: {p_value:.4f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies:\n", expected)

# Decision based on significance level
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant association between gender and voting participation.")
else:
    print("Fail to reject the null hypothesis: There is no significant association between gender and voting participation.")


Chi-Square Statistic: 6.5523
P-Value: 0.0105
Degrees of Freedom: 1
Expected Frequencies:
 [[2730.6474 1547.3526]
 [3652.3526 2069.6474]]
Reject the null hypothesis: There is a significant association between gender and voting participation.


In [28]:
# Problem Statement 19

# Observed frequencies
observed = np.array([41, 19, 24, 16])

# Expected frequencies (if all candidates are equally popular)
expected = np.array([25, 25, 25, 25])

# Calculate the Chi-Square statistic
chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

# Calculate degrees of freedom
df = len(observed) - 1

# Get the critical value for Chi-Square at df = 3 and alpha = 0.05
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, df)

# Output the results
print(f"Chi-Square Statistic: {chi_square_statistic:.2f}")
print(f"Critical Value (df={df}, alpha={alpha}): {critical_value:.2f}")

# Decision based on the comparison
if chi_square_statistic > critical_value:
    print("Reject the null hypothesis: Not all candidates are equally popular.")
else:
    print("Fail to reject the null hypothesis: There is no evidence that the candidates are not equally popular.")


Chi-Square Statistic: 14.96
Critical Value (df=3, alpha=0.05): 7.81
Reject the null hypothesis: Not all candidates are equally popular.


In [29]:
# Problem Statement 20

# Observed frequencies (rows represent age groups, columns represent photographs)
observed = np.array([[18, 22, 20],
                     [2, 28, 40],
                     [20, 10, 40]])

# Total observed values
total_observed = observed.sum()

# Row totals and column totals
row_totals = observed.sum(axis=1)
column_totals = observed.sum(axis=0)

# Calculate expected frequencies
expected = np.zeros_like(observed, dtype=float)
for i in range(observed.shape[0]):
    for j in range(observed.shape[1]):
        expected[i, j] = (row_totals[i] * column_totals[j]) / total_observed

# Calculate Chi-Square statistic
chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

# Calculate degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Get the critical value for Chi-Square at df = 4 and alpha = 0.05
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, df)

# Output the results
print(f"Chi-Square Statistic: {chi_square_statistic:.2f}")
print(f"Critical Value (df={df}, alpha={alpha}): {critical_value:.2f}")

# Decision based on the comparison
if chi_square_statistic > critical_value:
    print("Reject the null hypothesis: There is a significant relationship between age and photograph preference.")
else:
    print("Fail to reject the null hypothesis: There is no evidence of a significant relationship between age and photograph preference.")


Chi-Square Statistic: 29.60
Critical Value (df=4, alpha=0.05): 9.49
Reject the null hypothesis: There is a significant relationship between age and photograph preference.


In [30]:
# Problem Statement 21

# Observed frequencies
observed = np.array([[18, 40],  # Conform
                     [32, 10]]) # Not Conform

# Total observed values
total_observed = observed.sum()

# Row totals and column totals
row_totals = observed.sum(axis=1)
column_totals = observed.sum(axis=0)

# Calculate expected frequencies
expected = np.zeros_like(observed, dtype=float)
for i in range(observed.shape[0]):
    for j in range(observed.shape[1]):
        expected[i, j] = (row_totals[i] * column_totals[j]) / total_observed

# Calculate Chi-Square statistic
chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

# Calculate degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Get the critical value for Chi-Square at df = 1 and alpha = 0.05
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, df)

# Output the results
print(f"Chi-Square Statistic: {chi_square_statistic:.2f}")
print(f"Critical Value (df={df}, alpha={alpha}): {critical_value:.2f}")

# Decision based on the comparison
if chi_square_statistic > critical_value:
    print("Reject the null hypothesis: There is a significant difference in conformity rates between the support and no support conditions.")
else:
    print("Fail to reject the null hypothesis: There is no evidence of a significant difference in conformity rates between the support and no support conditions.")


Chi-Square Statistic: 19.87
Critical Value (df=1, alpha=0.05): 3.84
Reject the null hypothesis: There is a significant difference in conformity rates between the support and no support conditions.


In [31]:
# Problem Statement 22

# Observed frequencies
observed = np.array([[12, 32],  # Leaders
                     [22, 14],  # Followers
                     [9, 6]])   # Unclassifiable

# Total observed values
total_observed = observed.sum()

# Row totals and column totals
row_totals = observed.sum(axis=1)
column_totals = observed.sum(axis=0)

# Calculate expected frequencies
expected = np.zeros_like(observed, dtype=float)
for i in range(observed.shape[0]):
    for j in range(observed.shape[1]):
        expected[i, j] = (row_totals[i] * column_totals[j]) / total_observed

# Calculate Chi-Square statistic
chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

# Calculate degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Get the critical value for Chi-Square at df = 2 and alpha = 0.01
alpha = 0.01
critical_value = chi2.ppf(1 - alpha, df)

# Output the results
print(f"Chi-Square Statistic: {chi_square_statistic:.2f}")
print(f"Critical Value (df={df}, alpha={alpha}): {critical_value:.2f}")

# Decision based on the comparison
if chi_square_statistic > critical_value:
    print("Reject the null hypothesis: There is a significant relationship between height and leadership qualities.")
else:
    print("Fail to reject the null hypothesis: There is no evidence of a significant relationship between height and leadership qualities.")


Chi-Square Statistic: 10.71
Critical Value (df=2, alpha=0.01): 9.21
Reject the null hypothesis: There is a significant relationship between height and leadership qualities.


In [32]:
# Problem Statement 23

# Observed frequencies
observed = np.array([[679, 103, 114],  # Employed
                     [63, 10, 20],    # Unemployed
                     [42, 18, 25]])   # Not in Labor Force

# Total observed values
total_observed = observed.sum()

# Row totals and column totals
row_totals = observed.sum(axis=1)
column_totals = observed.sum(axis=0)

# Calculate expected frequencies
expected = np.zeros_like(observed, dtype=float)
for i in range(observed.shape[0]):
    for j in range(observed.shape[1]):
        expected[i, j] = (row_totals[i] * column_totals[j]) / total_observed

# Calculate Chi-Square statistic
chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

# Calculate degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Get the critical value for Chi-Square at alpha = 0.05
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, df)

# Output the results
print(f"Chi-Square Statistic: {chi_square_statistic:.2f}")
print(f"Critical Value (df={df}, alpha={alpha}): {critical_value:.2f}")

# Decision based on the comparison
if chi_square_statistic > critical_value:
    print("Reject the null hypothesis: There is a significant association between marital status and labor force status.")
else:
    print("Fail to reject the null hypothesis: There is no evidence of a significant association between marital status and labor force status.")


Chi-Square Statistic: 31.61
Critical Value (df=4, alpha=0.05): 9.49
Reject the null hypothesis: There is a significant association between marital status and labor force status.
