In [1]:
# ===================================================
#  Synthetic Data Generation and Statistical Analysis
# ===================================================

import numpy as np
from scipy import stats
import math

# ===================================================
#  PART 1: Teachers' Rating Dataset
# ===================================================
print("## Part 1: Teachers' Rating Dataset")
print("-" * 40)

# Create a synthetic dataset of 50 teacher evaluation scores
# Ratings are randomly distributed between 2.5 and 5.0, with most scores higher (typical pattern)
np.random.seed(42)
ratings = np.round(np.random.normal(loc=4.0, scale=0.4, size=50), 1)

# Ensure values stay within valid range (2.5 to 5.0)
ratings = np.clip(ratings, 2.5, 5.0)
print("Sample of Ratings:", ratings[:10], "...\n")


## Part 1: Teachers' Rating Dataset
----------------------------------------
Sample of Ratings: [4.2 3.9 4.3 4.6 3.9 3.9 4.6 4.3 3.8 4.2] ...




Q1. Using the teachers' rating dataset, what is the probability of receiving an evaluation score of greater than 4.5

In [2]:
count_gt_4_5 = np.sum(ratings > 4.5)
total_count = len(ratings)
prob_gt_4_5 = count_gt_4_5 / total_count

print(f"Q1: Probability of receiving a score > 4.5")
print(f"   - Count of scores > 4.5: {count_gt_4_5}")
print(f"   - Total scores: {total_count}")
print(f"   - Probability: {count_gt_4_5}/{total_count} = {prob_gt_4_5:.2f} or {prob_gt_4_5:.0%}\n")

Q1: Probability of receiving a score > 4.5
   - Count of scores > 4.5: 4
   - Total scores: 50
   - Probability: 4/50 = 0.08 or 8%



Q2. Using the teachers' rating dataset, what is the probability of receiving an evaluation score greater than 3.5 and less than 4.2

In [7]:
print("## Part 2: Hypothesis Test for Basketball Teams")
print("-" * 50)
p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))

print("Performing the Two-Tailed Z-test:")
print(f"   - Sample Mean (x̄): {x_bar:.2f}")
print(f"   - Z-score: {z_score:.4f}")
print(f"   - P-value: {p_value:.4f}\n")

## Part 2: Hypothesis Test for Basketball Teams
--------------------------------------------------
Performing the Two-Tailed Z-test:
   - Sample Mean (x̄): 9.81
   - Z-score: -2.3939
   - P-value: 0.0167



Q3. Using the two-tailed test from a normal distribution:
1. A professional basketball team wants to compare its performance with that of players in a regional league.
2. The pros are known to have a historic mean of 12 points per game with a standard deviation of 5.5.
3. A group of 36 regional players recorded on average 10.7 points per game.
4. The pro coach would like to know whether his professional team scores on average are different from that of the regional players.
State the null hypothesis
Null Hypo: ("The mean point of the regional players is not different from the historic mean")
Alternative Hypo: ("The mean point of the regional players is different from the historic mean")

In [6]:
print("Stating the Hypotheses:")
print("Null Hypothesis (H₀): The mean point of the regional players is not different from the historic mean (μ = 12).")
print("Alternative Hypothesis (H₁): The mean point of the regional players is different from the historic mean (μ ≠ 12).\n")

# Given information
mu_0 = 12       # Historic mean (population mean)
sigma = 5.5     # Population standard deviation
n = 36          # Sample size

# Generate synthetic dataset for 36 regional players
np.random.seed(42)
regional_scores = np.round(np.random.normal(loc=10.7, scale=sigma, size=n), 1)

# Calculate sample mean
x_bar = np.mean(regional_scores)

# Step 1: Standard error of the mean
sem = sigma / math.sqrt(n)

# Step 2: Z-score
z_score = (x_bar - mu_0) / sem

# Step 3: Two-tailed p-value
p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))

print("Performing the Two-Tailed Z-test:")
print(f"   - Sample Mean (x̄): {x_bar:.2f}")
print(f"   - Z-score: {z_score:.4f}")
print(f"   - P-value: {p_value:.4f}\n")

# Step 4: Conclusion
alpha = 0.05
print(f"Conclusion (at α = {alpha}):")
if p_value < alpha:
    print(f"   - Since p-value ({p_value:.4f}) < {alpha}, we REJECT the null hypothesis.")
    print("   - There is statistically significant evidence that the regional players' mean differs from the historic mean.")
else:
    print(f"   - Since p-value ({p_value:.4f}) > {alpha}, we FAIL TO REJECT the null hypothesis.")
    print("   - There is not enough evidence to say the regional players' mean differs from the historic mean.")

Stating the Hypotheses:
Null Hypothesis (H₀): The mean point of the regional players is not different from the historic mean (μ = 12).
Alternative Hypothesis (H₁): The mean point of the regional players is different from the historic mean (μ ≠ 12).

Performing the Two-Tailed Z-test:
   - Sample Mean (x̄): 9.81
   - Z-score: -2.3939
   - P-value: 0.0167

Conclusion (at α = 0.05):
   - Since p-value (0.0167) < 0.05, we REJECT the null hypothesis.
   - There is statistically significant evidence that the regional players' mean differs from the historic mean.
