In [2]:
# Class_Activities_Statistics.ipynb
# All three class activities (copy-paste and run)

import numpy as np
import pandas as pd
from scipy import stats
import math

print("Activity 1: Central Tendency from frequency table \n")

# Activity 1 data (from screenshot): each value paired with how often it occurs
values = [0,1,2,3,4,5]
freq   = [27,96,58,54,18,7]

# Rebuild the raw sample by repeating every value `freq` times, so the
# ordinary array statistics below can be applied to it directly.
data = np.repeat(values, freq)

N = data.size
mean_val = np.mean(data)
median_val = np.median(data)
# keepdims=True makes .mode / .count one-element arrays, hence the [0] below.
mode_res = stats.mode(data, keepdims=True)
mode_val, mode_count = int(mode_res.mode[0]), int(mode_res.count[0])

print(f"N = {N}")
print(f"Mean = {round(mean_val, 4)}")
print(f"Median = {median_val}")
print(f"Mode = {mode_val} (frequency: {mode_count} )")

# Hand-worked reference answers, printed for comparison with the values above.
print("\n".join([
    "\nExpected (manual):",
    " Σ(f·x) = 481, N = 260, Mean = 481/260 = 1.85",
    " Median position (N+1)/2 = 130.5 -> Median value = 2",
    " Mode = 1 (highest freq = 96)",
]))

print("\n\nActivity 2: Heights grouped frequency (percentages) \n")

# Activity 2 data (from screenshot): class labels "lower-upper" and their counts
ranges = ["59.95-61.95","61.95-63.95","63.95-65.95","65.95-67.95",
          "67.95-69.95","69.95-71.95","71.95-73.95","73.95-75.95"]
freq2 = [5,3,15,40,17,12,7,1]

df = pd.DataFrame({"Range": ranges, "Frequency": freq2})
total = sum(freq2)
print(df.to_string(index=False))
print("\nTotal =", total)

# Numeric (lower, upper) boundary of every class, parsed from its label.
# Selecting classes by boundary instead of by list position keeps each answer
# tied to the limits named in the question, even if the table is edited.
class_bounds = [tuple(float(edge) for edge in label.split("-")) for label in ranges]


def count_in_classes(lower, upper):
    """Return the total frequency of the classes lying entirely in [lower, upper].

    Parameters
    ----------
    lower, upper : float
        Limits of the interval. They are expected to coincide with class
        boundaries from ``ranges``; a class that only partly overlaps the
        interval is not counted, because grouped data cannot be split.

    Returns
    -------
    int
        Sum of ``freq2`` over the selected classes (0 if none qualify).
    """
    return sum(
        f for (lo, hi), f in zip(class_bounds, freq2)
        if lo >= lower and hi <= upper
    )


# (a) less than 65.95 -> every class whose upper boundary is at most 65.95
count_less = count_in_classes(float("-inf"), 65.95)
pct_less = count_less / total * 100

# (b) between 61.95 and 65.95
count_between = count_in_classes(61.95, 65.95)
pct_between = count_between / total * 100

# (c) number between 61.95 and 71.95
count_range = count_in_classes(61.95, 71.95)

print("\na) Percentage less than 65.95 = {:.2f}% ({} players)".format(pct_less, count_less))
print("b) Percentage between 61.95 and 65.95 = {:.2f}% ({} players)".format(pct_between, count_between))
print("c) Number of players between 61.95 and 71.95 = {}".format(count_range))

print("\nExpected (manual): a) 23% (23 players), b) 18% (18 players), c) 87 players")

print("\n\nActivity 3: Sample Variance & Standard Deviation \n")

# Activity 3 data (from screenshot)
scores = np.array([46,69,32,60,52,41])
n = scores.size
mean_score = scores.mean()

# Follow the definition step by step so every intermediate can be displayed.
deviations = scores - mean_score
squared = deviations**2
ss = squared.sum()                # Σ(x - mean)^2
sample_variance = ss / (n - 1)    # sample variance: divide by n-1, not n
sample_sd = math.sqrt(sample_variance)

print("Data:", scores.tolist())
print("n =", n)
print("Mean =", mean_score)
print("\nDeviations:", deviations.tolist())
print("Squared deviations:", squared.tolist())
# Use a 'g' format rather than int(): int() truncates toward zero, so a float
# sum such as 885.9999999 would be shown as 885 and a non-integer sum would
# silently lose its fractional part. '.10g' prints 886.0 as "886".
print("\nSum of squared deviations (Σ(x-mean)^2) =", format(ss, ".10g"))
print("Sample variance (s^2) =", round(sample_variance, 4))
print("Sample standard deviation (s) =", round(sample_sd, 4))

# Hand-computed reference values, held as data so the printed "Expected"
# section and the cross-checks below cannot drift apart.
expected_mean = 50
expected_ss = 886
expected_variance = 177.2
expected_sd = 13.3116             # sqrt(177.2) = 13.31165 (5 d.p.)

print("\nExpected (manual):")
print("Mean =", expected_mean)
print("Σ(x-mean)^2 =", expected_ss)
print(f"Sample variance = {expected_ss}/({n}-1) = {expected_variance}")
print(f"Sample SD = sqrt({expected_variance}) ≈ {expected_sd}")

# Fail loudly if the computed statistics and the manual answers disagree.
assert math.isclose(mean_score, expected_mean), "mean does not match manual value"
assert math.isclose(ss, expected_ss), "sum of squares does not match manual value"
assert math.isclose(sample_variance, expected_variance), "variance does not match manual value"
assert round(sample_sd, 4) == expected_sd, "standard deviation does not match manual value"

Activity 1: Central Tendency from frequency table 

N = 260
Mean = 1.85
Median = 2.0
Mode = 1 (frequency: 96 )

Expected (manual):
 Σ(f·x) = 481, N = 260, Mean = 481/260 = 1.85
 Median position (N+1)/2 = 130.5 -> Median value = 2
 Mode = 1 (highest freq = 96)


Activity 2: Heights grouped frequency (percentages) 

      Range  Frequency
59.95-61.95          5
61.95-63.95          3
63.95-65.95         15
65.95-67.95         40
67.95-69.95         17
69.95-71.95         12
71.95-73.95          7
73.95-75.95          1

Total = 100

a) Percentage less than 65.95 = 23.00% (23 players)
b) Percentage between 61.95 and 65.95 = 18.00% (18 players)
c) Number of players between 61.95 and 71.95 = 87

Expected (manual): a) 23% (23 players), b) 18% (18 players), c) 87 players


Activity 3: Sample Variance & Standard Deviation 

Data: [46, 69, 32, 60, 52, 41]
n = 6
Mean = 50.0

Deviations: [-4.0, 19.0, -18.0, 10.0, 2.0, -9.0]
Squared deviations: [16.0, 361.0, 324.0, 100.0, 4.0, 81.0]

Sum of square