In [1]:
import numpy as np # numerical ops
import pandas as pd # optional: for a small summary table

In [2]:
# ------------------ Static population of 1,000 scores ------------------
# Choose 20 realistic exam scores and repeat them 50 times (20*50 = 1000). Deterministic.
base_scores = [72, 85, 91, 67, 88, 94, 76, 83, 95, 69,
81, 77, 90, 86, 73, 92, 64, 79, 87, 84]

# np.tile(array_like, reps) repeats the sequence.
# - array_like = base_scores (original list)
# - reps = 50 (number of repetitions)
population = np.tile(base_scores, 50) # length will be 1000
print(f"Population size: {len(population)}") # sanity check -> 1000

Population size: 1000


In [3]:
# ------------------ Deterministic sample of size 100 ------------------
# Take every 10th element using slicing with a step:
# - start=0 (begin at first element)
# - stop omitted (go to end)
# - step=10 (pick every 10th -> 1000 / 10 = 100 elements)
sample = population[0:None:10]
print(f"Sample size: {len(sample)}") # sanity check -> 100

Sample size: 100


In [4]:
# ------------------ Means ------------------
# np.mean(array) computes arithmetic mean (average)
pop_mean = np.mean(population) # population mean
samp_mean = np.mean(sample) # sample mean

print(pop_mean)
print(samp_mean)

81.65
76.5


In [5]:
# ------------------ Standard deviations ------------------
# np.std(array, ddof=...) computes standard deviation.
# - ddof=0 => population formula (divide by N).
# - ddof=1 => sample formula (unbiased estimator; divide by N-1).
pop_std = np.std(population, ddof=0) # population SD
samp_std = np.std(sample, ddof=1) # sample SD (unbiased)

print(pop_std)
print(samp_std)

8.990411558988832
4.522670168666454


In [6]:
# ------------------ Print results ------------------
print(f"Population mean: {pop_mean:.2f}")
print(f"Population std : {pop_std:.2f}")
print(f"Sample mean : {samp_mean:.2f}")
print(f"Sample std : {samp_std:.2f}")

Population mean: 81.65
Population std : 8.99
Sample mean : 76.50
Sample std : 4.52


In [7]:
# ------------------ Optional: tidy table ------------------
summary = pd.DataFrame({
"Dataset": ["Population", "Sample"],
"Size": [len(population), len(sample)],
"Mean": [round(pop_mean, 2), round(samp_mean, 2)],
"StdDev": [round(pop_std, 2), round(samp_std, 2)]
})
print("\nSummary table:")
print(summary)


Summary table:
      Dataset  Size   Mean  StdDev
0  Population  1000  81.65    8.99
1      Sample   100  76.50    4.52


In [9]:
# ------------------ Explanation (printed) ------------------
print("\nExplanation:")
print("- Population uses every score (N=1000); population SD uses ddof=0 (divide by N).")
print("- Sample uses a subset (n=100); sample SD uses ddof=1 (divide by n-1) for an unbiased estimate.")


Explanation:
- Population uses every score (N=1000); population SD uses ddof=0 (divide by N).
- Sample uses a subset (n=100); sample SD uses ddof=1 (divide by n-1) for an unbiased estimate.
