In [1]:
import numpy as np
import pandas as pd
from scipy.stats import trim_mean

# Daily maximum rainfall recorded for each year, in mm (47 observations).
rainfall = np.array([
    1468, 909, 841, 475, 846, 452,
    3830, 1397, 556, 978, 1715, 747,
    909, 2002, 1331, 1227, 2543, 2649,
    1781, 1717, 2718, 584, 1859, 1138,
    2675, 1872, 1359, 1544, 1372, 1334,
    955, 1849, 719, 1737, 1389, 681,
    1565, 701, 994, 1188, 962, 1564,
    1800, 580, 1106, 880, 850
])

# Location and spread of the full sample; ddof=1 gives the (n - 1)-denominator
# sample standard deviation rather than the population one.
sample_mean = rainfall.mean()
sample_sd = rainfall.std(ddof=1)

# (a) Five-number summary: extremes plus the three quartiles.
# Quartiles use NumPy's default linear interpolation between order statistics.
five_num_summary = {
    'Min': rainfall.min(),
    'Q1': np.quantile(rainfall, 0.25),
    'Median (Q2)': np.median(rainfall),
    'Q3': np.quantile(rainfall, 0.75),
    'Max': rainfall.max(),
}

# (b) Quick SD estimate from the interquartile range.
# For a normal distribution the IQR spans roughly 1.35 standard deviations,
# so IQR / 1.35 is a rough, outlier-resistant stand-in for the SD.
iqr = five_num_summary['Q3'] - five_num_summary['Q1']
estimated_sd = iqr / 1.35

# (c) 10% trimmed mean, to be compared against the ordinary sample mean:
# the lowest and highest 10% of observations are dropped before averaging.
trimmed_mean_10pct = trim_mean(rainfall, proportiontocut=0.10)

# Display every result as the cell's output.
(five_num_summary, iqr, estimated_sd, sample_sd, sample_mean, trimmed_mean_10pct)


({'Min': np.int64(452),
  'Q1': np.float64(865.0),
  'Median (Q2)': np.float64(1331.0),
  'Q3': np.float64(1727.0),
  'Max': np.int64(3830)},
 np.float64(862.0),
 np.float64(638.5185185185185),
 np.float64(693.6701770464038),
 np.float64(1369.1063829787233),
 np.float64(1292.6410256410256))