In [1]:
import os, random
import pathlib

import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

# One seed shared by every random number generator used in this notebook,
# so that a fresh "Restart & Run All" reproduces the same draws.
SEED = 41
np.random.seed(SEED)  # NumPy's legacy global generator (used by np.random.randint)
random.seed(SEED)  # Python's built-in `random` module

%matplotlib inline

In [2]:
# Custom six-colour palette (hex codes), registered as seaborn's default colour cycle.
COLORS = [
    "#64E6FF",
    "#007DC5",
    "#4D2F9E",
    "#BE0046",
    "#EB5000",
    "#FFE600",
]
sns.set_palette(sns.color_palette(COLORS))

In [3]:
# Arithmetic mean of a random sample.
# Draw 100 integers from [10, 50): the lower bound is inclusive, the upper bound exclusive.
data = np.random.randint(low=10, high=50, size=100)
print(f"Data sample (first 20): {data[:20]}")
# Average over all 100 values.
mean = data.mean()
print(f"Mean: {mean:.3f}")

Data sample (first 20): [10 45 22 44 26 11 35 33 31 36 13 45 16 30 32 38 34 21 38 27]
Mean: 29.380


In [4]:
# Trimmed mean via scipy.stats: the lowest 10% and the highest 10% of the
# values in `data` are cut off before the remaining values are averaged.
trimmed_mean = stats.trim_mean(data, proportiontocut=0.10)
print(f"Trimmed mean: {trimmed_mean:.3f}")

Trimmed mean: 29.438


In [5]:
# Median of a random sample.
# NOTE: `data` is re-drawn here (100 integers from [10, 50), upper bound exclusive),
# so it replaces whatever array `data` referred to before this cell ran.
data = np.random.randint(low=10, high=50, size=100)
median = np.median(data)
print(f"Median: {median:.3f}")

Median: 27.000


In [6]:
# Mode (most frequent value) of a random sample.
# `data` is re-drawn again: 100 integers from [10, 50), upper bound exclusive.
data = np.random.randint(low=10, high=50, size=100)
print(f"Data sample (first 20): {data[:20]}")

# With keepdims=True the result fields are length-1 arrays:
# index 0 is the modal value, index 1 is how many times it occurs.
mode = stats.mode(a=data, keepdims=True)
print(f"Mode: {mode[0]} (occurs {mode[1]} times)")

Data sample (first 20): [43 35 44 18 46 38 44 11 22 14 25 41 39 12 29 49 36 29 37 41]
Mode: [17] (occurs [6] times)
