# Chapter 3: Descriptive and Inferential Statistics

## Descriptive Statistics
##### Example 3-1. Calculating mean in Python

In [1]:
sample = [1, 3, 2, 5, 7, 0, 2, 3]
mean = sum(sample) / len(sample)
print(mean)

2.875


##### Example 3-2. Calculating a weighted mean in Python

In [16]:
sample = [90, 80, 63, 87]
weights = [0.20, 0.20, 0.20, 0.40]

weighted_mean = sum(s * w for s,w in zip(sample, weights)) / sum(weights)
print(weighted_mean)

81.4


##### Example 3-3. Calculating a weighted mean in Python

In [17]:
sample = [90, 80, 63, 87]
weights = [1.0, 1.0, 1.0, 2.0]

weighted_mean = sum(s * w for s,w in zip(sample, weights)) / sum(weights)
print(weighted_mean)

81.4


##### Example 3-4. Calculating the median in Python

In [30]:
sample = [0, 1, 5, 7, 9, 10, 14]

def median(values):
    ordered = sorted(values)
    n = len(ordered)
    mid = int(n/2) - 1 if n%2 == 0 else int(n/2)
    
    if n%2 == 0:
        return (ordered[mid] + ordered[mid+1]) / 2
    else:
        return ordered[mid]
print(median(sample))

7


##### Example 3-5. Calculating mode in Python

In [31]:
from collections import defaultdict 
sample = [1, 3, 2, 5, 7, 0, 2, 3]

def mode(values):
    counts = defaultdict(lambda: 0)
    for s in values:
        counts[s] += 1
    max_count = max(counts.values())
    modes = [v for v in set(values) if counts[v] == max_count]
    return modes

print(mode(sample))

[2, 3]


##### Example 3-6. Calculating variance in Python

In [33]:
data = [0, 1, 5, 7, 9, 10, 14]

def variance(values):
    mean = sum(values) / len(values)
    _variance = sum((v - mean)**2 for v in values) / len(values)
    return _variance

print(variance(data))

21.387755102040813


##### Example 3-7. Calculating standard deviation in Python

In [35]:
from math import sqrt

data = [0, 1, 5, 7, 9, 10, 14]

def variance(values):
    mean = sum(values) / len(values)
    _variance = sum((v - mean)**2 for v in values) / len(values)
    return _variance

def std_dev(values):
    return sqrt(variance(values))

print(std_dev(data))

4.624689730353898


##### Example 3-8. Calculating standard deviation for a sample

In [37]:
data = [0, 1, 5, 7, 9, 10, 14]

def variance(values, is_sample: bool=False):
    mean = sum(values) / len(values)
    _variance = sum((v - mean)**2 for v in values) / (len(values) - (1 if is_sample else 0))
    return _variance

def std_dev(values, is_sample: bool=False):
    return sqrt(variance(values, is_sample))

print(f'VARIANCE = {variance(data, True)}')
print(f'STD DEV = {std_dev(data, True)}')

VARIANCE = 24.95238095238095
STD DEV = 4.99523582550223


##### Example 3-9. The normal distribution function in Python

In [40]:
# normal distribution, returns likelyhood
def normal_pdf(x: float, mean: float, std_dev: float) -> float:
    return(1.0 / (2.0 * math.pi * std_dev**2)**0.5) * math.exp(-1.0 * ((x-mean)**2 / (2.0*std_dev**2)))

##### Example 3-10. The normal distribution CDF in Python

In [42]:
from scipy.stats import norm

mean = 64.43
std_dev = 2.99

x = norm.cdf(64.43, mean, std_dev)
print(x)

0.5


##### Example 3-11. Getting a middle range probability using CDF

In [43]:
from scipy.stats import norm

mean = 64.43
std_dev = 2.99

x = norm.cdf(66, mean, std_dev) - norm.cdf(62, mean, std_dev)
print(x)

0.4920450147062894


##### Example 3-12. Using inverse CDF (called ppf()) in Python

In [44]:
x = norm.ppf(0.95, loc=64.43, scale=2.99)
print(x)

69.3481123445849


##### Example 3-13. Generating random numbers from a normal distribution

In [45]:
import random

for i in range(0, 1000):
    random_p = random.uniform(0.0, 1.0)
    random_weight = norm.ppf(random_p, loc=64.43, scale=2.99)
    print(random_weight)

61.9346801258466
63.06632429101478
64.94479818853613
64.07434422166402
61.898405803552315
69.31565760657621
62.96975416075083
64.98452553686842
63.589730032653485
65.02710249678498
63.53831758946926
68.02996055479339
62.27329192256637
64.02517005638548
63.12731071026711
66.5607326456596
63.14347049519795
60.82359341859026
66.73276067500537
65.58128052967352
64.58457747559098
64.87282183163688
63.649173894142024
62.809137340824996
67.20759030124592
60.32172200469164
67.99889646213374
62.77339317122621
64.98359762945043
63.33214493559527
66.61826786005473
65.9972855333601
61.225518350913724
68.16011730000555
65.05193698291957
63.424079497256905
61.96778646268741
67.56605079516419
60.75740153120966
62.585528324026406
65.97415444776246
64.84225395216606
66.20603889292227
66.49705247556551
59.259230877459046
61.413741811165146
66.92579602399547
62.22109431048139
61.227422417089436
66.64176150616575
72.07396933878854
63.58408574497386
65.96882190912355
61.01527003475203
61.046817851903846
63

##### Example 3-14. Tunr Z-scores in x-values and vice versa

In [46]:
mean = 140000
std = 3000
x = 150000

def z_score(x, mean, std):
    return (x - mean) / std

def z_to_x(z, mean, std):
    return (z * std) + mean

# Convert to Z-score and then back to x
z = z_score(x, mean, std_dev)
back_to_x = z_to_x(z, mean, std)

print(f'Z-Score = {z}')
print(f'Back to x = {back_to_x}')

Z-Score = 3344.4816053511704
Back to x = 10173444.816053512
