# Summary Stats from Scratch using Python

## Plain Vanilla Python

In [1]:
import random


### Creating a list of numbers


In [2]:
# Build the integers 1..size as a plain Python list.
size = 1000
values = list(range(1, size + 1))

# Seed the generator first so the shuffle (and the preview below) is repeatable.
random.seed(42)
random.shuffle(values)

# Preview the first five shuffled values.
values[:5]


[777, 508, 896, 923, 34]

### Mean

In [3]:
# Add up every value; the elements are Python ints, so the total is exact.
total = sum(values)
# Display the total using "_" as the thousands separator.
f"{total:_}"

'500_500'

In [4]:
# Number of observations in the list.
count = len(values)
count


1000

In [5]:
# Arithmetic mean: the total divided by the number of observations.
mean = total / count
mean


500.5

### Standard Deviation

In [6]:
# Deviation of each observation from the mean.
delta = [value - mean for value in values]
delta[:5]

[276.5, 7.5, 395.5, 422.5, -466.5]

In [7]:
# Square each deviation so positive and negative offsets cannot cancel out.
delta_squared = [deviation**2 for deviation in delta]
delta_squared[:5]

[76452.25, 56.25, 156420.25, 178506.25, 217622.25]

In [8]:
# Sum of the squared deviations from the mean.
sum_delta_squared = sum(delta_squared)
sum_delta_squared

83333250.0

In [9]:
# Sample variance: divide by (count - 1) rather than by count.
sample_variance = sum_delta_squared / (count - 1)
sample_variance

83416.66666666667

In [10]:
# Sample standard deviation: the square root of the sample variance.
stdev = sample_variance ** 0.5
stdev

288.8194360957494

## Using numpy in Python

### The fast way

In [11]:
import numpy as np

In [32]:
# Same experiment at a much larger scale: the integers 1..size as an ndarray.
size = 100_000_000
values = np.arange( 1, size + 1 )
# Seed NumPy's global random state so the in-place shuffle is repeatable.
np.random.seed(42)
np.random.shuffle(values)
# Preview the first five shuffled values.
values[:5]


array([45300404, 90756617, 85566922, 50096875, 55130687])

In [33]:
# Arithmetic mean of the whole array, computed by NumPy in one call.
np.mean(values)

50000000.5

In [34]:
# NOTE: NumPy's std defaults to ddof=0, i.e. the population standard
# deviation (divide by N). The from-scratch cells in this notebook divide by
# (count - 1); pass ddof=1 here to get that sample standard deviation instead.
np.std(values)

28867513.4594813

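### From scratch

> **Note:** the outputs shown in this section (cells 15–22) come from an earlier run with `size = 1000`, not from the `size = 100_000_000` array created in cell 32 above. Re-running the cells in order will produce different numbers.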


In [15]:
# Sum of every element of the array.
total = values.sum()
total

500500

In [16]:
# Number of observations in the array.
count = len(values)
count

1000

In [17]:
# Arithmetic mean: the total divided by the number of observations.
mean = total / count
mean

500.5

In [18]:
# Subtract the scalar mean from every element at once (no explicit loop).
delta = values - mean
delta[:5]

array([ 21.5, 237.5, 240.5, 160.5, -88.5])

In [19]:
# Element-wise square of the deviations, so signs cannot cancel out.
delta_squared = np.square(delta)
delta_squared[:5]

array([  462.25, 56406.25, 57840.25, 25760.25,  7832.25])

In [20]:
# Sum of the squared deviations from the mean.
sum_delta_squared = delta_squared.sum()
sum_delta_squared

83333250.0

In [21]:
# Sample variance: divide by (count - 1) rather than by count.
sample_variance = sum_delta_squared / (count - 1)
sample_variance

83416.66666666667

In [22]:
# Sample standard deviation: the square root of the sample variance.
stdev = sample_variance**0.5
stdev

288.8194360957494