In [1]:
# Implementing statistics in plain Python, without library helpers (NumPy is used only to check the results)

In [22]:
# 1. Mean and Median
def mean(numbers):
  """Return the arithmetic mean of `numbers`.

  An empty input yields 0 instead of raising an error.
  """
  count = len(numbers)
  if count == 0:
    return 0

  # Accumulate by hand, starting from the integer 0.
  total = 0
  for value in numbers:
    total += value

  return total / count

def median(numbers):
  """Return the median of `numbers` without modifying the input.

  For an odd number of values this is the middle value of the sorted data;
  for an even number it is the average of the two middle values.
  An empty input yields 0.
  """
  if len(numbers) == 0:
    return 0
  # sorted() builds a new list, so the caller's sequence keeps its original
  # order, and sequences without a .sort() method (e.g. tuples) are accepted.
  ordered = sorted(numbers)
  N = len(ordered)
  middle = N // 2
  if N % 2 == 0:
    # Even count: average the two values on either side of the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2
  return ordered[middle]

In [31]:
import numpy as np
# Cross-check mean() and median() against NumPy on random arrays of growing size.
N_TESTS = 1_000
threshold = .00000000001  # largest absolute difference still counted as a match
tests = []  # one report string per mismatch
# Array sizes run from 1 to N_TESTS inclusive, so exactly N_TESTS cases are checked.
for i in range(1, N_TESTS + 1):
  numbers = np.random.random(i) * 100 + np.random.random()
  # Evaluate each statistic once and reuse it in both the check and the report.
  ourMedian, npMedian = median(numbers), np.median(numbers)
  if np.abs(ourMedian - npMedian) > threshold:
    tests.append(f'median(numbers) = {ourMedian} but np.median(numbers) = {npMedian}, at i={i}')
  ourMean, npMean = mean(numbers), np.mean(numbers)
  if np.abs(ourMean - npMean) > threshold:
    tests.append(f'mean(numbers) = {ourMean} but np.mean(numbers) = {npMean}, at i={i}')

for report in tests:
  print(report)
if len(tests) == 0:
  print('All tests succeeded!')

All tests succeeded!


In [41]:
# Number of times values are present

def countEntries(numbers):
  """Tally how often each distinct value occurs in `numbers`.

  Returns a dict mapping each value to its number of occurrences; an empty
  input gives an empty dict.
  """
  tally = {}
  for value in numbers:
    # get() supplies 0 the first time a value is seen.
    tally[value] = tally.get(value, 0) + 1
  return tally


In [51]:
# Demo: draw 20 random integers from 0 to 4, then show the tally next to the raw data.
numbers = np.random.randint(low=0, high=5, size=20)
print(countEntries(numbers))
print(numbers)

{0: 3, 1: 4, 3: 8, 4: 3, 2: 2}
[0 1 0 3 0 4 3 3 3 3 3 1 4 1 3 3 4 1 2 2]


In [58]:
def mode(numbers):
  """Return the most frequent value(s) in `numbers`.

  Returns None for an empty input, the single value when exactly one value
  has the highest count, and a list of all tied values otherwise.
  """
  if len(numbers) == 0:
    return None

  occurrences = countEntries(numbers)
  # Every count is at least 1, so the largest count is simply the max of the values.
  highest = max(occurrences.values())
  winners = [value for value, count in occurrences.items() if count == highest]

  if len(winners) > 1:
    return winners
  return winners[0]


In [63]:
# Demo: draw 20 random integers from 0 to 9, then show the tally and the resulting mode.
numbers = np.random.randint(low=0, high=10, size=20)
print(countEntries(numbers))
print(mode(numbers))

{8: 4, 9: 3, 3: 2, 5: 4, 7: 2, 2: 3, 4: 1, 1: 1}
[8, 5]


In [79]:
# Standard deviation

def standardDeviation(numbers):
  """Return the population standard deviation of `numbers`.

  The squared deviations from the mean are averaged over N (not N - 1) and
  the square root is taken. Inputs with fewer than two values yield 0.
  """
  count = len(numbers)
  if count < 2:
    return 0
  center = mean(numbers)
  squaredTotal = sum((value - center) ** 2 for value in numbers)
  # Scale by the reciprocal of the count to obtain the variance.
  variance = squaredTotal * (1 / count)
  return variance ** 0.5


In [80]:
# Cross-check standardDeviation() against np.std on 1,000 random integer samples.
# Each sample holds 100 integers from 0 up to a randomly chosen exclusive bound
# between 5 and 99. Only mismatches are printed.
for i in range(1_000):
  numbers = np.random.randint(0,np.random.randint(5,100),100)
  std = standardDeviation(numbers)
  npstd = np.std(numbers)
  if np.abs(std - npstd) > threshold:
    print(f'Standard deviation differed for {numbers}. std={std}, np.std={npstd}')