In [11]:
import pandas as pd
import numpy as np
from scipy.stats import trim_mean
import os

In [12]:
project_root = os.path.abspath(os.pardir)
data_loc = os.path.join(project_root, 'data')
raw_data_loc = os.path.join(data_loc, 'raw')
state_data = os.path.join(raw_data_loc, 'state.csv')

In [13]:
df = pd.read_csv(state_data)
df.describe()

Unnamed: 0,Population,Murder.Rate
count,50.0,50.0
mean,6162876.0,4.066
std,6848235.0,1.915736
min,563626.0,0.9
25%,1833004.0,2.425
50%,4436370.0,4.0
75%,6680312.0,5.55
max,37253960.0,10.3


In [14]:
# Page 12 Compute the mean, trimmed mean and media population
pop_mean = df.Population.mean()
print('Mean Population: {0}'.format(pop_mean))

Mean Population: 6162876.3


In [21]:
pop_median = df.Population.median()
print('Median Population: {0}'.format(pop_median))

Median Population: 4436369.5


In [26]:
# Pandas cannot get you the trimmed mean, so use SciPy
trimmed_mean = trim_mean(df.Population, 0.1)
print('Trimmed mean: {0}'.format(trimmed_mean))

Trimmed mean: 4783697.125


In [60]:
mr_mean = df['Murder.Rate'].mean()
print('Mean murder rate: {0}'.format(mr_mean))

def get_weighted_murder_rate(d):
    return (d['Population'] * d['Murder.Rate']).sum()/d['Population'].sum()

weighted_mean_mr = get_weighted_murder_rate(df)
print('Weighted median murder rate : {0}'.format(weighted_mean_mr))

Mean murder rate: 4.066
Weighted median murder rate : 4.445833981123394


In [61]:
# Page 18 Variability
# Standard deviation
standard_dev = df.Population.std()
print('Standard Deviantion: {0}'.format(standard_dev))

Standard deviantion: 6848235.347401142


In [62]:
# Interquartile Range (IQR)
iqr = df.Population.quantile(0.75) - df.Population.quantile(0.25)
print('Interquartile Range: {0}'.format(iqr))

Interquartile Range: 4847308.0


In [63]:
#Median Absolute Deviation from the median (MAD)
mad = df.Population.mad
# Scale seems different than R (3849870)
print('Median Absolute Deviation from the Median: {0}'.format(mad))

Median Absolute Deviation from the Median: 4450933.356000001
