In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Switch on seaborn's default styling for any matplotlib figures drawn later.
sns.set()
''' Note that calculating mortality as the sum of deaths divided by the sum of population,
    across all age groups, gives the correct average mortality.
    If we instead take the plain (unweighted) average of the age-group-specific mortalities,
    we get a misleading number.
    Note also that the formal weighted average of the age-group-specific mortality rates,
    weighted by each group's population, gives the same answer as working with the absolute
    death and population numbers.
'''


In [None]:
# Synthetic population table: one row per age band with its head count and death count.
age_bands = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59',
             '60-69', '70-79', '80-89', '90-99', '100+']
head_counts = [100, 1000] + [10000] * 9
death_counts = [1, 5] + list(range(1000, 1900, 100))

df = pd.DataFrame({'age': age_bands, 'pop': head_counts, 'dead': death_counts})

# Age-band-specific mortality rate (deaths per person in the band).
df['mortality'] = df['dead'].div(df['pop'])

# Overall mortality: total deaths over total population.
column_totals = df[['dead', 'pop']].sum()
total_mort = column_totals['dead'] / column_totals['pop']

print ('total mortality ',total_mort)

df

In [None]:
### Rates from different groups cannot simply be added together: with enough groups
### the sum exceeds 100% mortality!
### Monthly mortalities of a single cohort, however, can be summed to a yearly mortality,
### as in the post SWEDEN FINAL SUMMARY.

df.loc[:, 'mortality'].sum()

In [None]:
from IPython.display import Image, display,Latex

# Render the general weighted-average formula: sum(weight_i * data_i) / sum(weight_i).
weighted_avg_formula = r'$\frac{\sum_{i=1}^n weight_i * data_i}{\sum_{i=1}^n weight_i}$'
display(Latex(weighted_avg_formula))

In [None]:

# Population-weighted average of the per-band mortality rates:
# sum(pop * mortality) / sum(pop).
band_population = df['pop']
weighted_rate_sum = band_population.mul(df['mortality']).sum()
weighted_mortality = weighted_rate_sum / band_population.sum()
weighted_mortality

In [None]:
# Plain (unweighted) mean of the per-band mortality rates; every band counts
# equally regardless of its population.
band_rates = df['mortality']
avg_mortality = band_rates.mean()
avg_mortality

In [None]:
### Baseball Batting Example ###

# At-bats and hits for players A and B, one row per half of the season.
half_season_stats = {
    'A_bats': [4, 40],
    'A_hits': [1, 15],
    'B_bats': [10, 5],
    'B_hits': [3, 2],
}
df_baseball = pd.DataFrame(half_season_stats, index=['1H', '2H'])

df_baseball

In [None]:
# Add a totals row for the whole season. Only the two half-season rows are summed,
# so re-running this cell does not fold an existing 'full_season' row back into
# the total (which would double the counts).
df_baseball.loc['full_season'] = df_baseball.loc[['1H', '2H']].sum()


In [None]:
# Show the batting table in its current state.
df_baseball

In [None]:
# Batting average = hits / at-bats, computed per player for every row of the table.
for player in ('A', 'B'):
    df_baseball[player + '_avg'] = df_baseball[player + '_hits'] / df_baseball[player + '_bats']

df_baseball

In [None]:
### Naive averaging ###
# Unweighted mean of each player's batting averages over the rows up to and
# including '2H'; at-bat counts are ignored.
avg_columns = ['A_avg', 'B_avg']
df_baseball.loc[:'2H', avg_columns].mean()

In [None]:
def _weighted_batting_avg(frame, player):
    """Return the at-bat-weighted batting average of `player` over rows up to '2H'.

    Computes sum(bats * avg) / sum(bats) using the `<player>_bats` and
    `<player>_avg` columns of `frame`.
    """
    halves = frame.loc[:'2H']
    at_bats = halves[player + '_bats']
    return (at_bats * halves[player + '_avg']).sum() / at_bats.sum()


baseball_weighted_avg_A = _weighted_batting_avg(df_baseball, 'A')
baseball_weighted_avg_B = _weighted_batting_avg(df_baseball, 'B')

print (baseball_weighted_avg_A)
print (baseball_weighted_avg_B)

In [None]:
### example grades ###

# Six grade entries; the value 5 occurs twice.
grade_values = [1, 2, 3, 4, 5, 5]
grades = pd.DataFrame(grade_values, columns=['Grade'])
grades

In [None]:
# Plain column-wise mean over every row of `grades`.
grades.mean()

In [None]:
# Distinct grade values, in order of first appearance.
unique_grades = grades['Grade'].unique()

# Weight of each distinct grade = how many times it occurs in the data.
# Deriving the counts (and aligning them to `unique_grades`) keeps the weights
# consistent with the data instead of relying on a hand-maintained array.
weights = grades['Grade'].value_counts().reindex(unique_grades).values

print (weights)
print (unique_grades)

In [None]:
# Weighted mean of the distinct grades: sum(weight * grade) / sum(weight).
weighted_total = (weights * unique_grades).sum()
weight_total = weights.sum()
weighted_grades = weighted_total / weight_total
print (weighted_grades)

In [None]:
### Two-group mortality example ###

# Two groups of very different size: A is large with low mortality,
# B is small with high mortality.
pop_data = pd.DataFrame({'pop' : [900,100],'dead' : [9,50]},index=['A','B'])
groups = ['A', 'B']

# Append a totals row. `.loc` is used because `.at` is a scalar-only accessor
# and cannot assign a whole row.
pop_data.loc['tot'] = pop_data.loc[groups].sum()

# Mortality per row; for the 'tot' row this is total dead / total pop.
pop_data['mort'] = pop_data['dead'] / pop_data['pop']

# Population-weighted average of the group mortalities: sum(mort * pop) / sum(pop).
group_mort = pop_data.loc[groups, 'mort']
group_pop = pop_data.loc[groups, 'pop']
weighted_avg = (group_mort * group_pop).sum() / group_pop.sum()

print (weighted_avg)
# Naive (unweighted) average of the two group mortalities, for comparison.
print ( group_mort.mean() )

pop_data

