In [2]:
import pandas as pd
import numpy as np

In [3]:
# Toy input: one record per stratum holding an average and a row count for
# two snapshots (suffix 0 and suffix 1).
data = [
    dict(strata='a', avg0=8.0, count0=10, avg1=10.0, count1=2),
    dict(strata='b', avg0=2.0, count0=10, avg1=9.0, count1=10),
    dict(strata='c', avg0=10.0, count0=10, avg1=7.0, count1=9),
    dict(strata='d', avg0=10.0, count0=10, avg1=9.0, count1=10),
]
# Column order is fixed explicitly so it does not depend on dict ordering.
df = pd.DataFrame(
    data,
    columns=['strata', 'avg0', 'count0', 'avg1', 'count1'],
)
# Divisor applied to (avg1 - avg0) when computing a trend.
scale = 10.0

In [4]:
# Display the per-stratum input table as constructed above.
df

Unnamed: 0,strata,avg0,count0,avg1,count1
0,a,8.0,10,10.0,2
1,b,2.0,10,9.0,10
2,c,10.0,10,7.0,9
3,d,10.0,10,9.0,10


In [5]:
# Per-stratum metrics, added to `df` in place:
#   trend        - change of the average between the two snapshots, divided by `scale`
#   is_pos_trend - True only for a strictly positive trend (zero counts as False)
#   vol_change   - absolute change of avg * count between the two snapshots
df['trend'] = df['avg1'].sub(df['avg0']).div(scale)
df['is_pos_trend'] = df['trend'].gt(0)
df['vol_change'] = (
    df['avg1'].mul(df['count1']) - df['avg0'].mul(df['count0'])
).abs()
df

Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change
0,a,8.0,10,10.0,2,0.2,True,60.0
1,b,2.0,10,9.0,10,0.7,True,70.0
2,c,10.0,10,7.0,9,-0.3,False,37.0
3,d,10.0,10,9.0,10,-0.1,False,10.0


In [6]:
def dunya(df, scale=None):
    """Collapse a set of strata into one pooled summary row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``avg0``, ``count0``, ``avg1``, ``count1``
        and ``vol_change``.
    scale : float, optional
        Divisor applied to ``avg1 - avg0`` to obtain ``trend``.  When omitted
        the module-level ``scale`` is used, so existing one-argument calls
        (including ``groupby(...).apply(dunya)``) behave as before.

    Returns
    -------
    pandas.DataFrame
        A single row (index label 0) with the columns ``avg0``, ``avg1``,
        ``count0``, ``count1``, ``trend`` and ``vol_change``, in that order.
    """
    if scale is None:
        # The parameter shadows the global of the same name, hence the
        # explicit lookup for the fallback.
        scale = globals()['scale']

    total0 = df['count0'].sum()
    total1 = df['count1'].sum()

    # Count-weighted means.  skipna=False keeps NaN propagation identical to
    # summing with the builtin sum(); float() forces true division so that
    # integer-valued averages are not floor-divided on Python 2.
    pooled_avg0 = (df['avg0'] * df['count0']).sum(skipna=False) / float(total0)
    pooled_avg1 = (df['avg1'] * df['count1']).sum(skipna=False) / float(total1)

    df_agg = pd.DataFrame(
        {
            'avg0': [pooled_avg0],
            'avg1': [pooled_avg1],
            'count0': [total0],
            'count1': [total1],
        },
        columns=['avg0', 'avg1', 'count0', 'count1'],
    )
    df_agg['trend'] = (df_agg['avg1'] - df_agg['avg0']) / scale
    # vol_change is the sum of the per-stratum absolute changes, not the
    # absolute change of the pooled avg * count, so opposite moves of
    # different strata do not cancel each other out.
    df_agg['vol_change'] = df['vol_change'].sum()
    return df_agg

In [7]:
# Pool every stratum into a single population-level summary row.
df_pop = dunya(df)
df_pop

Unnamed: 0,avg0,avg1,count0,count1,trend,vol_change
0,7.5,8.483871,40,31,0.098387,177.0


In [8]:
# Pool the strata separately for non-positive (False) and positive (True)
# trends, then flatten the group index back into ordinary columns.
df_strat = (
    df.groupby(by='is_pos_trend')
      .apply(dunya)
      .reset_index()
)
df_strat

Unnamed: 0,is_pos_trend,level_1,avg0,avg1,count0,count1,trend,vol_change
0,False,0,10.0,8.052632,20,19,-0.194737,47.0
1,True,0,5.0,9.166667,20,12,0.416667,130.0


In [9]:
# Attach each stratum's sign-group summary (pooled trend and total volume
# change).  The right-hand side is keyed by the is_pos_trend value itself:
# joining against df_strat's positional 0/1 index only lines up when both a
# False and a True group exist (False -> row 0, True -> row 1) and fails to
# match when every stratum has a positive trend.
df_impact = df.join(
    df_strat.set_index('is_pos_trend')[['trend', 'vol_change']],
    on='is_pos_trend',
    rsuffix='_strat',
)
# A stratum's impact is its group's pooled trend, split in proportion to the
# stratum's share of the group's total volume change.
df_impact['impact'] = (
    df_impact['trend_strat'] * df_impact['vol_change'] / df_impact['vol_change_strat']
)
df_impact

Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,a,8.0,10,10.0,2,0.2,True,60.0,0.416667,130.0,0.192308
1,b,2.0,10,9.0,10,0.7,True,70.0,0.416667,130.0,0.224359
2,c,10.0,10,7.0,9,-0.3,False,37.0,-0.194737,47.0,-0.153303
3,d,10.0,10,9.0,10,-0.1,False,10.0,-0.194737,47.0,-0.041433


In [10]:
def calc_impacts(df):
    """Split each sign group's pooled trend among its strata.

    Every stratum gets a ``trend`` (change of its average divided by the
    module-level ``scale``), a flag ``is_pos_trend`` (strictly positive trend)
    and a ``vol_change`` (absolute change of ``avg * count``).  Strata are
    pooled per ``is_pos_trend`` value with ``dunya``; a stratum's ``impact``
    is its group's pooled trend multiplied by the stratum's share of the
    group's total ``vol_change``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per stratum with the columns ``avg0``, ``count0``, ``avg1``
        and ``count1``.  The caller's frame is left untouched; all derived
        columns are added to a copy.

    Returns
    -------
    (df_impact, df_pop) : tuple of pandas.DataFrame
        ``df_impact`` holds the input rows plus ``trend``, ``is_pos_trend``,
        ``vol_change``, ``trend_strat``, ``vol_change_strat`` and ``impact``.
        ``df_pop`` is the single-row ``dunya`` summary over all strata.

    Notes
    -----
    ``impact`` is NaN for strata whose group has a total ``vol_change`` of
    zero (0 / 0).
    """
    # Work on a copy so repeated calls never alter the caller's data.
    df = df.copy()
    df['trend'] = (df['avg1'] - df['avg0']) / scale
    df['is_pos_trend'] = df['trend'] > 0
    df['vol_change'] = (df['avg1'] * df['count1'] - df['avg0'] * df['count0']).abs()

    df_pop = dunya(df)

    # One summary row per trend sign, labelled with the is_pos_trend value
    # itself.  Joining on a positional 0/1 index only works when both groups
    # are present; with only positive trends the single row would sit at
    # position 0 and the True rows would find no match.
    strat_rows = []
    for is_pos, group in df.groupby('is_pos_trend'):
        strat_row = dunya(group)
        strat_row.index = [is_pos]
        strat_rows.append(strat_row)
    df_strat = pd.concat(strat_rows)
    df_strat.index.name = 'is_pos_trend'

    df_impact = df.join(
        df_strat[['trend', 'vol_change']], on='is_pos_trend', rsuffix='_strat'
    )
    df_impact['impact'] = (
        df_impact['trend_strat'] * df_impact['vol_change'] / df_impact['vol_change_strat']
    )

    return df_impact, df_pop

In [11]:
# Run the whole pipeline through calc_impacts on the first data set.
df_impact, df_pop = calc_impacts(df)
# Function-call form of print: valid on both Python 2 and Python 3.
print(df_pop)
df_impact

   avg0      avg1  count0  count1     trend  vol_change
0   7.5  8.483871      40      31  0.098387       177.0


Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,a,8.0,10,10.0,2,0.2,True,60.0,0.416667,130.0,0.192308
1,b,2.0,10,9.0,10,0.7,True,70.0,0.416667,130.0,0.224359
2,c,10.0,10,7.0,9,-0.3,False,37.0,-0.194737,47.0,-0.153303
3,d,10.0,10,9.0,10,-0.1,False,10.0,-0.194737,47.0,-0.041433


In [12]:
# Rich display of the population summary most recently returned by calc_impacts.
df_pop

Unnamed: 0,avg0,avg1,count0,count1,trend,vol_change
0,7.5,8.483871,40,31,0.098387,177.0


In [13]:
# Second scenario: two strata whose averages swap (10 -> 1 and 1 -> 10)
# while their counts move in opposite directions.
data2 = [
    dict(strata='dunya', avg0=10.0, count0=10, avg1=1.0, count1=30),
    dict(strata='rapnik', avg0=1.0, count0=30, avg1=10.0, count1=20),
]
df2 = pd.DataFrame(
    data2,
    columns=['strata', 'avg0', 'count0', 'avg1', 'count1'],
)
df2

Unnamed: 0,strata,avg0,count0,avg1,count1
0,dunya,10.0,10,1.0,30
1,rapnik,1.0,30,10.0,20


In [14]:
# Impacts for the two-strata scenario held in df2.
df_impact, df_pop = calc_impacts(df2)
# Function-call form of print: valid on both Python 2 and Python 3.
print(df_pop)
df_impact

   avg0  avg1  count0  count1  trend  vol_change
0  3.25   4.6      40      50  0.135       240.0


Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,dunya,10.0,10,1.0,30,-0.9,False,70.0,-0.9,70.0,-0.9
1,rapnik,1.0,30,10.0,20,0.9,True,170.0,0.9,170.0,0.9


In [15]:
# Third scenario: counts are unchanged; strata a and b fall from 10 to 1
# while c rises from 1 to 10.
data3 = [
    {'strata': 'a', 'avg0': 10.0, 'count0': 10, 'avg1': 1.0, 'count1': 10},
    {'strata': 'b', 'avg0': 10.0, 'count0': 20, 'avg1': 1.0, 'count1': 20},
    {'strata': 'c', 'avg0': 1.0, 'count0': 30, 'avg1': 10.0, 'count1': 30},
]
df3 = pd.DataFrame(data=data3, columns=['strata','avg0','count0','avg1','count1' ])
df_impact, df_pop = calc_impacts(df3)
# Function-call form of print: valid on both Python 2 and Python 3.
print(df_pop)
df_impact

   avg0  avg1  count0  count1  trend  vol_change
0   5.5   5.5      60      60    0.0       540.0


Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,a,10.0,10,1.0,10,-0.9,False,90.0,-0.9,270.0,-0.3
1,b,10.0,20,1.0,20,-0.9,False,180.0,-0.9,270.0,-0.6
2,c,1.0,30,10.0,30,0.9,True,270.0,0.9,270.0,0.9


In [22]:
# Fourth scenario: every stratum starts at an average of 5.0 with equal,
# unchanged counts; only the averages move.
data4 = [
    {'strata': 'a', 'avg0': 5.0, 'count0': 10, 'avg1': 9.0, 'count1': 10},
    {'strata': 'b', 'avg0': 5.0, 'count0': 10, 'avg1': 6.0, 'count1': 10},
    {'strata': 'c', 'avg0': 5.0, 'count0': 10, 'avg1': 4.0, 'count1': 10},
]
df4 = pd.DataFrame(data=data4, columns=['strata','avg0','count0','avg1','count1' ])
df_impact, df_pop = calc_impacts(df4)
# Function-call form of print: valid on both Python 2 and Python 3.
print(df_pop)
df_impact

   avg0      avg1  count0  count1     trend  vol_change
0   5.0  6.333333      30      30  0.133333        60.0


Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,a,5.0,10,9.0,10,0.4,True,40.0,0.25,50.0,0.2
1,b,5.0,10,6.0,10,0.1,True,10.0,0.25,50.0,0.05
2,c,5.0,10,4.0,10,-0.1,False,10.0,-0.1,10.0,-0.1


In [23]:
# Same averages as the 5.0-baseline scenario, but every count drops from 100
# to 10.  NOTE(review): this rebinds data4/df4 from the earlier cell.
data4 = [
    {'strata': 'a', 'avg0': 5.0, 'count0': 100, 'avg1': 9.0, 'count1': 10},
    {'strata': 'b', 'avg0': 5.0, 'count0': 100, 'avg1': 6.0, 'count1': 10},
    {'strata': 'c', 'avg0': 5.0, 'count0': 100, 'avg1': 4.0, 'count1': 10},
]
df4 = pd.DataFrame(data=data4, columns=['strata','avg0','count0','avg1','count1' ])
df_impact, df_pop = calc_impacts(df4)
# Function-call form of print: valid on both Python 2 and Python 3.
print(df_pop)
df_impact

   avg0      avg1  count0  count1     trend  vol_change
0   5.0  6.333333     300      30  0.133333      1310.0


Unnamed: 0,strata,avg0,count0,avg1,count1,trend,is_pos_trend,vol_change,trend_strat,vol_change_strat,impact
0,a,5.0,100,9.0,10,0.4,True,410.0,0.25,850.0,0.120588
1,b,5.0,100,6.0,10,0.1,True,440.0,0.25,850.0,0.129412
2,c,5.0,100,4.0,10,-0.1,False,460.0,-0.1,460.0,-0.1
