In [5]:
import pandas as pd

# Group by

## Working with ratios

In [6]:
# Load the per-country drinks dataset (fetched over the network via a short link).
df = pd.read_csv('https://bit.ly/drinksbycountry')
# Derive a coarser grouping key: the first character of each continent name.
# The vectorised `.str` accessor replaces a per-row Python lambda and leaves
# missing values as NaN instead of raising a TypeError.
df['continent_first_letter'] = df['continent'].str[0]
print(df.head())

       country  beer_servings  spirit_servings  wine_servings  \
0  Afghanistan              0                0              0   
1      Albania             89              132             54   
2      Algeria             25                0             14   
3      Andorra            245              138            312   
4       Angola            217               57             45   

   total_litres_of_pure_alcohol continent continent_first_letter  
0                           0.0      Asia                      A  
1                           4.9    Europe                      E  
2                           0.7    Africa                      A  
3                          12.4    Europe                      E  
4                           5.9    Africa                      A  


We want to calculate a ratio of summed values by continent or other grouping:

In [14]:
# Sum beer and spirit servings per continent, then take the ratio of the sums.
# Columns are selected with a list (double brackets): selecting several columns
# from a GroupBy with a bare tuple was deprecated and raises in pandas 2.x.
df1 = df.groupby('continent')[['beer_servings', 'spirit_servings']].sum()
df1['ratio'] = df1['beer_servings'] / df1['spirit_servings']
df1

Unnamed: 0_level_0,beer_servings,spirit_servings,ratio
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,3258,866,3.762125
Asia,1630,2677,0.608891
Europe,8720,5965,1.461861
North America,3345,3812,0.877492
Oceania,1435,935,1.534759
South America,2101,1377,1.525781


In [15]:
# Same ratio of summed servings, grouped by the first letter of the continent.
# Columns are selected with a list (double brackets): selecting several columns
# from a GroupBy with a bare tuple was deprecated and raises in pandas 2.x.
df2 = df.groupby('continent_first_letter')[['beer_servings', 'spirit_servings']].sum()
df2['ratio'] = df2['beer_servings'] / df2['spirit_servings']
df2

Unnamed: 0_level_0,beer_servings,spirit_servings,ratio
continent_first_letter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,4888,3543,1.379622
E,8720,5965,1.461861
N,3345,3812,0.877492
O,1435,935,1.534759
S,2101,1377,1.525781


Alternatively, use `transform` so that the result keeps the shape of the original frame (one ratio value per country row):

In [27]:
# Work on a copy so the ratio columns are not added to `df` itself.
df3 = df.copy()

# `transform('sum')` broadcasts each group's total back onto every row of that
# group, so the result stays aligned with df3's index. The built-in 'sum' is
# used instead of a Python-level `lambda s: sum(s)`, and each grouping is
# evaluated once for both serving columns.
servings = ['beer_servings', 'spirit_servings']
continent_totals = df3.groupby('continent')[servings].transform('sum')
letter_totals = df3.groupby('continent_first_letter')[servings].transform('sum')

df3['ratio_per_continent'] = (
    continent_totals['beer_servings'] / continent_totals['spirit_servings']
)
df3['ratio_per_continent_first_letter'] = (
    letter_totals['beer_servings'] / letter_totals['spirit_servings']
)
df3.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,continent_first_letter,ratio_per_continent,ratio_per_continent_first_letter
0,Afghanistan,0,0,0,0.0,Asia,A,0.608891,1.379622
1,Albania,89,132,54,4.9,Europe,E,1.461861,1.461861
2,Algeria,25,0,14,0.7,Africa,A,3.762125,1.379622
3,Andorra,245,138,312,12.4,Europe,E,1.461861,1.461861
4,Angola,217,57,45,5.9,Africa,A,3.762125,1.379622


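Then group again to collapse back to one row per group. The ratio is constant within each group, so `max` simply picks that value: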

In [36]:
# Reduce the broadcast ratio column to a single value per first-letter group.
a = (
    df3
    .groupby('continent_first_letter')['ratio_per_continent_first_letter']
    .max()
)
a

continent_first_letter
A    1.379622
E    1.461861
N    0.877492
O    1.534759
S    1.525781
Name: ratio_per_continent_first_letter, dtype: float64

This works, but it is rather long-winded.

Or create a reusable function:

In [33]:
def calc_beer_spirit_ratio(group, data=None):
    """Return summed beer/spirit servings per group and the ratio of the sums.

    Parameters
    ----------
    group : label or list of labels
        Column name(s) to group by; passed straight to ``DataFrame.groupby``.
    data : pandas.DataFrame, optional
        Frame holding ``beer_servings`` and ``spirit_servings`` columns.
        When omitted, the notebook-level ``df`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``group`` with columns ``beer_servings``,
        ``spirit_servings`` and ``ratio`` (beer sum divided by spirit sum).
    """
    if data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        data = df
    # Select columns with a list: a bare tuple of column names on a GroupBy
    # was deprecated and raises in pandas 2.x.
    res = data.groupby(group)[['beer_servings', 'spirit_servings']].sum()
    res['ratio'] = res['beer_servings'] / res['spirit_servings']
    return res

# Ratio table grouped by the first letter of the continent name.
calc_beer_spirit_ratio(group='continent_first_letter').head(n=5)

Unnamed: 0_level_0,beer_servings,spirit_servings,ratio
continent_first_letter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,4888,3543,1.379622
E,8720,5965,1.461861
N,3345,3812,0.877492
O,1435,935,1.534759
S,2101,1377,1.525781


In [34]:
# Ratio table per continent. `.head()` shows only the first five groups,
# so the sixth continent (South America) is not displayed here.
calc_beer_spirit_ratio(group='continent').head(n=5)

Unnamed: 0_level_0,beer_servings,spirit_servings,ratio
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,3258,866,3.762125
Asia,1630,2677,0.608891
Europe,8720,5965,1.461861
North America,3345,3812,0.877492
Oceania,1435,935,1.534759
