In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
MONEY_FIELDS = 'donations_grants	sponsorship	registration_fees	travel_accommodation	fees	related_expenses	total'.split()
MONEY_FIELDS_ONLY = list(set(MONEY_FIELDS) - {'total'})
DOC_MONEY_FIELDS = list(set(MONEY_FIELDS_ONLY) - {'sponsorship', 'donations_grants'})
MONEY_FIELDS_DIRTY = ['%s_dirty' % x for x in MONEY_FIELDS_ONLY]

pd.set_option('display.float_format', lambda x: '{0:,.2f}'.format(x))
%matplotlib inline

In [20]:
df = pd.read_csv('aerztefranken.csv', encoding='utf-8')
df.head()

Unnamed: 0,company,name,name_unq,location,location_fix,address,donations_grants,sponsorship,registration_fees,travel_accommodation,fees,related_expenses,total,type
0,"AbbVie AG, Baar",Patrick Aepli,Patrick Aepli,Luzern,Luzern,Spitalstrasse,0.0,0.0,247.0,439.0,0.0,0.0,686.0,hcp
1,"AbbVie AG, Baar",Andre Aeschlimann,Andre Aeschlimann,Bad Zurzach,Bad Zurzach,Quellenstrasse 34,0.0,0.0,0.0,0.0,1000.0,0.0,1000.0,hcp
2,"AbbVie AG, Baar",Carlo Albani,Albani Carlo,Zürich,Zürich,Bahnhofplatz 5,0.0,0.0,93.0,277.0,0.0,0.0,370.0,hcp
3,"AbbVie AG, Baar",Andreas Altmann,Andreas Altmann,Riehen,Riehen,In den Neumatten 63,0.0,0.0,0.0,0.0,600.0,0.0,600.0,hcp
4,"AbbVie AG, Baar",Anca Antonino,Anca Antonino,Vevey,Vevey,Boulevard Paderewski 3,0.0,0.0,330.0,734.0,0.0,0.0,1064.0,hcp


In [22]:
df[MONEY_FIELDS_ONLY] = df[MONEY_FIELDS_ONLY].applymap(lambda x: x if pd.isnull(x) else (np.nan if x == 0.0 else x))
df['computed_total'] = df[MONEY_FIELDS_ONLY].sum(1)

0     686.00
1   1,000.00
2     370.00
3     600.00
4   1,064.00
Name: computed_total, dtype: float64

In [180]:
def company_aggregate(x):
    result = {}
    for m in MONEY_FIELDS:
        result[m+'_cnt'] = x[m].count()
        result[m+'_sum'] = x[m].sum()
    return pd.Series(result, name='metrics')
df.groupby(['company','type']).apply(company_aggregate)

Unnamed: 0_level_0,metrics,donations_grants_cnt,donations_grants_sum,fees_cnt,fees_sum,registration_fees_cnt,registration_fees_sum,related_expenses_cnt,related_expenses_sum,sponsorship_cnt,sponsorship_sum,total_cnt,total_sum,travel_accommodation_cnt,travel_accommodation_sum
company,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"A Menarini AG, Zürich",hco,11.00,164271.00,4.00,5411.00,3.00,1812.00,0.00,,109.00,598476.00,122.00,773840.00,3.00,3870.00
"A Menarini AG, Zürich",hcp,0.00,,50.00,75178.00,151.00,59563.00,0.00,,4.00,11000.00,220.00,286576.00,181.00,140835.00
"AbbVie AG, Baar",hco,30.00,446030.00,19.00,80936.00,6.00,3597.00,0.00,,106.00,3859535.00,131.00,4396511.00,6.00,6413.00
"AbbVie AG, Baar",hcp,0.00,,70.00,89355.00,219.00,87498.00,5.00,4663.00,0.00,,281.00,362391.00,217.00,180875.00
"Actelion Pharma Schweiz AG, Baden",hco,23.00,498505.46,7.00,13355.00,1.00,3850.00,0.00,,21.00,65843.92,40.00,581554.38,0.00,
"Actelion Pharma Schweiz AG, Baden",hcp,0.00,,1.00,1000.00,87.00,32532.20,5.00,4037.24,0.00,,89.00,90785.28,85.00,53215.84
"Alcon Switzerland SA, Rotkreuz",hco,0.00,,2.00,1700.00,0.00,,0.00,,1.00,8000.00,2.00,9700.00,0.00,
"Alcon Switzerland SA, Rotkreuz",hcp,0.00,,15.00,15850.00,0.00,,0.00,,0.00,,15.00,15850.00,0.00,
"Allergan AG, Lachen",hco,2.00,6200.00,0.00,,0.00,,0.00,,18.00,56600.00,20.00,62800.00,0.00,
"Allergan AG, Lachen",hcp,0.00,,28.00,44000.00,4.00,2908.00,0.00,,0.00,,37.00,54612.00,9.00,6954.00


In [133]:
# TODO: company_df?

pd.merge(
    pd.merge(
        df.groupby(['company','type']).apply(lambda x:
            x[MONEY_FIELDS_ONLY].sum()
        ),
        df.groupby(['company','type']).apply(lambda x: pd.Series({
            'count': x[MONEY_FIELDS_ONLY].count().sum()
        })),
        left_index=True, right_index=True
    ),
    df.groupby(['company','type']).apply(lambda x: pd.Series({
        'total': x['total'].sum()
    })),
    left_index=True, right_index=True
)

Unnamed: 0_level_0,Unnamed: 1_level_0,registration_fees,fees,related_expenses,sponsorship,donations_grants,travel_accommodation,count,total
company,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"A Menarini AG, Zürich",hco,1812.00,5411.00,,598476.00,164271.00,3870.00,130,773840.00
"A Menarini AG, Zürich",hcp,59563.00,75178.00,,11000.00,,140835.00,386,286576.00
"AbbVie AG, Baar",hco,3597.00,80936.00,,3859535.00,446030.00,6413.00,167,4396511.00
"AbbVie AG, Baar",hcp,87498.00,89355.00,4663.00,,,180875.00,511,362391.00
"Actelion Pharma Schweiz AG, Baden",hco,3850.00,13355.00,,65843.92,498505.46,,52,581554.38
"Actelion Pharma Schweiz AG, Baden",hcp,32532.20,1000.00,4037.24,,,53215.84,178,90785.28
"Alcon Switzerland SA, Rotkreuz",hco,,1700.00,,8000.00,,,3,9700.00
"Alcon Switzerland SA, Rotkreuz",hcp,,15850.00,,,,,15,15850.00
"Allergan AG, Lachen",hco,,,,56600.00,6200.00,,20,62800.00
"Allergan AG, Lachen",hcp,2908.00,44000.00,,,,6954.00,41,54612.00


In [6]:
agg_df = []
for company in df.company.unique():
    
    agg_df.append({
            'company': company,
            'year': 2015
            'type': hcphco,
            'label': field,
            'count': count,
            'amount': amount
        })

array(['AbbVie AG, Baar', 'Actelion Pharma Schweiz AG, Baden',
       'Alcon Switzerland SA, Rotkreuz', 'Allergan AG, Lachen',
       'Almirall AG, Wallisellen', 'A Menarini AG, Zürich',
       'Amgen Switzerland AG, Zug', 'Astellas Pharma AG, Wallisellen',
       'AstraZeneca AG, Zug', 'Baxalta Schweiz AG, Volketswil',
       'Baxter AG, Volketswil', 'Bayer Schweiz AG, Zürich',
       'BGP Products GmbH, Baar', 'Biogen Switzerland AG, Zug',
       'Biotest (Schweiz) AG, Rupperswil',
       'Boehringer Ingelheim (Schweiz) GmbH, Basel',
       'Bristol-Myers Squibb GmbH, Cham', 'Celegene GmbH, Zürich',
       'CSL Behring AG, Bern', 'Daiichi Sankyo (Shweiz) AG, Thalwil',
       'Eisai Pharma AG, Zürich', 'Eli Lilly (Suisse) SA, Vernier',
       'Ferring AG, Baar', 'Future Health Pharma GmbH, Wetzikon',
       'Galderma Schweiz AG', 'Genzyme a Sanofi Company, Baar',
       'GILEAD Sciences Switzerland Sàrl, Zug',
       'GlaxoSmithKline AG, Münchenbuchsee',
       'Grünenthal Pharma AG, 