# Biggest winner - biggest loser and other stuff

In [9]:
import pandas as pd
import mysql.connector
pd.options.display.float_format = "{:,.0f}".format

## Connect

In [2]:
db_connection = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="",
    database = "pharmagelder"
)

## Load Data

In [20]:
df_transaction = pd.read_sql("""SELECT 
      tra_id as id,
      pha_id as pharma_id,
      pha_name as pharma_name,
      rec_id as recipient_id,
      rec_name as recipient_name,
      rec_address as recipient_address,
      rec_location as recipient_location,
      rec_type as recipient_type,
      trc_id as transaction_category_id,
      trc_name as transaction_category_name,
      tra_year as year,
      tra_value as value
  FROM transaction
  LEFT JOIN pharma ON tra_fk_pharma = pha_id 
  LEFT JOIN recipient ON tra_fk_recipient = rec_id
  LEFT JOIN transaction_category ON tra_fk_transaction_category = trc_id;""", con=db_connection)

## Analyze

In [21]:
df_rec = df_transaction.groupby(['recipient_id', 'year']).agg({
    'recipient_name': 'first',
    'recipient_address': 'first',
    'recipient_location': 'first',
    'recipient_type': 'first',
    'value': 'sum'
})

df_pivot = pd.pivot_table(df_rec, values='value', index=['recipient_id', 'recipient_name', 'recipient_type'], columns='year')
df_pivot = df_pivot.reset_index()

### Delta 2018

In [6]:
df_2018 = df_pivot[df_pivot[2017].notna() & df_pivot[2018].notna()].copy()
df_2018['delta_2018'] = round(df_2018[2018] - df_2018[2017])
df_2018['ratio_2018'] = round(100 / df_2018[2017] * df_2018[2018])

## HCO

### Absolut am meisten zugelegt

In [7]:
df_2018 = df_2018.sort_values('delta_2018', ascending=False)
df_2018[df_2018.recipient_type == 'hco'].head(10)

year,recipient_id,recipient_name,recipient_type,2015,2016,2017,2018,delta_2018,ratio_2018
8995,9021,ESMO,hco,3062325.11,10322180.21,10340144.9,12308069.48,1967925.0,119.0
8988,9014,ERS European Respiratory Society ERS Headquarters,hco,2372427.34,3286866.48,2809678.77,4671194.24,1861515.0,166.0
8954,8979,EASL European Association for the Study of the...,hco,1497026.7,2868655.75,2166821.77,3672046.91,1505225.0,169.0
9760,9799,Universität Zürich,hco,812904.59,694331.7,565715.28,1505651.86,939937.0,266.0
9055,9082,Forum für medizinische Fortbildung - FomF GmbH,hco,314224.0,518402.0,761400.8,1566502.16,805101.0,206.0
9007,9033,EULAR - European League Against Rheumatism,hco,987598.0,3383589.2,3541336.97,4301806.18,760469.0,121.0
9004,9030,ETH Zürich,hco,798353.64,626080.0,249716.0,983580.0,733864.0,394.0
8962,8988,ECTRIMS European Committee for Treatment and R...,hco,434606.43,968478.05,666265.95,1190873.15,524607.0,179.0
9095,9122,FOREUM Foundation for Research in Rheumatology,hco,1940172.45,329320.55,307972.44,755983.48,448011.0,245.0
10454,10517,WSO World Stroke Organization,hco,213524.48,98134.0,50007.0,467951.4,417944.0,936.0


### Prozentual am meisten zugelegt

In [27]:
df_2018 = df_2018.sort_values('ratio_2018', ascending=False)
df_2018[df_2018.recipient_type == 'hco'].head(10)

year,recipient_id,recipient_name,recipient_type,2015,2016,2017,2018,delta_2018,ratio_2018
10285,10341,Swiss Oncology & Hematology Congress,hco,,,600.0,358019.0,357419.0,59670.0
9547,9581,Lung Cancer Europe (LUCE),hco,5000.0,28851.0,442.96,209105.28,208662.0,47206.0
9307,9337,High Tech Home Care AG,hco,,,327.04,47300.75,46974.0,14463.0
9290,9320,Hepatitis Schweiz,hco,,,2000.0,230000.0,228000.0,11500.0
10001,10048,Schweizerische MD-PhD Gesellschaft,hco,3586.0,,2000.0,206101.0,204101.0,10305.0
9969,10016,Schweizerische Vereinigung gegen die Osteoporo...,hco,4000.0,36000.0,3000.0,206000.0,203000.0,6867.0
9796,9836,Praxis am Bahnhof,hco,,,200.0,7860.0,7660.0,3930.0
9584,9618,Medical Image Analysis Center (MIAC AG),hco,,22314.84,2497.7,71579.95,69082.0,2866.0
10431,10494,"Viollier AG Kardiologie, Ambulatorium",hco,784.33,,1342.0,35000.0,33658.0,2608.0
10331,10388,"Thurgau Spital, Frauenklinik Spital Frauenfeld",hco,,,2808.0,61325.5,58518.0,2184.0


## HCP

### Absolut am meisten zugelegt

In [28]:
df_2018 = df_2018.sort_values('delta_2018', ascending=False)
df_2018[df_2018.recipient_type == 'hcp'].head(10)

year,recipient_id,recipient_name,recipient_type,2015,2016,2017,2018,delta_2018,ratio_2018
2864,2874,François Mach,hcp,6545.8,11656.83,39239.48,86803.64,47564.0,221.0
6934,6950,Rolf A. Stahel,hcp,81973.84,75887.0,98930.11,141462.94,42533.0,143.0
5428,5443,Matti Aapro,hcp,97214.55,34925.87,39577.09,72624.21,33047.0,184.0
7862,7880,Thomas Szucs,hcp,1000.0,1800.0,1000.0,32317.24,31317.0,3232.0
6733,6749,Reinhard Dummer,hcp,,2000.0,7914.1,38902.88,30989.0,492.0
117,121,Albina Nowak,hcp,7373.51,,600.0,25828.84,25229.0,4305.0
134,138,Alessandra Curioni,hcp,2324.0,642.0,8327.0,31162.95,22836.0,374.0
6857,6873,Robert Steffen,hcp,6000.0,1200.0,16337.5,38066.0,21728.0,233.0
7247,7263,Serge Ferrari,hcp,,3157.0,1500.0,21214.23,19714.0,1414.0
5697,5712,Miklos Pless,hcp,8709.27,17827.0,12395.02,31452.66,19058.0,254.0


### Prozentual am meisten zugelegt

In [29]:
df_2018 = df_2018.sort_values('ratio_2018', ascending=False)
df_2018[df_2018.recipient_type == 'hcp'].head(10)

year,recipient_id,recipient_name,recipient_type,2015,2016,2017,2018,delta_2018,ratio_2018
117,121,Albina Nowak,hcp,7373.51,,600.0,25828.84,25229.0,4305.0
7862,7880,Thomas Szucs,hcp,1000.0,1800.0,1000.0,32317.24,31317.0,3232.0
6310,6326,Peter Bartel,hcp,1450.3,1239.46,83.42,1949.98,1867.0,2338.0
521,527,Angela Pugliesi,hcp,,1500.0,400.2,9167.26,8767.0,2291.0
1975,1984,Jan Izakovic,hcp,7933.37,2471.06,500.0,10432.25,9932.0,2086.0
5662,5677,Michel Zuber,hcp,,,1000.0,20000.0,19000.0,2000.0
2378,2388,Doris Zürcher,hcp,,,500.0,8309.3,7809.0,1662.0
7157,7173,Sandrine Zweifel,hcp,2000.0,1830.98,750.0,12314.05,11564.0,1642.0
1006,1014,Photis Beris,hcp,7914.57,6673.9,160.36,2623.95,2464.0,1636.0
628,634,Anne Rosselet,hcp,9802.67,2472.93,528.67,8523.12,7994.0,1612.0


## Absolut tops

In [12]:
df_2018 = df_2018.sort_values(2018, ascending=False)
df_2018[df_2018.recipient_type == 'hco'].head(10)

year,recipient_id,recipient_name,recipient_type,2015,2016,2017,2018,delta_2018,ratio_2018
8995,9021,ESMO,hco,3062325,10322180,10340145,12308069,1967925,119
8988,9014,ERS European Respiratory Society ERS Headquarters,hco,2372427,3286866,2809679,4671194,1861515,166
9007,9033,EULAR - European League Against Rheumatism,hco,987598,3383589,3541337,4301806,760469,121
8954,8979,EASL European Association for the Study of the...,hco,1497027,2868656,2166822,3672047,1505225,169
9028,9054,EXCEMED - Excellence in Medical Education,hco,4043310,3137022,4694351,2722032,-1972319,58
8916,8938,Insel Gruppe AG,hco,1994105,2419922,2709783,2562998,-146785,95
8693,8713,Universitätsspital Basel,hco,1695838,2010889,2607496,2511371,-96125,96
8952,8977,EADV European Academy of Dermatology and Vener...,hco,1135978,837189,1900173,2254468,354296,119
8800,8822,Centre Hospitalier Universitaire Vaudois CHUV,hco,1685507,2025378,2462314,2167948,-294366,88
8914,8936,UniversitätsSpital Zürich,hco,1321398,1441830,2173823,2047589,-126234,94


## Franken pro Empfänger pro Pharma

In [39]:
df_hcp = df_transaction[df_transaction.recipient_type == 'hcp']
df_hcp = df_hcp.groupby(['pharma_name', 'year']).agg({'value': 'sum', 'recipient_id': 'count'})
df_pivot = pd.pivot_table(df_hcp, values=['value', 'recipient_id'], columns='year', index='pharma_name')
df_pivot['2015_avg'] = df_pivot['value'][2015] / df_pivot['recipient_id'][2015]
df_pivot['2016_avg'] = df_pivot['value'][2016] / df_pivot['recipient_id'][2016]
df_pivot['2017_avg'] = df_pivot['value'][2017] / df_pivot['recipient_id'][2017]
df_pivot['2018_avg'] = df_pivot['value'][2018] / df_pivot['recipient_id'][2018]

df_pivot = df_pivot.sort_values(['2018_avg'], ascending=False)

df_pivot.head(10)

Unnamed: 0_level_0,recipient_id,recipient_id,recipient_id,recipient_id,value,value,value,value,2015_avg,2016_avg,2017_avg,2018_avg
year,2015,2016,2017,2018,2015,2016,2017,2018,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
pharma_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Neurim Pharmaceuticals AG,1.0,,1,1,800.0,,900,2500,800.0,,900,2500
Takeda Pharmaceutical,416.0,171.0,80,51,329287.0,179202.0,118882,124074,792.0,1048.0,1486,2433
Sandoz Pharmaceuticals AG,205.0,191.0,28,31,129196.0,112734.0,57976,63795,630.0,590.0,2071,2058
MSD,105.0,145.0,137,265,240423.0,271219.0,257042,480680,2290.0,1870.0,1876,1814
Mepha,146.0,303.0,70,55,145223.0,173109.0,92663,91316,995.0,571.0,1324,1660
Stallergenes Greer,104.0,104.0,38,17,60726.0,60726.0,22466,24073,584.0,584.0,591,1416
Sanofi,182.0,151.0,215,159,191170.0,162840.0,218950,224902,1050.0,1078.0,1018,1414
Shire Pharmaceuticals,,6.0,30,18,,10584.0,32794,25347,,1764.0,1093,1408
Galderma,48.0,57.0,56,64,59463.0,95745.0,80086,81302,1239.0,1680.0,1430,1270
Hoffmann-La Roche,493.0,429.0,448,413,457103.0,465683.0,448393,524206,927.0,1086.0,1001,1269


In [32]:
df_pivot['recipient_id'][2015]

pharma_name
A. Menarini AG                     386
AbbVie                             511
Actelion                           178
Alcon                               15
Allergan                            41
Almirall                            49
Amgen                              404
Astellas Pharma AG                 164
AstraZeneca                        201
Basilea Pharmaceutica              nan
Baxalta                              4
Baxter AG                           75
Bayer AG                           426
Biogen                              53
Biotest AG                           9
Boehringer Ingelheim               171
Bristol-Myers Squibb SA            110
CSL Behring                         52
Celgene                            350
Daiichi Sankyo                      82
Eisai                               15
Eli Lilly SA                       189
Ferring Pharmaceuticals             80
Future Health Pharma GmbH           35
Galderma                            48
Gilead Scienc

In [16]:
df_hcp.columns

Index(['id', 'pharma_id', 'pharma_name', 'recipient_id', 'recipient_name',
       'recipient_address', 'recipient_location', 'recipient_type',
       'transaction_category_id', 'transaction_category_name', 'year',
       'value'],
      dtype='object')

## Ärzte Honorare
**ToDo: Zählen, von wievielen Pharmafirmen bekommen. Suchen, wer am meisten nur von einer Firma bekommen hat**

In [50]:
df_hcp = df_transaction[df_transaction.recipient_type == 'hcp']
df_hcp = df_hcp[df_hcp.transaction_category_name == 'fees']
df_grouped = df_hcp.groupby(['recipient_id']).agg({'recipient_name': 'first', 'value': 'sum'})
df_grouped = df_grouped.sort_values('value', ascending=False)
df_grouped.head(10)


Unnamed: 0_level_0,recipient_name,value
recipient_id,Unnamed: 1_level_1,Unnamed: 2_level_1
6950,Rolf A. Stahel,267365
5443,Matti Aapro,174374
1987,Jan Steffel,143220
1156,Wolf-Henning Boehncke,136012
1140,Johannes Bitzer,132897
3017,Gerhard Rogler,126471
2205,Jean Dudler,110825
2370,Marc Donat,99569
2874,François Mach,96121
2590,Erich Seifritz,88610


In [44]:
df_hcp.transaction_category_name.unique()

array(['registration_fees', 'travel_accommodation', 'fees',
       'related_expenses', 'sponsorship', 'donations_grants'],
      dtype=object)

In [46]:
df_hcp[df_hcp.recipient_name.str.contains('Savaskan')]

Unnamed: 0,id,pharma_id,pharma_name,recipient_id,recipient_name,recipient_address,recipient_location,recipient_type,transaction_category_id,transaction_category_name,year,value
7971,7972,15,Boehringer Ingelheim,2449,Egemen Savaskan,Minervastrasse 145,Zürich,hcp,5,fees,2017,1050
7972,7973,57,Vifor Pharma,2449,Egemen Savaskan,Minervastrasse 145,Zürich,hcp,5,fees,2016,1000
7973,7974,57,Vifor Pharma,2449,Egemen Savaskan,Minervastrasse 145,Zürich,hcp,5,fees,2015,1000
24510,24511,21,Eli Lilly SA,2449,Egemen Savaskan,Minervastrasse 145,Zürich,hcp,5,fees,2016,2600
