# Summarize Pharmas by Disclosure
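This notebook builds an overview of the data: payment totals per pharma company and category, split by how they were disclosed, exported to Excel for all years combined and for each year separately.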

In [1]:
import pandas as pd
import sys
sys.path.insert(0, '../../data/lib/')
import consts
from pdfexport import *
import openpyxl
import subprocess, os, sys
import mysql.connector

## Connect

In [2]:
# Open the MySQL connection used by all queries below
# (local server, user "root", empty password, schema "pharmagelder").
db_connection = mysql.connector.connect(host="localhost", user="root", passwd="", database="pharmagelder")

## Load Data

In [3]:
# Rows of the `transaction` table, summed per pharma, year and category.
df_list_raw = pd.read_sql(
    con=db_connection,
    sql="""SELECT 
    sum(tra_value) as sum_value,
    trc_name as category,
    pha_name as pharma,
    tra_year as year
  FROM transaction
  LEFT JOIN pharma ON tra_fk_pharma = pha_id
  LEFT JOIN transaction_category on tra_fk_transaction_category = trc_id
  GROUP BY tra_fk_pharma, tra_year, tra_fk_transaction_category""",
)

# Rows of the `accumulation` table, summed the same way so that both frames
# share the columns sum_value / category / pharma / year.
df_accu_raw = pd.read_sql(
    con=db_connection,
    sql="""SELECT 
    sum(acc_value) as sum_value,
    trc_name as category,
    pha_name as pharma,
    acc_year as year
  FROM accumulation
  LEFT JOIN pharma ON acc_fk_pharma = pha_id
  LEFT JOIN transaction_category on acc_fk_transaction_category = trc_id
  GROUP BY acc_fk_pharma, acc_year, acc_fk_transaction_category""",
)

## Export

In [4]:
# Columns written to every sheet, in output order. 'total' is the margins
# column that pivot_table adds (margins_name='total').
_EXPORT_COLUMNS = ['donations_grants', 'sponsorship', 'registration_fees', 'travel_accommodation', 'fees', 'related_expenses', 'rnd', 'total']


def _pivot_by_category(df_part, index):
    """Return summed values with one column per category plus a 'total' margin.

    Args:
        df_part: DataFrame with the columns named in ``index`` plus
            'category' and 'sum_value'.
        index: Column name(s) to use as the row index of the pivot.

    Returns:
        DataFrame restricted to ``_EXPORT_COLUMNS``, missing values set to 0
        and all values rounded to two decimals.
    """
    pivot = df_part.pivot_table(index=index, columns='category', values='sum_value', fill_value=0, aggfunc='sum', margins=True, margins_name='total')
    # reindex rather than pivot[[...]]: a subset that lacks one of the
    # categories would otherwise raise KeyError; the missing column is
    # written as 0 instead.
    pivot = pivot.reindex(columns=_EXPORT_COLUMNS, fill_value=0)
    return pivot.fillna(0).round(2)


def export_year(df_data, year):
    """Write the per-pharma summary for one year (or all years) to Excel.

    Two sheets are written to
    '../../data/99. analyzes/excel/nach_offenlegung_<year>.xlsx':
    'total <year>' (rows per pharma) and 'detail <year>' (rows per pharma
    and declaration).

    Args:
        df_data: DataFrame with the columns 'pharma', 'declaration',
            'category', 'sum_value' and 'year'.
        year: A value of the 'year' column to filter on, or the string
            'all' to export every row.
    """
    print(year)

    if year == 'all':
        df_part = df_data
    else:
        df_part = df_data[df_data.year == year]

    # Build both tables before touching the file so a failing pivot does not
    # leave a half-written workbook behind.
    df_pivot_total = _pivot_by_category(df_part, ['pharma'])
    df_pivot_detail = _pivot_by_category(df_part, ['pharma', 'declaration'])

    # Write to Excel. The context manager saves and closes the workbook even
    # if writing a sheet fails; ExcelWriter's `options=` argument and
    # `writer.save()` no longer exist in pandas 2.x.
    path = '../../data/99. analyzes/excel/nach_offenlegung_%s.xlsx' % year
    with pd.ExcelWriter(path) as writer:
        df_pivot_total.to_excel(writer, sheet_name='total %s' % year, index=True)
        df_pivot_detail.to_excel(writer, sheet_name='detail %s' % year, index=True)

In [5]:
# Label each source by how it was disclosed: rows loaded from `transaction`
# are 'with_name', rows loaded from `accumulation` are 'anonymously'.
# assign() returns a new frame, so the *_raw frames stay untouched.
df_list = df_list_raw.assign(declaration='with_name')
df_accu = df_accu_raw.assign(declaration='anonymously')

# Stack both sources into one frame
df_data = pd.concat([df_list, df_accu], sort=False)

# One workbook covering every year ...
export_year(df_data, 'all')

# ... and one workbook per distinct year in the data
for year in df_data['year'].unique():
    export_year(df_data, year)

all
2015
2016
2017
2018
