In [54]:
import pandas as pd
import numpy as np

In [43]:
# Load the amendments extract.
# Identifier-like columns are read as strings so pandas does not coerce them
# to numbers (which would drop leading zeros, e.g. in `amendment_beneficiary`);
# the two transfer-value columns are read as floats.
# Built-in `str` / `float` are used instead of the `np.str` / `np.float`
# aliases, which were removed in NumPy 1.24 and now raise AttributeError.
amendments = pd.read_csv(
    '../data/2016-12-18-amendments.xz',
    dtype={
        'proposal_id': str,
        'amendment_beneficiary': str,
        'amendment_program_code': str,
        'amendment_proposal_tranfer_value': float,
        'amendment_tranfer_value': float,
    },
)

amendments.shape

(81111, 10)

In [113]:
# Number of distinct proposal ids among the amendment rows.
len(amendments['proposal_id'].unique())

78926

In [61]:
# Load only the agreement columns needed to link proposals to agreements.
# Agreement and proposal identifiers are kept as strings so they can be used
# as merge keys against the string ids in the other tables; ANO is an integer.
# Built-in `str` / `int` are used instead of the `np.str` / `np.int` aliases,
# which were removed in NumPy 1.24 and now raise AttributeError.
agreements = pd.read_csv(
    '../data/2016-12-18-agreements.xz',
    usecols=['NR_CONVENIO', 'ID_PROPOSTA', 'ANO'],
    dtype={
        'NR_CONVENIO': str,
        'ID_PROPOSTA': str,
        'ANO': int,
    },
    low_memory=False,
)
agreements.shape

(104967, 3)

In [57]:
# Load the payments extract. The agreement number is read as a string so it
# can be used as a merge key; all other column dtypes are left for pandas to
# infer.
# Built-in `str` is used instead of the `np.str` alias, which was removed in
# NumPy 1.24 and now raises AttributeError.
payments = pd.read_csv(
    '../data/2016-12-18-payments.xz',
    dtype={'NR_CONVENIO': str},
    low_memory=False,
)
payments.shape

(2742433, 9)

In [44]:
# Re-encode the congressperson type as a pandas categorical, then show the
# distinct levels found in the data.
type_as_category = amendments['congressperson_type'].astype('category')
amendments['congressperson_type'] = type_as_category
amendments['congressperson_type'].cat.categories

Index(['committee', 'individual', 'seat'], dtype='object')

In [45]:
# Re-encode the proponent qualification as a pandas categorical, then show
# the distinct levels found in the data.
qualification_as_category = amendments['proponent_qualification'].astype('category')
amendments['proponent_qualification'] = qualification_as_category
amendments['proponent_qualification'].cat.categories

Index(['parliamentary amendment beneficiary'], dtype='object')

In [46]:
# Re-encode the tax indicative flag as a pandas categorical, then show the
# distinct levels found in the data.
tax_flag_as_category = amendments['tax_indicative'].astype('category')
amendments['tax_indicative'] = tax_flag_as_category
amendments['tax_indicative'].cat.categories

Index(['no', 'yes'], dtype='object')

In [47]:
# Show the first amendment record as a column -> value listing.
amendments.iloc[0, :]

proposal_id                                                      588275
proponent_qualification             parliamentary amendment beneficiary
amendment_program_code                                    5100020120077
amendment_number                                               31250010
congressperson_name                                    ANTONIO BALHMANN
amendment_beneficiary                                    07535446000160
tax_indicative                                                       no
congressperson_type                                          individual
amendment_proposal_tranfer_value                                 292500
amendment_tranfer_value                                          292500
Name: 0, dtype: object

In [48]:
# Preview the first five amendment rows.
amendments.head(5)

Unnamed: 0,proposal_id,proponent_qualification,amendment_program_code,amendment_number,congressperson_name,amendment_beneficiary,tax_indicative,congressperson_type,amendment_proposal_tranfer_value,amendment_tranfer_value
0,588275,parliamentary amendment beneficiary,5100020120077,31250010,ANTONIO BALHMANN,7535446000160,no,individual,292500.0,292500.0
1,1066705,parliamentary amendment beneficiary,2200020160030,28650006,JOSE STEDILE,94436342000100,yes,individual,243750.0,243750.0
2,490954,parliamentary amendment beneficiary,2200020110005,10560008,SERGIO SOUZA,75771279000106,no,individual,97500.0,97500.0
3,646294,parliamentary amendment beneficiary,5600020120020,27650005,RENZO BRAZ,66230384000147,no,individual,265630.0,264245.4
4,586235,parliamentary amendment beneficiary,5200020120071,26850008,PADRE TON,4092714000128,no,individual,250000.0,250000.0


### Ranking of congresspersons by number of amendments, with their total values

In [49]:
# For each congressperson, count the individual amendments and sum their
# proposal transfer values, then list the five with the most amendments.
#
# The nested-dict renaming form of `.agg` ({'col': {'new_name': 'func'}}) was
# removed in pandas 1.0 and raises SpecificationError. Aggregate with lists of
# functions instead and rename the second column level afterwards, which keeps
# the (source column, result name) MultiIndex column layout.
is_individual = amendments['congressperson_type'] == 'individual'
per_congressperson = (
    amendments[is_individual]
    .groupby('congressperson_name')
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
    .rename(
        columns={
            'count': 'total_proposals',
            'sum': 'amendment_proposal_tranfer_value',
        },
        level=1,
    )
)
per_congressperson.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head()

Unnamed: 0_level_0,amendment_proposal_tranfer_value,proposal_id
Unnamed: 0_level_1,amendment_proposal_tranfer_value,total_proposals
congressperson_name,Unnamed: 1_level_2,Unnamed: 2_level_2
LUIS CARLOS HEINZE,73776180.37,388
MARCO MAIA,62910101.03,354
ANA AMELIA,57655546.43,336
DARCISIO PERONDI,69749394.78,327
PAULO PAIM,56328489.7,317


### Ranking of beneficiaries (CNPJs) by number of amendments, with their total values

In [50]:
# For each beneficiary, count the individual amendments and sum their proposal
# transfer values, then list the five with the most amendments.
#
# The nested-dict renaming form of `.agg` ({'col': {'new_name': 'func'}}) was
# removed in pandas 1.0 and raises SpecificationError. Aggregate with lists of
# functions instead and rename the second column level afterwards, which keeps
# the (source column, result name) MultiIndex column layout.
is_individual = amendments['congressperson_type'] == 'individual'
per_beneficiary = (
    amendments[is_individual]
    .groupby('amendment_beneficiary')
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
    .rename(
        columns={
            'count': 'total_proposals',
            'sum': 'amendment_proposal_tranfer_value',
        },
        level=1,
    )
)
per_beneficiary.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head()

Unnamed: 0_level_0,amendment_proposal_tranfer_value,proposal_id
Unnamed: 0_level_1,amendment_proposal_tranfer_value,total_proposals
amendment_beneficiary,Unnamed: 1_level_2,Unnamed: 2_level_2
394577000125,189124500.0,271
4034583000122,105640200.0,150
63606479000124,119683300.0,146
49150352000112,49540890.0,133
5995766000177,120213500.0,129


### Ranking of congressperson/beneficiary pairs by number of amendments, with their total values

In [95]:
# For each (congressperson, beneficiary) pair, count the individual amendments
# and sum their proposal transfer values, then list the ten pairs with the
# most amendments.
#
# The nested-dict renaming form of `.agg` ({'col': {'new_name': 'func'}}) was
# removed in pandas 1.0 and raises SpecificationError. Aggregate with lists of
# functions instead and rename the second column level afterwards, which keeps
# the (source column, result name) MultiIndex column layout.
is_individual = amendments['congressperson_type'] == 'individual'
per_congressperson_and_beneficiary = (
    amendments[is_individual]
    .groupby(['congressperson_name', 'amendment_beneficiary'])
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
    .rename(
        columns={
            'count': 'total_proposals',
            'sum': 'amendment_proposal_tranfer_value',
        },
        level=1,
    )
)
per_congressperson_and_beneficiary.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,amendment_proposal_tranfer_value,proposal_id
Unnamed: 0_level_1,Unnamed: 1_level_1,amendment_proposal_tranfer_value,total_proposals
congressperson_name,amendment_beneficiary,Unnamed: 2_level_2,Unnamed: 3_level_2
JANETE CAPIBERIBE,394577000125,37164540.0,84
PASTOR EURICO,8113327000181,227061100.0,42
SEBASTIAO BALA ROCHA,394577000125,32980000.0,36
WANDENKOLK GONCALVES,5054945000100,23757700.0,32
MARCIO FRANCA,8574719000148,165870900.0,32
MARCELO MATOS,29138336000105,36291980.0,32
RENATO MOLLING,87366159000102,29201140.0,30
JANETE ROCHA PIETA,46319000000150,19830560.0,28
DOMINGOS NETO,7849532000147,31170840.0,26
DR. JORGE SILVA,27167477000112,13943180.0,26


In [110]:
# Link each individual amendment to its agreement (proposal_id == ID_PROPOSTA)
# and then to every payment recorded under that agreement (NR_CONVENIO).
# NOTE(review): if several amendments share one agreement, each payment row is
# repeated once per amendment, so per-congressperson payment sums computed from
# this frame may attribute the same payment to more than one congressperson --
# confirm this is intended.
individual_amendments = amendments[amendments['congressperson_type'] == 'individual']
amendments_with_agreements = individual_amendments.merge(
    agreements,
    left_on='proposal_id',
    right_on='ID_PROPOSTA',
)
data = amendments_with_agreements.filter(
    items=['proposal_id', 'congressperson_name', 'amendment_beneficiary', 'ANO', 'NR_CONVENIO']
).merge(payments, on='NR_CONVENIO')

In [99]:
# For each (supplier id, supplier name, congressperson) combination, count the
# merged rows and sum the paid amounts, then list the 100 combinations with
# the highest total paid.
#
# The nested-dict renaming form of `.agg` ({'col': {'new_name': 'func'}}) was
# removed in pandas 1.0 and raises SpecificationError. Aggregate with lists of
# functions instead and rename the second column level afterwards, which keeps
# the (source column, result name) MultiIndex column layout.
# NOTE(review): 'proposals' is a count of non-null proposal_id values per
# group, i.e. one per merged payment row, not a count of distinct proposals --
# confirm this is the intended meaning.
per_congress_person_and_supplier = (
    data
    .groupby(['IDENTIF_FORNECEDOR', 'NOME_FORNECEDOR', 'congressperson_name'])
    .agg({
        'proposal_id': ['count'],
        'VL_PAGO': ['sum'],
    })
    .rename(columns={'count': 'proposals', 'sum': 'total_value'}, level=1)
)
per_congress_person_and_supplier.sort_values(by=[('VL_PAGO', 'total_value')], ascending=False).head(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,VL_PAGO,proposal_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,total_value,proposals
IDENTIF_FORNECEDOR,NOME_FORNECEDOR,congressperson_name,Unnamed: 3_level_2,Unnamed: 4_level_2
08059768000142,CINZEL ENGENHARIA LTDA,RAUL HENRY,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,ANDERSON FERREIRA,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,AUGUSTO COUTINHO,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,CARLOS EDUARDO CADOCA,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,FERNANDO COELHO FILHO,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,HUMBERTO COSTA,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,JOAO PAULO LIMA,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,JORGE CORTE REAL,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,JOSE AUGUSTO MAIA,45966681.68,37
08059768000142,CINZEL ENGENHARIA LTDA,LUCIANA SANTOS,45966681.68,37


In [111]:
# Inspect every merged payment row for the supplier with this identifier.
supplier_mask = data['IDENTIF_FORNECEDOR'].eq('08059768000142')
data.loc[supplier_mask]

Unnamed: 0,proposal_id,congressperson_name,amendment_beneficiary,ANO,NR_CONVENIO,NR_MOV_FIN,IDENTIF_FORNECEDOR,NOME_FORNECEDOR,TP_MOV_FINANCEIRA,DATA_PAG,NR_DL,DESC_DL,VL_PAGO
56119,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3966427,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-10-2016,1551 -A - SALDO,NOTA FISCAL,147631.38
56120,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3964186,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-10-2016,1605,NOTA FISCAL,1211883.69
56121,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3840131,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,28-07-2016,1605,NOTA FISCAL,147631.38
56122,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3837025,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,28-07-2016,1603,NOTA FISCAL,246114.26
56123,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3837042,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,28-07-2016,1604,NOTA FISCAL,389141.44
56124,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,3837131,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,28-07-2016,1605,NOTA FISCAL,333171.69
56125,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,2818538,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-03-2015,1390,NOTA FISCAL,750604.76
56126,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,2818540,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-03-2015,1392,NOTA FISCAL,2570438.00
56127,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,2818541,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-03-2015,1393,NOTA FISCAL,7799499.80
56128,777690,AUGUSTO COUTINHO,41090291000133,2013,791580,2818543,08059768000142,CINZEL ENGENHARIA LTDA,PAGAMENTO A FAVORECIDO COM OBTV,11-03-2015,1395,NOTA FISCAL,2139089.09
