In [1]:
import pandas as pd
import numpy as np

# Load the amendments dataset. Values in the file use ',' as the decimal
# separator. The identifier-like columns are read as text instead of letting
# pandas infer a numeric dtype, and the two transfer-value columns are read as
# double-precision floats.
#
# The builtin `str` is used for the text columns: `np.str` was only an alias of
# it, deprecated in NumPy 1.20 and removed in NumPy 1.24, where referencing it
# raises AttributeError.
data = pd.read_csv(
    '../data/amendments.xz',
    decimal=',',
    dtype={
        'proposal_id': str,
        'amendment_beneficiary': str,
        'amendment_program_code': str,
        'amendment_proposal_tranfer_value': np.double,
        'amendment_tranfer_value': np.double,
    },
)
data.shape

(80972, 10)

In [2]:
# Re-encode the congressperson type with pandas' categorical dtype, then show
# the distinct labels the column holds.
data['congressperson_type'] = (
    data['congressperson_type'].astype('category')
)
data['congressperson_type'].dtype.categories

Index(['committee', 'individual', 'seat'], dtype='object')

In [3]:
# Re-encode the proponent qualification with pandas' categorical dtype, then
# show the distinct labels the column holds.
data['proponent_qualification'] = (
    data['proponent_qualification'].astype('category')
)
data['proponent_qualification'].dtype.categories

Index(['parliamentary amendment beneficiary'], dtype='object')

In [4]:
# Re-encode the tax indicator with pandas' categorical dtype, then show the
# distinct labels the column holds.
data['tax_indicative'] = (
    data['tax_indicative'].astype('category')
)
data['tax_indicative'].dtype.categories

Index(['no', 'yes'], dtype='object')

In [6]:
# Show every column of the first record (positional row 0) as a Series.
data.iloc[0, :]

proposal_id                                                      588275
proponent_qualification             parliamentary amendment beneficiary
amendment_program_code                                    5100020120077
amendment_number                                               31250010
congressperson_name                                    ANTONIO BALHMANN
amendment_beneficiary                                     7535446000160
tax_indicative                                                       no
congressperson_type                                          individual
amendment_proposal_tranfer_value                                 292500
amendment_tranfer_value                                          292500
Name: 0, dtype: object

In [7]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,proposal_id,proponent_qualification,amendment_program_code,amendment_number,congressperson_name,amendment_beneficiary,tax_indicative,congressperson_type,amendment_proposal_tranfer_value,amendment_tranfer_value
0,588275,parliamentary amendment beneficiary,5100020120077,31250010,ANTONIO BALHMANN,7535446000160,no,individual,292500.0,292500.0
1,1066705,parliamentary amendment beneficiary,2200020160030,28650006,JOSE STEDILE,94436342000100,yes,individual,243750.0,243750.0
2,490954,parliamentary amendment beneficiary,2200020110005,10560008,SERGIO SOUZA,75771279000106,no,individual,97500.0,97500.0
3,646294,parliamentary amendment beneficiary,5600020120020,27650005,RENZO BRAZ,66230384000147,no,individual,265630.0,264245.4
4,586235,parliamentary amendment beneficiary,5200020120071,26850008,PADRE TON,4092714000128,no,individual,250000.0,250000.0


### Ranking of congresspersons with the highest number of amendments, and their total values

In [47]:
# For amendments authored by individual congresspersons, count the proposals
# and sum the proposed transfer value per congressperson.
#
# The nested-dict renamer in `.agg` ({'column': {'new_name': 'func'}}) was
# deprecated in pandas 0.20 and raises SpecificationError since pandas 1.0.
# Aggregate with a dict of lists instead, then relabel the second column level
# so the result keeps the same two-level (column, label) layout that the sort
# key below refers to.
per_congressperson = (
    data[data['congressperson_type'] == 'individual']
    .groupby('congressperson_name')
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
)
per_congressperson.columns = pd.MultiIndex.from_tuples([
    ('proposal_id', 'total_proposals'),
    ('amendment_proposal_tranfer_value', 'amendment_proposal_tranfer_value'),
])

# Top five congresspersons by number of proposals.
per_congressperson.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head()

Unnamed: 0_level_0,proposal_id,amendment_proposal_tranfer_value
Unnamed: 0_level_1,total_proposals,amendment_proposal_tranfer_value
congressperson_name,Unnamed: 1_level_2,Unnamed: 2_level_2
LUIS CARLOS HEINZE,388,73776180.37
MARCO MAIA,354,62910101.03
ANA AMELIA,336,57655546.43
DARCISIO PERONDI,327,69749394.78
PAULO PAIM,317,56328489.7


### Ranking of beneficiaries (CNPJs) with the highest number of amendments, and their total values

In [50]:
# For amendments authored by individual congresspersons, count the proposals
# and sum the proposed transfer value per beneficiary.
#
# The nested-dict renamer in `.agg` ({'column': {'new_name': 'func'}}) was
# deprecated in pandas 0.20 and raises SpecificationError since pandas 1.0.
# Aggregate with a dict of lists instead, then relabel the second column level
# so the result keeps the same two-level (column, label) layout that the sort
# key below refers to.
per_beneficiary = (
    data[data['congressperson_type'] == 'individual']
    .groupby('amendment_beneficiary')
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
)
per_beneficiary.columns = pd.MultiIndex.from_tuples([
    ('proposal_id', 'total_proposals'),
    ('amendment_proposal_tranfer_value', 'amendment_proposal_tranfer_value'),
])

# Top five beneficiaries by number of proposals.
per_beneficiary.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head()

Unnamed: 0_level_0,proposal_id,amendment_proposal_tranfer_value
Unnamed: 0_level_1,total_proposals,amendment_proposal_tranfer_value
amendment_beneficiary,Unnamed: 1_level_2,Unnamed: 2_level_2
394577000125,271,189124500.0
4034583000122,150,105640200.0
63606479000124,146,119683300.0
5995766000177,129,120213500.0
49150352000112,128,47664590.0


### Ranking of congressperson/beneficiary pairs with the highest number of amendments, and their total values

In [55]:
# For amendments authored by individual congresspersons, count the proposals
# and sum the proposed transfer value per (congressperson, beneficiary) pair.
#
# The nested-dict renamer in `.agg` ({'column': {'new_name': 'func'}}) was
# deprecated in pandas 0.20 and raises SpecificationError since pandas 1.0.
# Aggregate with a dict of lists instead, then relabel the second column level
# so the result keeps the same two-level (column, label) layout that the sort
# key below refers to.
per_congress_person_and_beneficiary = (
    data[data['congressperson_type'] == 'individual']
    .groupby(['congressperson_name', 'amendment_beneficiary'])
    .agg({
        'proposal_id': ['count'],
        'amendment_proposal_tranfer_value': ['sum'],
    })
)
per_congress_person_and_beneficiary.columns = pd.MultiIndex.from_tuples([
    ('proposal_id', 'total_proposals'),
    ('amendment_proposal_tranfer_value', 'amendment_proposal_tranfer_value'),
])

# Top ten congressperson/beneficiary pairs by number of proposals.
per_congress_person_and_beneficiary.sort_values(by=[('proposal_id', 'total_proposals')], ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,proposal_id,amendment_proposal_tranfer_value
Unnamed: 0_level_1,Unnamed: 1_level_1,total_proposals,amendment_proposal_tranfer_value
congressperson_name,amendment_beneficiary,Unnamed: 2_level_2,Unnamed: 3_level_2
JANETE CAPIBERIBE,394577000125,84,37164540.0
PASTOR EURICO,8113327000181,42,227061100.0
SEBASTIAO BALA ROCHA,394577000125,36,32980000.0
WANDENKOLK GONCALVES,5054945000100,32,23757700.0
MARCIO FRANCA,8574719000148,32,165870900.0
MARCELO MATOS,29138336000105,32,36191010.0
RENATO MOLLING,87366159000102,29,28984960.0
JANETE ROCHA PIETA,46319000000150,28,19830560.0
EDINHO ARAUJO,46588950000180,26,41200000.0
DOMINGOS NETO,7849532000147,26,31170840.0
