In [1]:
import pandas as pd
import numpy as np

In [2]:
# Field names for the OpenSecrets lob_lobbyist table, listed in file order.
# Dataset description:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbyists.htm
# Stored as {column position: name} so the mapping can be passed directly to
# DataFrame.rename(columns=...) on a frame that was read without a header row.
columns_lob_lobbyist = {
    position: field
    for position, field in enumerate((
        'uniqid',
        'lobbyist_lastname_std',
        'lobbyist_firstname_std',
        'lobbyist_lastname_raw',
        'lobbyist_firstname_raw',
        'lobbyist_id',
        'year',
        'officialposition',
        'cid',
        'formercongmem',
    ))
}

In [3]:
# Load the raw lobbyist table. The file is read without a header row, so pandas
# labels the columns 0..n-1; those positions are then mapped to field names via
# columns_lob_lobbyist. Lines pandas cannot parse are skipped instead of raising,
# and the literal strings 'N/A' / 'NA' are treated as missing values.
df = pd.read_csv(
    '../../data/open_secrets/Lobby/lob_lobbyist.csv',
    sep=',',
    header=None,
    encoding='ISO-8859-1',
    na_values=['N/A', 'NA'],
    on_bad_lines='skip',
).rename(columns=columns_lob_lobbyist)

In [4]:
# Get rid of the pipes on both sides of the data.
# Only string cells are rewritten; numbers and NaN pass through unchanged.
# isinstance() is used instead of an exact type comparison so that str
# subclasses are cleaned as well.
df = df.map(lambda cell: cell.replace('|', '') if isinstance(cell, str) else cell)

# Coerce `year` to a number: values that cannot be parsed become NaN, those rows
# are dropped, and the remaining values are cast to int so that the column can
# be compared against integer years.
df['year'] = pd.to_numeric(df['year'], errors='coerce')
df = df.dropna(subset=['year'])
df['year'] = df['year'].astype(int)

In [5]:
# Keep only the most recent data: rows whose year is 2014 or later.
df_recent = df.loc[df['year'].ge(2014)]

In [6]:
# Lift the column display limit so every column is rendered, then show the frame.
pd.set_option('display.max_columns', None)
df_recent

Unnamed: 0,uniqid,lobbyist_lastname_std,lobbyist_firstname_std,lobbyist_lastname_raw,lobbyist_firstname_raw,lobbyist_id,year,officialposition,cid,formercongmem
2152,7EAB24E9-C597-4E1D-A04D-F32C8A898D44,MACKINNON,JEFF,MacKinnon,Jeffrey M,Y0000040530L,2014,LD & LA Rep. Joe Barton (R-Texas) LD & LA Rep....,,n
2153,7EAB24E9-C597-4E1D-A04D-F32C8A898D44,MORTIER,JEFF,Mortier,Jeff,Y0000047083L,2014,Prof Staff on E&C Cmte and LA Rep Whitfield Pr...,,n
2154,7EAB24E9-C597-4E1D-A04D-F32C8A898D44,RYAN,THOMAS,Ryan,Thomas M,Y0000025838L,2014,,,n
2155,7EADE25B-D4EF-4965-A17C-108BB6D89BA0,KELLY,KEVIN F,Kelly,Kevin F,Y0000041925L,2014,,,n
2156,7EADE25B-D4EF-4965-A17C-108BB6D89BA0,THOMSON,JASPER,Thomson,Jasper,Y0000006654L,2014,,,n
...,...,...,...,...,...,...,...,...,...,...
3148495,fa2e5ac1-1463-40b7-8f2b-7769134fc4b6,ADAIR,JOHN LUCAS,Adair,Lucas,YX000058147L,2023,,,n
3148496,fa2e5ac1-1463-40b7-8f2b-7769134fc4b6,GARFINKEL,ANDREW,Garfinkel,Andrew,Y0000008935L,2023,,,n
3148497,fa2e5ac1-1463-40b7-8f2b-7769134fc4b6,KATZ,ALAN M,Katz,Alan,Y0000052389L,2023,,,n
3148498,fa2e5ac1-1463-40b7-8f2b-7769134fc4b6,KELLER,THOMAS,Keller,Thomas C,Y0000017161L,2023,,,n


In [7]:
# Lobbyists who have a CID (were in government at one time).
# A missing value (NaN) compares unequal to '', so a plain `!= ''` test would
# keep rows that have no CID at all; exclude NaN explicitly as well as ''.
df[df['cid'].notna() & df['cid'].ne('')]

Unnamed: 0,uniqid,lobbyist_lastname_std,lobbyist_firstname_std,lobbyist_lastname_raw,lobbyist_firstname_raw,lobbyist_id,year,officialposition,cid,formercongmem
2,5CBE61EC-87F1-401E-9D57-620975C9A1F8,COSTELLO,RYAN,Costello,Ryan,Y0000027292L,2002,,N00031064,y
14,287F0E68-3350-4D72-89BD-63A9D0B40F80,BENTSEN,LLOYD M.,Bentsen,Lloyd,Y0000005721L,2000,,N00006003,y
42,6979F7B5-FCB2-447C-95A0-7C6EE7EA9350,MICHEL,ROBERT H.,Michel,Robert H,Y0000019350L,2002,,N00004929,y
85,A5D185B7-3D2B-4C6B-B881-6385E3918B89,ENGLISH,GLENN,English,Glenn Lee Jr,Y0000014080L,1999,,N00005560,y
100,8847A337-FE5C-41F1-96FF-B268BC59F655,BREAUX,JOHN,Breaux,John,Y0000041198L,2006,UNITED STATES SENATOR,N00005385,y
...,...,...,...,...,...,...,...,...,...,...
3148194,ff38946f-5c59-4bc2-b36b-af24cb95ff14,ROYCE,EDWARD RANDALL,Royce,Edward,Y0000056481L,2023,,N00008264,y
3148219,07853b06-7ff2-4c3a-896f-27d5b1746c7b,POMBO,RICHARD,Pombo,Richard,Y0000048209L,2023,Former Member of Congress,N00007516,y
3148273,3c56191e-4b7c-436a-a79a-b5faee31d70d,POMBO,RICHARD,Pombo,Richard,Y0000048209L,2023,Former Member of Congress,N00007516,y
3148436,c69d22ef-35a9-4e78-958b-2a246ef8c93b,POMBO,RICHARD,Pombo,Richard,Y0000048209L,2023,Former Member of Congress,N00007516,y
