This notebook takes TX state lobbying records and tries to create a chart that shows which lobbyists favor which
political parties. The output is a Gephi file called lobbyTravelFood.gexf. I started with these files: 

LaTran.csv, which is from the TEC here: https://www.ethics.state.tx.us/dfs/search_LOBBY.html.

tx_legislators.csv, from http://openstates.org/csv_downloads/

then made these changes with csvkit:

csvcut -c 1,4,5,6,15,14,13,16,17,11,31 LaTran.csv > TXTran.csv
csvcut -c 1,2,3,4,5,12,10,11,8,13 tx_legislators.csv > TXLeg.csv

In [9]:
import csv

def fileToList(file):
    """Read a CSV file and return all of its rows as a list of lists.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list of list of str
        Every row of the file in file order, header row included.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # newline='' is what the csv module asks for, so that line breaks embedded
    # in quoted fields are parsed correctly. The with-block guarantees the
    # handle is closed as soon as the rows have been read.
    with open(file, newline='') as newFile:
        return list(csv.reader(newFile))

# Paths of the three pre-processed CSV inputs.
file = 'data/TXTran.csv'
file2 = 'data/TXLeg.csv'
file3 = 'data/TXFood.csv'

# For each input: keep the full parsed table (header included) under its
# original name, and build a working list that drops the first (header) row.
newData = fileToList(file)
travel = newData[1:]

legData = fileToList(file2)
leg = legData[1:]

foodData = fileToList(file3)
food = foodData[1:]

In [10]:
from fuzzywuzzy import fuzz

def findLegislator(itemname, legislators, threshold=80):
    """Find the legislator row that best corresponds to ``itemname``.

    An exact pass runs first: the name must equal either the legislator's
    ``person[2] + ' ' + person[4]`` name or the full-name column ``person[1]``.
    Only if no legislator matches exactly is a fuzzy pass made, accepting the
    first legislator whose ``fuzz.token_sort_ratio`` score exceeds
    ``threshold``.

    Returns a ``(person, personname, isFuzzy)`` tuple, or ``None`` when nothing
    matches.
    """
    for person in legislators:
        personname = person[2] + ' ' + person[4]
        if itemname == personname or itemname == person[1]:
            return person, personname, False

    for person in legislators:
        # The name has to be rebuilt for every candidate; comparing against a
        # name left over from the exact pass would score the wrong legislator.
        personname = person[2] + ' ' + person[4]
        if fuzz.token_sort_ratio(itemname, personname) > threshold:
            return person, personname, True

    return None


def matchRecords(records, legislators, threshold=80):
    """Attach legislator details to every record whose recipient can be found.

    The recipient name is built from columns 5 and 6 of each record. Matched
    records are extended IN PLACE with the legislator's columns 0, 5, 6, 7, 8, 9
    followed by the matched name, and are returned in a new list. Each record
    is matched to at most one legislator, so it is extended and returned at
    most once. Records with no match are skipped silently.

    Because the records are mutated, re-running this on the same rows appends
    the legislator columns again; reload the data before re-running.
    """
    matched = []
    for item in records:
        itemname = item[5] + ' ' + item[6]
        hit = findLegislator(itemname, legislators, threshold)
        if hit is None:
            continue
        person, personname, isFuzzy = hit
        if isFuzzy:
            print(itemname + ' fuzzy match ' + personname)
        item.extend([person[0],person[5],person[6],person[7],person[8],person[9],personname])
        matched.append(item)
    return matched


travelFound = matchRecords(travel, leg)
foodFound = matchRecords(food, leg)

# Spot-check one matched row from each data set.
print(travelFound[5])
print(foodFound[5])

['301', '2005', '00010205', 'Johnson, Thomas L.', 'Mr.', 'John', 'Whitmire', '', 'AGC of Texas 2005 Management Conference', 'TXL000211', 'Democratic', 'upper', '15', 'True', 'c2c2d651cb9f4bd3b00364f8d9b84715', 'John Whitmire']
['1461', '2005', '00028479', 'McCandless, Patricia L.', '', 'Myra', 'Crownover', '', "Eddie V's Edgewater Grille", 'TXL000241', 'Republican', 'lower', '64', 'True', '8278f958078b45fda0ada3e667be78ee', 'Myra Crownover']


In [11]:
import networkx as nx
# Directed multigraph: one edge per gift record, pointing from the lobbyist
# (row[2]) to the legislator (row[9]).
G = nx.MultiDiGraph()

# Travel rows are added first, then food rows; the gift label is the only
# thing that differs between the two passes.
for giftRows, giftLabel in ((travelFound, "Travel"), (foodFound, "food")):
    for row in giftRows:
        lobbyistId = row[2]
        legislatorId = row[9]
        # Lobbyist nodes get a fixed role; legislator nodes carry row[10] as role.
        G.add_node(lobbyistId, name=row[3], role="Lobbyist")
        G.add_node(legislatorId, name=row[-1], role=row[10])
        G.add_edge(lobbyistId, legislatorId, gift=giftLabel, year=row[1], detail=row[8])


In [12]:
# Peek at the last ten (node id, attribute dict) pairs. Wrapping the result in
# list() keeps the slice working on networkx releases where nodes(data=True)
# returns a view object instead of a plain list.
list(G.nodes(data=True))[-10:]

[('00013250', {'name': 'Roberts, Jack', 'role': 'Lobbyist'}),
 ('TXL000264', {'name': 'Charlie Geren', 'role': 'Republican'}),
 ('TXL000220', {'name': 'Leo Berman', 'role': ''}),
 ('00068579', {'name': 'Clayton, Molly', 'role': 'Lobbyist'}),
 ('TXL000426', {'name': 'Greg Bonnen', 'role': 'Republican'}),
 ('00060971', {'name': 'James, Nick', 'role': 'Lobbyist'}),
 ('00059189', {'name': 'Boyer, Victor', 'role': 'Lobbyist'}),
 ('TXL000356', {'name': 'Allen Vaught', 'role': ''}),
 ('00010008', {'name': 'Rich, Sidney D.', 'role': 'Lobbyist'}),
 ('TXL000463', {'name': 'Drew Springer', 'role': 'Republican'})]

In [13]:
# Sanity check that the graph built above is directed.
G.is_directed()

True

In [14]:
# Look up the attributes stored on a single legislator node. Going through
# nodes(data=True) instead of the G.node attribute works on both older and
# newer networkx releases (G.node was dropped in later versions).
dict(G.nodes(data=True))['TXL000211']

{'name': 'John Whitmire', 'role': 'Democratic'}

In [15]:
# Degree of every node in the graph, keyed by node id.
# NOTE(review): this covers every node, so the displayed output is very long.
nx.degree(G)

{'00010008': 4,
 '00010038': 1,
 '00010044': 1,
 '00010063': 4,
 '00010066': 1,
 '00010102': 2,
 '00010150': 6,
 '00010205': 145,
 '00010245': 1,
 '00010257': 20,
 '00010273': 8,
 '00010769': 2,
 '00010798': 5,
 '00010799': 29,
 '00010806': 1,
 '00010963': 1,
 '00010972': 9,
 '00010977': 22,
 '00011042': 2,
 '00011177': 23,
 '00011628': 2,
 '00011645': 8,
 '00011710': 1,
 '00011961': 10,
 '00012312': 7,
 '00012652': 6,
 '00012695': 3,
 '00012750': 4,
 '00012847': 5,
 '00012853': 1,
 '00012860': 2,
 '00012879': 4,
 '00012889': 3,
 '00012890': 3,
 '00012897': 1,
 '00012904': 7,
 '00012934': 15,
 '00012967': 2,
 '00012980': 1,
 '00012985': 337,
 '00012987': 2,
 '00013124': 1,
 '00013189': 1,
 '00013201': 1,
 '00013211': 5,
 '00013250': 9,
 '00013332': 1,
 '00013335': 43,
 '00013336': 9,
 '00013341': 8,
 '00013356': 1,
 '00013379': 3,
 '00013391': 6,
 '00013437': 6,
 '00013438': 4,
 '00013455': 1,
 '00013490': 9,
 '00013547': 3,
 '00013582': 5,
 '00013591': 3,
 '00013593': 6,
 '00013612': 

In [16]:
# Build a plain undirected graph from G, compute its connected components, and
# print each component (a collection of node ids) on its own line.
H = nx.Graph(G)
answer = nx.connected_components(H)
# NOTE(review): `answer` may be a one-shot generator depending on the networkx
# version, in which case it is exhausted after this loop — confirm before reusing it.
for i in answer:
    print(i)

{'00056018', '00014717', '00013582', 'TXL000490', 'TXL000400', '00034621', '00035306', 'TXL000317', 'TXL000240', '00053635', 'TXL000506', '00034785', '00014618', '00020440', '00056970', 'TXL000467', 'TXL000378', '00032904', '00053187', '00068579', '00039254', '00040174', 'TXL000481', 'TXL000246', '00062366', '00063391', '00019678', '00066856', '00067073', '00013356', '00029805', '00052056', '00065174', '00066729', '00060454', 'TXL000507', 'TXL000385', '00018549', 'TXL000266', '00013802', 'TXL000325', '00013335', '00042973', 'TXL000259', '00064097', 'TXL000249', 'TXL000219', 'TXL000356', 'TXL000225', 'TXL000234', 'TXL000281', '00011645', '00039361', 'TXL000252', 'TXL000265', '00060695', 'TXL000305', 'TXL000443', 'TXL000502', '00066252', 'TXL000408', 'TXL000487', '00010044', 'TXL000187', '00035794', '00053966', '00050571', '00061245', '00070018', '00014266', 'TXL000424', '00065473', '00051202', '00053477', 'TXL000184', 'TXL000211', '00053871', '00056681', '00012750', 'TXL000459', '000508

In [8]:
# Exploratory: show the help text for networkx's readwrite package.
help(nx.readwrite)

Help on package networkx.readwrite in networkx:

NAME
    networkx.readwrite - A package for reading and writing graphs in various formats.

PACKAGE CONTENTS
    adjlist
    edgelist
    gexf
    gml
    gpickle
    graph6
    graphml
    json_graph (package)
    leda
    multiline_adjlist
    nx_shp
    nx_yaml
    p2g
    pajek
    sparse6

FILE
    /Users/Matt/anaconda/lib/python3.5/site-packages/networkx/readwrite/__init__.py




In [17]:
# This is the output: the combined travel + food graph G written as a GEXF
# file. The cells after this one are just experiments.

nx.readwrite.write_gexf(G,"lobbyTravelFood.gexf")

In [None]:
# I tried making this CSV to output for graphing. But this isn't that useful because it puts all the 
# data on the edges, none on the nodes, so I didn't end up using it for anything.

# The with-block closes the file even if a row fails to write (for example a
# row that is shorter than expected), so no partially written handle is left open.
with open('StateLobbyistTravel.csv', 'w', newline='') as outputFile:
    outputWriter = csv.writer(outputFile)
    outputWriter.writerow(['Record', 'Year', 'Source', 'LobbyName', 'Target','LegName','Party'])
    # One output row per matched travel record: record id, year, lobbyist id,
    # lobbyist name, legislator id, matched legislator name, party.
    outputWriter.writerows(
        [row[0],row[1],row[2],row[3],row[9],row[-1],row[10]] for row in travelFound
    )

In [10]:
# Testing to see how many records there are where the legislator's party is unknown because OpenStates doesn't provide
# that info after they leave office.

# Rows in `travel` only have a party column (index 10) once the matching cell
# has extended them in place, so unmatched rows are too short and are skipped.
# The length check must be > 10 for record[10] to be a valid index.
noParty = [record for record in travel if len(record) > 10 and record[10] == '']

len(noParty)

215

In [None]:
# Here's where I tried using Neo4j.

from py2neo import Graph

# Graph() is called with no arguments, so py2neo's default connection settings are used.
graph = Graph()
# NOTE(review): presumably this removes every node and relationship already in
# that database — confirm before running it against data you want to keep.
graph.delete_all()

In [None]:
# Load every matched travel record into Neo4j as
#   (Lobbyist)-[:Travel]->(Legislator)
# The Node objects are kept in local variables and merged into the database,
# and the relationship is then built from those same objects. Using the return
# value of graph.merge() as a node does not work, and `Path` is not among the
# names imported here.
# NOTE(review): written against the py2neo v3+ API, where merge() binds the
# Node passed to it — confirm against the installed py2neo version.

from py2neo import Node, Relationship

for record in travelFound:
    itemname = record[5] + ' ' + record[6]
    a = Node("Lobbyist", id=record[2], name=record[3])
    b = Node("Legislator", id=record[9], name=itemname, party=record[10])
    # Merge on the id property so repeat appearances reuse the existing node.
    graph.merge(a, "Lobbyist", "id")
    graph.merge(b, "Legislator", "id")
    # create() rather than merge(), so each record gets its own relationship.
    ab = Relationship(a, "Travel", b)
    graph.create(ab)