<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#imports" data-toc-modified-id="imports-0.1"><span class="toc-item-num">0.1&nbsp;&nbsp;</span>imports</a></span></li><li><span><a href="#methods" data-toc-modified-id="methods-0.2"><span class="toc-item-num">0.2&nbsp;&nbsp;</span>methods</a></span></li></ul></li><li><span><a href="#Connect-to-local-Neo4J-database" data-toc-modified-id="Connect-to-local-Neo4J-database-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Connect to local Neo4J database</a></span></li><li><span><a href="#graph-creation-in-Py2Neo" data-toc-modified-id="graph-creation-in-Py2Neo-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>graph creation in Py2Neo</a></span></li></ul></div>

### imports

In [29]:
import py2neo
from py2neo import Database
from py2neo import Graph, Node, Relationship
from py2neo import NodeMatcher, RelationshipMatcher
import neoconfig
import pickle


In [3]:
def get_industries(mems): # pass list-list-dict-dict data
    """Collect the unique industry names that appear across all members.

    Args:
        mems: iterable of chambers; each chamber is an iterable of member
            dicts. Every key other than 'party', 'name' and 'id' is treated
            as an industry name.

    Returns:
        list of industry keys without duplicates, in first-seen order.
    """
    non_industry_keys = {'party', 'name', 'id'}
    # A dict keeps insertion order and gives O(1) membership checks, unlike
    # re-scanning a growing list for every key of every member.
    seen = {}
    for chamber in mems:
        for member in chamber:
            for key in member:
                if key not in non_industry_keys:
                    seen.setdefault(key, None)
    return list(seen)

def add_industry_nodes(G, industries): # pass graph, list of industries
    """Add one zero-sized node per industry to `G`.

    Each node is keyed by ``str(industry)`` and carries ``size=0`` plus the
    original value under ``ind``.

    Args:
        G: graph object exposing ``add_node(key, **attrs)`` (presumably a
            networkx graph — confirm against the caller).
        industries: iterable of industry identifiers.

    Returns:
        The same graph object, modified in place.
    """
    # No per-node debug print here: the old one looked nodes up by the raw
    # `industry` value although they are stored under str(industry), and it
    # went through `G.node`, which recent networkx releases do not provide.
    for industry in industries:
        G.add_node(str(industry), size=0, ind=industry)
    return G # returns modified graph

def pickle_obj(obj, filename):
    """Serialize `obj` with pickle into the file at `filename`.

    The file is opened in binary write mode, so existing content is replaced.
    Returns None.
    """
    with open(filename, 'wb') as sink:
        writer = pickle.Pickler(sink)
        writer.dump(obj)
def unpickle_obj(filename):
    """Read the file at `filename` and return the object pickled in it.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    with open(filename, 'rb') as source:
        reader = pickle.Unpickler(source)
        return reader.load()
    
    


In [None]:
def populate_graph(G, mems, industries): # add member nodes + edges
    """Add a node per member and a weighted edge per member/industry donation.

    For every non-empty member dict a node keyed by the member's name is
    added, one edge is added to each industry the member received money from
    (edge ``weight`` is the raw ``total`` value), and the ``size`` of both the
    member node and the industry node is increased by ``int(total)``.

    Args:
        G: graph exposing ``add_node``, ``add_edge`` and a ``nodes`` mapping
            (presumably a networkx graph whose industry nodes already carry a
            ``size`` attribute — confirm against the caller).
        mems: iterable of chambers, each an iterable of member dicts with
            'name', 'party', 'id' and one nested dict per industry holding
            at least 'name' and 'total'.
        industries: unused; kept so existing calls keep working.

    Returns:
        The same graph object, modified in place. The number of malformed
        member records that were skipped is printed.
    """
    non_industry_keys = ('party', 'name', 'id')
    ec = 0
    for chamber in mems:
        for member in chamber:
            if len(member) == 0:
                continue
            try:
                name = member['name']
                G.add_node(name, size=0)
                total_mem_size = 0
                for k, v in member.items():
                    if k in non_industry_keys:
                        continue
                    # remaining keys hold one industry dict each
                    G.add_edge(name, v['name'], weight=v['total'])
                    amount = int(v['total'])
                    total_mem_size += amount # update member's total bribery $
                    G.nodes[v['name']]['size'] += amount # update industry's total bribery $
                G.nodes[name]['size'] = total_mem_size # reassign member node size
                G.nodes[name]['party'] = member['party']
                G.nodes[name]['name'] = name # give a name property
            except (KeyError, TypeError, ValueError):
                # Only malformed records are counted; anything else (a typo,
                # a backend failure, Ctrl-C) must surface instead of being
                # silently tallied as a data error.
                ec += 1 # count errors to ensure data integrity
    print(f'{ec} errors')
    return G

In [4]:
# Load the member records from the pickle file 'json_data'.
# Judging by how the helpers in this notebook iterate it, the result is a list
# of chambers, each a list of per-member dicts — TODO confirm against the scraper.
mems = unpickle_obj('json_data')

In [19]:
# unique industry keys found across every member dict
industries = get_industries(mems)

### methods

## Connect to local Neo4J database

In [5]:
# NOTE(review): `db` is not referenced again in the visible cells — confirm it is still needed.
db = py2neo.Database() # instantiate using default bolt port

In [6]:
# Connect to the server on localhost as user 'neo4j'; the password is read from
# the local `neoconfig` module instead of being written into the notebook.
g = py2neo.Graph(host='localhost', auth = ('neo4j', neoconfig.password))

The logic of py2neo seems to be:
- begin a transaction with `tx = g.begin()`; the nodes and relationships you stage in it form the 'subgraph'
    - create `Node` or `Relationship` objects locally and assign them to placeholder variables
    - call `tx.create(placeholder_name)` for everything you want to add
    - call `tx.commit()` to push the subgraph up to the connected Neo4j database

In [7]:
# Stage two nodes and the relationship between them in one transaction.
# Nothing is committed in this cell; the commit happens a few cells below.
tx = g.begin() # begin transaction
a = Node('Crab', name='Reginald', weight=4) # create nodes
b = Node('Whale', name='Jake', weight=2000)
ab = Relationship(a, 'FRIENDS', b) # create relationship
tx.create(a) # create using variable names
tx.create(b)
tx.create(ab)

In [34]:
# `ab` has only been staged with tx.create() so far; the recorded result is False
g.exists(ab)

False

In [8]:
# Commit the staged nodes/relationship, then repeat the existence check
# (recorded result: True).
tx.commit() # exists in graph after commit
g.exists(ab)

True

In [9]:
# Second transaction: create a Dolphin node and link it to the existing node `a`.
tx = g.begin()
c = Node('Dolphin', name='George', weight=50)
ca = Relationship(c, 'FRIENDS', a)
tx.create(c)
tx.create(ca)
tx.commit()

In [27]:
# Create a standalone Shrimp node.
# NOTE: this rebinds `c`, which previously referred to the Dolphin node.
tx = g.begin()
c = Node('Shrimp', name='Hugh', weight=1)
tx.create(c)
tx.commit()

In [None]:
# Open a new transaction (used below to create `rel`) and build a node matcher on g.
tx = g.begin()
matcher = NodeMatcher(g)

In [15]:
matcher.match('Dolphin', weight=50).first() # returns a node

(_20:Dolphin {name: 'George', weight: 50})

In [17]:
# Look up two existing nodes by label + name and keep them in variables so a
# relationship can be created between them, then commit the open transaction.
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
rel = Relationship(george, 'RIVALS', jake, competitions=5)
tx.create(rel)
tx.commit()


In [None]:
# Matching goes through the matchers bound to g, so no transaction is opened in
# this cell (the `tx = g.begin()` that used to start it was overwritten a cell
# later without ever being committed or rolled back).
ematcher = RelationshipMatcher(g)
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
m = ematcher.match(nodes=(george, jake)).first() # first relationship from george to jake

In [64]:
# Begin the transaction that is committed a few cells below, re-fetch both
# nodes and display the relationship found between them.
tx = g.begin()
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
m = ematcher.match(nodes=(george, jake)).first()
m

(George)-[:RIVALS {competitions: 200}]->(Jake)

In [66]:
# NOTE(review): this edits the local relationship object; presumably it still
# has to be pushed to the database before a fresh match sees it — confirm.
m['competitions'] = 500
m

(George)-[:RIVALS {competitions: 500}]->(Jake)

In [None]:
# (unfinished scratch input removed: a dangling `tx.` is a SyntaxError and would stop a Run All)

In [63]:
# A bare `tx.merge` only looks the method up and does nothing; push the edited
# relationship so its changed property reaches the database, then commit.
tx.push(m)
tx.commit()

In [42]:
m = ematcher.match(nodes=(george, jake)).first()
m

(George)-[:RIVALS {competitions: 5}]->(Jake)

## graph creation in Py2Neo

In [22]:
industries[2]

'Agricultural Services/Products'

In [None]:
def add_industries(g, industries): # populates graph with Industry nodes
    """Create one `Industry` node per entry of `industries` and commit them.

    Every node gets the industry as its `name` property and `weight=0`.
    All nodes are created inside a single transaction.

    Args:
        g: connected py2neo graph.
        industries: iterable of industry names.

    Returns:
        The graph object that was passed in.
    """
    batch = g.begin()
    for label in industries:
        batch.create(Node('Industry', name=label, weight=0))
    batch.commit() # push industry nodes to local DB
    return g

def populate_graph(g, mems, industries): # add member nodes + edges
    """Write industries, members and their donation links to the Neo4j graph.

    Industry nodes are created first via `add_industries`. Each non-empty,
    well-formed member dict then becomes a `Member` node (`name`, `party`,
    `weight` = sum of its donation totals) connected to the matching
    `Industry` node by a `FUNDED_BY` relationship whose `weight` is that
    donation's total. Each Industry node's `weight` is raised by the donations
    pointing at it. The `Member` label and `FUNDED_BY` type are choices made
    here; adjust them if the graph schema is defined elsewhere.

    Args:
        g: connected py2neo graph.
        mems: iterable of chambers, each an iterable of member dicts with
            'name', 'party', 'id' and one nested dict per industry holding
            at least 'name' and 'total'.
        industries: iterable of industry names to create nodes for.

    Returns:
        The graph object. The number of skipped member records is printed.
    """
    g = add_industries(g, industries) # add industry nodes
    nmatch = NodeMatcher(g)
    non_industry_keys = ('party', 'name', 'id')
    industry_nodes = {} # industry name -> matched Industry node (None if absent)
    ec = 0

    for chamber in mems:
        for member in chamber:
            if len(member) == 0:
                continue
            try:
                # Validate and convert the whole record before touching the
                # database so a bad record never leaves a half-written
                # transaction behind.
                name, party = member['name'], member['party']
                donations = [(v['name'], int(v['total']))
                             for k, v in member.items()
                             if k not in non_industry_keys]
            except (KeyError, TypeError, ValueError):
                ec += 1 # count errors to ensure data integrity
                continue

            for ind_name, _ in donations:
                if ind_name not in industry_nodes:
                    industry_nodes[ind_name] = nmatch.match('Industry', name=ind_name).first()
            if any(industry_nodes[ind_name] is None for ind_name, _ in donations):
                ec += 1 # donation refers to an industry that has no node
                continue

            mem_node = Node('Member', name=name, party=party,
                            weight=sum(amount for _, amount in donations))
            tx = g.begin() # one transaction per member
            tx.create(mem_node)
            for ind_name, amount in donations:
                ind_node = industry_nodes[ind_name]
                tx.create(Relationship(mem_node, 'FUNDED_BY', ind_node, weight=amount))
                # local running total; written to the database after the loop
                ind_node['weight'] = (ind_node['weight'] or 0) + amount
            tx.commit()

    for ind_node in industry_nodes.values():
        if ind_node is not None:
            g.push(ind_node) # persist the accumulated industry totals
    print(f'{ec} errors')
    return g

In [28]:
mems[0][0]

{'name': 'Ralph Abraham (R)',
 'party': 'R',
 'id': 'N00036633',
 'Health Professionals': {'code': 'H01',
  'name': 'Health Professionals',
  'indiv': '32400',
  'pac': '55500',
  'total': '87900'},
 'Crop Production & Basic Processing': {'code': 'A01',
  'name': 'Crop Production & Basic Processing',
  'indiv': '30100',
  'pac': '52250',
  'total': '82350'},
 'Agricultural Services/Products': {'code': 'A07',
  'name': 'Agricultural Services/Products',
  'indiv': '750',
  'pac': '37500',
  'total': '38250'},
 'General Contractors': {'code': 'C01',
  'name': 'General Contractors',
  'indiv': '29100',
  'pac': '4500',
  'total': '33600'},
 'Air Transport': {'code': 'M01',
  'name': 'Air Transport',
  'indiv': '0',
  'pac': '30500',
  'total': '30500'},
 'Leadership PACs': {'code': 'Q03',
  'name': 'Leadership PACs',
  'indiv': '0',
  'pac': '29750',
  'total': '29750'},
 'Defense Aerospace': {'code': 'D01',
  'name': 'Defense Aerospace',
  'indiv': '0',
  'pac': '29000',
  'total': '29000

In [None]:
def graphify(g, mems, industries):
    ''' args: py2neo graph, congressmembers list (chambers -> member dicts),
          industries list (strings)
    returns: py2neo graph obj.  also commits changes to local neo4j database

    Industry nodes are created once through add_industries.  Each non-empty
    member dict becomes a `Member` node (name, party, id, weight = sum of its
    donation totals) linked to the matching `Industry` node by a `FUNDED_BY`
    relationship whose `weight` is that donation's total; every Industry
    node's `weight` is raised by the donations pointing at it.  The `Member`
    label and `FUNDED_BY` type are choices made here.

    raises: KeyError / TypeError / ValueError for a malformed member record,
            LookupError when a donation names an industry that has no node.
    '''
    g = add_industries(g, industries) # creates + commits the Industry nodes (once)
    matcher = NodeMatcher(g)
    identity_keys = ('name', 'party', 'id') # every other key is an industry donation
    industry_nodes = {} # industry name -> bound Industry node, matched once

    for chamber in mems:
        for mem in chamber:
            if not mem: # empty placeholder record
                continue
            # Testing membership in identity_keys keeps the 'name' and 'party'
            # strings out of the donation branch (an if/if/if/else chain sends
            # them there, because its else belongs to the last `if` only).
            donations = [(val['name'], int(val['total']))
                         for key, val in mem.items()
                         if key not in identity_keys]

            # Resolve every Industry node before opening the transaction so a
            # bad record cannot leave a half-written transaction behind.
            for ind_name, _ in donations:
                if ind_name not in industry_nodes:
                    found = matcher.match('Industry', name=ind_name).first()
                    if found is None:
                        raise LookupError(f'no Industry node named {ind_name!r}')
                    industry_nodes[ind_name] = found

            mem_node = Node('Member', name=mem['name'], party=mem['party'],
                            id=mem['id'],
                            weight=sum(total for _, total in donations))
            tx = g.begin() # one transaction per member
            tx.create(mem_node)
            for ind_name, total in donations:
                ind_node = industry_nodes[ind_name]
                tx.create(Relationship(mem_node, 'FUNDED_BY', ind_node, weight=total))
                # local running total; written to the database after the loop
                ind_node['weight'] = (ind_node['weight'] or 0) + total
            tx.commit()

    for ind_node in industry_nodes.values():
        g.push(ind_node) # persist the accumulated industry totals
    return g
                    
                    
                    
                    
                    
                    
        

In [33]:
help(tx.create)

Help on method create in module py2neo.database:

create(subgraph) method of py2neo.database.Transaction instance
    Create remote nodes and relationships that correspond to those in a
    local subgraph. Any entities in *subgraph* that are already bound to
    remote entities will remain unchanged, those which are not will become
    bound to their newly-created counterparts.
    
    For example::
    
        >>> from py2neo import Graph, Node, Relationship
        >>> g = Graph()
        >>> tx = g.begin()
        >>> a = Node("Person", name="Alice")
        >>> tx.create(a)
        >>> b = Node("Person", name="Bob")
        >>> ab = Relationship(a, "KNOWS", b)
        >>> tx.create(ab)
        >>> tx.commit()
        >>> g.exists(ab)
        True
    
    :param subgraph: a :class:`.Node`, :class:`.Relationship` or other
                creatable object

