<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#imports" data-toc-modified-id="imports-0.1"><span class="toc-item-num">0.1&nbsp;&nbsp;</span>imports</a></span></li><li><span><a href="#methods" data-toc-modified-id="methods-0.2"><span class="toc-item-num">0.2&nbsp;&nbsp;</span>methods</a></span></li></ul></li><li><span><a href="#Connect-to-local-Neo4J-database" data-toc-modified-id="Connect-to-local-Neo4J-database-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Connect to local Neo4J database</a></span></li><li><span><a href="#graph-creation-in-Py2Neo" data-toc-modified-id="graph-creation-in-Py2Neo-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>graph creation in Py2Neo</a></span><ul class="toc-item"><li><span><a href="#Graph-population-method" data-toc-modified-id="Graph-population-method-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Graph population method</a></span></li></ul></li></ul></div>

### imports

In [1]:
import py2neo
from py2neo import Database
from py2neo import Graph, Node, Relationship
from py2neo import NodeMatcher, RelationshipMatcher
import neoconfig
import pickle


In [2]:
def get_industries(mems): # pass list-list-dict-dict data
    """Collect the unique industry keys found across every member dict.

    Args:
        mems: iterable of chambers; each chamber is an iterable of member
            dicts. Keys other than 'party', 'name' and 'id' are treated as
            industry names.

    Returns:
        list of industry keys, each appearing once, in first-seen order.
    """
    metadata_keys = ('party', 'name', 'id')
    already_seen = set()   # fast membership check
    ordered = []           # preserves first-seen order for the return value
    for chamber in mems:
        for member in chamber:
            for key, _details in member.items():
                if key in metadata_keys or key in already_seen:
                    continue
                already_seen.add(key)
                ordered.append(key)
    return ordered

def add_industry_nodes(G, industries): # pass graph, list of industries
    """Add one node per industry to ``G`` and return the same graph.

    Each node is keyed by ``str(industry)`` and is created with ``size=0``
    and an ``ind`` attribute holding the original industry value.

    Args:
        G: graph object exposing ``add_node(key, **attrs)`` and a ``node``
            mapping (networkx-style API).
        industries: iterable of industry identifiers.

    Returns:
        The graph that was passed in, after modification.
    """
    for industry in industries:
        # The node is stored under the string form, so look it up under the
        # same key; using the raw value fails for non-string industries.
        node_key = str(industry)
        G.add_node(node_key, size=0, ind=industry)
        # NOTE(review): `G.node` is the older accessor name; newer networkx
        # releases expose it as `G.nodes` -- confirm against the installed version.
        print(type(industry), type(G.node[node_key]))
    return G # returns modified graph

def pickle_obj(obj, filename):
    """Serialize ``obj`` with pickle and write it to ``filename`` (binary, overwriting)."""
    with open(filename, 'wb') as sink:
        pickle.Pickler(sink).dump(obj)
def unpickle_obj(filename):
    """Read ``filename`` in binary mode and return the object pickled inside it."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(filename, 'rb') as source:
        restored = pickle.Unpickler(source).load()
    return restored
    
    


In [3]:
def populate_graph(G, mems, industries): # add member nodes + edges
    """Add member nodes and member->industry edges to a networkx-style graph.

    For every non-empty member dict a node keyed by the member's name is
    added, an edge is added to each industry the member lists (edge weight is
    the raw ``total`` value), and the ``size`` attribute of both the member
    node and the industry node is increased by ``int(total)``.

    Args:
        G: graph exposing ``add_node``, ``add_edge`` and a ``node`` mapping.
            Industry nodes are expected to carry a ``size`` attribute already
            (see ``add_industry_nodes``); otherwise that member is counted
            as an error.
        mems: iterable of chambers, each an iterable of member dicts with
            'name', 'party', 'id' plus one dict per industry.
        industries: unused here; kept for interface compatibility.

    Returns:
        The same graph ``G`` after modification. The number of members that
        failed to process is printed, not returned.
    """
    metadata_keys = ('party', 'name', 'id')
    ec = 0
    for chamber in mems:
        for c, member in enumerate(chamber):
            if len(member) == 0:
                continue
            try:
                G.add_node(member['name'], size=0)
                total_mem_size = 0
                print(c)
                for k, v in member.items():
                    if k in metadata_keys:
                        continue
                    # v is the industry dict for this member
                    G.add_edge(member['name'], v['name'], weight=v['total'])
                    total_mem_size += int(v['total']) # update member's total $
                    G.node[v['name']]['size'] += int(v['total']) # update industry's total $
                G.node[member['name']]['size'] = total_mem_size # reassign member node size
                G.node[member['name']]['party'] = member['party']
                G.node[member['name']]['name'] = member['name'] # give a name property
            except Exception:
                # Best-effort: count bad records instead of aborting, but let
                # KeyboardInterrupt / SystemExit propagate so the run can be stopped.
                ec += 1
    print(f'{ec} errors')
    return G

In [3]:
# Load the pickled member data from the file 'json_data' (path is relative to
# the working directory). get_industries() treats it as a list of chambers,
# each a list of member dicts.
# NOTE: unpickle_obj uses pickle.load, so only point this at a trusted file.
mems = unpickle_obj('json_data')

In [4]:
# unique industry keys across all members (everything except 'party', 'name', 'id')
industries = get_industries(mems)

### methods

## Connect to local Neo4J database

In [5]:
# NOTE(review): `db` is not referenced again in the visible cells; the Graph
# created below is what the rest of the notebook uses.
db = py2neo.Database() # instantiate using default bolt port

In [25]:
# Connect to the Neo4j instance on localhost as user 'neo4j'. The password is
# read from the local `neoconfig` module so it is not hardcoded in the notebook.
g = py2neo.Graph(host='localhost', auth = ('neo4j', neoconfig.password))

The py2neo workflow seems to be:
- begin a transaction (`tx = g.begin()`), which collects a local 'subgraph'
    - create `Node` or `Relationship` objects and assign them to variables
    - call `tx.create(variable)` for each object you want to add
    - call `tx.commit()` to push the subgraph up to the connected Neo4j database

In [7]:
# tx = g.begin() # begin transaction
# a = Node('Crab', name='Reginald', weight=4) # create nodes
# b = Node('Whale', name='Jake', weight=2000)
# ab = Relationship(a, 'FRIENDS', b) # create relationship
# tx.create(a) # create using variable names
# tx.create(b)
# tx.create(ab)

In [8]:
# tx = g.begin()
# col = Node('Spinny boy', name='Colin', moves=100)
# ram = Node('Spinny boy', name='Ramone', moves=200)
# tx.create(col)
# tx.create(ram)
# tx.commit()

In [11]:
# tx = g.begin()
# frnd = Relationship(col, 'FRIENDS', ram, same_crew='yes')
# tx.create(frnd)
# tx.commit()

In [34]:
g.exists(ab)

False

In [8]:
# NOTE: in document order, `tx` and `ab` are only assigned in cells that are
# now commented out, so this cell relies on state left in the kernel.
tx.commit() # exists in graph after commit
g.exists(ab)

True

In [12]:
mat = NodeMatcher(g)

# first node labelled 'Dolphin' whose `name` property is 'George'
# NOTE(review): no visible cell creates this node -- presumably it was added in a cell that no longer exists.
dolph = mat.match('Dolphin', name='George').first() # matches first node with type Dolphin


In [13]:
dolph

(_20:Dolphin {name: 'George', weight: 50})

In [None]:
tx = g.begin()
matcher = NodeMatcher(g)

In [15]:
matcher.match('Dolphin', weight=50).first() # returns a node

(_20:Dolphin {name: 'George', weight: 50})

In [17]:
# assign match to varname for edge creation
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
rel = Relationship(george, 'RIVALS', jake, competitions=5)
tx.create(rel)
tx.commit()


In [None]:
# NOTE: the transaction begun here is not used or committed within this cell.
tx = g.begin()
ematcher = RelationshipMatcher(g)
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
# first relationship found between the two matched nodes
m = ematcher.match(nodes=(george, jake)).first()

In [83]:
# Same lookups as the previous cell, plus the first 'Shrimp' node.
# NOTE: the transaction begun here is not used or committed within this cell.
tx = g.begin()
hugh = matcher.match('Shrimp').first()
george = matcher.match('Dolphin', name='George').first()
jake = matcher.match('Whale', name='Jake').first()
m = ematcher.match(nodes=(george, jake)).first()


In [84]:
hugh

(_40:Shrimp {name: 'Hugh', weight: 1})

In [89]:
# No relationship exists between hugh and george, so .first() yields None
# (the output below shows NoneType).
con = ematcher.match(nodes=(hugh, george)).first()
type(con)

NoneType

In [90]:
# A missing match (None) is falsy, so this prints 'no' when nothing was found.
print('yes' if con else 'no')

no


In [None]:
c0

In [66]:
# Sets the property on the local Relationship object; this cell does not push
# or commit anything, so on its own it does not show the database changing.
m['competitions'] = 500
m

(George)-[:RIVALS {competitions: 500}]->(Jake)

In [79]:
tx = g.begin()

In [None]:
(
f''
f''
    
)

In [78]:
# Demo: %-formatting works inside a triple-quoted (multiline) string.
name = "John"
num = 5
# NOTE: the literal opens with ''' followed by one extra ', which is why the
# printed text starts with a stray single quote.
print(''''Hello, %s%d!
''' % (name, num))

'Hello, John5!



In [80]:
mon = 45
# Pass the value as a Cypher query parameter ($money) instead of formatting it
# into the query text: no quoting/escaping problems, and no clash between
# Cypher's {curly braces} and Python string formatting.
query = '''
MATCH (:Dolphin { name: 'George' })-[r]-(:Crab)
SET r.money = $money
RETURN r.money
'''
tx.run(query, money=mon) # `tx` is the transaction begun in an earlier cell
tx.commit()

In [None]:
tx.

In [69]:
tx.finished()

False

In [67]:
# tx.merge() needs the subgraph to merge as an argument; calling it bare
# raises the TypeError shown below, so the commit on the next line is never reached.
tx.merge()
tx.commit()

TypeError: merge() missing 1 required positional argument: 'subgraph'

In [42]:
m = ematcher.match(nodes=(george, jake)).first()
m

(George)-[:RIVALS {competitions: 5}]->(Jake)

In [73]:
f'''a{1+2}
b'''

'a3\nb'

## graph creation in Py2Neo

In [22]:
industries[2]

'Agricultural Services/Products'


- Py2Neo's `Transaction.create()` (followed by `commit()`) presents an issue when updating properties on edges that already exist.
- it commits a subgraph to the larger database, but:
            - "Any entities in subgraph that are already bound to remote entities will remain unchanged, those which are not will become bound to their newly-created counterparts"
- so since our edges are going to be programmatically updated, we can't use Py2Neo's RelationshipMatcher to update the main graph
    - fix: stop mucking about with a wrapper and just use a Cypher statement
        - `SET relationship.property = value` will create the property if it does not exist, and update it if it does
    - cypher investigations: my beloved f-strings are awkward for a multiline query, because Cypher uses the same {curly braces} that f-strings do (every literal brace would have to be doubled as `{{ }}`).
        - `%` formatting works with '''multiline''' strings, so no worries; passing the values as Cypher query parameters avoids string formatting altogether

In [None]:

''' old method '''
def populate_graph(g, mems, industries): # add member nodes + edges
    """Earlier, unfinished attempt at populating the graph (labelled 'old method').

    NOTE(review): this body is only half converted. It takes a graph as `g`
    and calls add_industries on it, but everything after that refers to `G`
    (capital), which is not a parameter or local here. Unless a global `G`
    exists, each member is counted as an error by the bare `except`, and the
    final `return G` raises NameError. `graphify` appears to be its
    replacement -- confirm before relying on this function.
    """
    g = add_industries(g, industries) # add industry nodes
    nmatch = NodeMatcher(g) # not used below
    ematch = RelationshipMatcher(g) # not used below
    edges = [] # not used below
    ec=0 # number of members that raised while being processed
    
    for chamber in mems:
        for c, member in enumerate(chamber):
            if len(member)!=0: # skip empty member records
                try:
                    mem_node = G.add_node(member['name'], size=0)
                    total_mem_size = 0
                    print(c)
                    for k, v in member.items():
                        if k!='party' and k!='name' and k!='id': # industry dict
                            G.add_edge(member['name'], v['name'], weight=v['total'])
#                             G[member['name']][v['name']]['party'] = member['party']
                            total_mem_size += int(v['total']) # update member's total bribery $
                            G.node[v['name']]['size'] += int(v['total']) # update industry's total bribery $
                    G.node[member['name']]['size'] = total_mem_size # reassign mem_node size
                    G.node[member['name']]['party'] = member['party']
                    G.node[member['name']]['name'] = member['name'] # give a name property
                except: # bare except: hides every failure, including a NameError on `G`
                    ec+=1 # count errors to ensure data integrity
    print(f'{ec} errors')
    return G

In [28]:
mems[0][0]

{'name': 'Ralph Abraham (R)',
 'party': 'R',
 'id': 'N00036633',
 'Health Professionals': {'code': 'H01',
  'name': 'Health Professionals',
  'indiv': '32400',
  'pac': '55500',
  'total': '87900'},
 'Crop Production & Basic Processing': {'code': 'A01',
  'name': 'Crop Production & Basic Processing',
  'indiv': '30100',
  'pac': '52250',
  'total': '82350'},
 'Agricultural Services/Products': {'code': 'A07',
  'name': 'Agricultural Services/Products',
  'indiv': '750',
  'pac': '37500',
  'total': '38250'},
 'General Contractors': {'code': 'C01',
  'name': 'General Contractors',
  'indiv': '29100',
  'pac': '4500',
  'total': '33600'},
 'Air Transport': {'code': 'M01',
  'name': 'Air Transport',
  'indiv': '0',
  'pac': '30500',
  'total': '30500'},
 'Leadership PACs': {'code': 'Q03',
  'name': 'Leadership PACs',
  'indiv': '0',
  'pac': '29750',
  'total': '29750'},
 'Defense Aerospace': {'code': 'D01',
  'name': 'Defense Aerospace',
  'indiv': '0',
  'pac': '29000',
  'total': '29000

### Graph population method

In [None]:
MATCH (:Dolphin { name: 'George' })-[r]-(:Crab)
SET r.money = %d // matches the relationship(s) between George the Dolphin and any Crab, and sets money
RETURN r.money // what happens if George is linked to 2 crabs? would both relationships be changed?

In [14]:
def update_node_weight(g, name, weight): # string name of node, int weight to update to
    """Set the ``weight`` property of every node whose ``name`` equals ``name``.

    The match is on the ``name`` property only (no label), so it applies to
    both industry and member nodes. Values are sent as Cypher query
    parameters rather than formatted into the query text, so names containing
    quotes or other special characters are handled safely.

    Args:
        g: py2neo graph (anything exposing ``begin()`` that returns a
            transaction with ``run`` and ``commit``).
        name: value of the node's ``name`` property.
        weight: new value for the node's ``weight`` property.

    Returns:
        The graph ``g`` that was passed in.
    """
    query = '''
    MATCH (n {name: $name})
    SET n.weight = $weight
    RETURN n
    '''
    tx = g.begin() # one short transaction per update
    tx.run(query, {'name': name, 'weight': weight})
    tx.commit()
    return g # return g after modifying

In [22]:
def add_industries(g, industries): # populates graph with Industry nodes
    """Create one 'Industry' node per entry of ``industries`` and commit them.

    All nodes are created inside a single transaction, each with
    ``name=<industry>`` and ``weight=0`` (weight is the total money given).
    A progress line is printed after each node is staged.

    Returns:
        The graph ``g`` that was passed in.
    """
    batch = g.begin()
    for count, industry_name in enumerate(industries, start=1):
        industry_node = Node('Industry', name=industry_name, weight=0)
        batch.create(industry_node)
        print(f'created {count} industries . . . ')
    batch.commit() # push industry nodes to local DB
    return g

def graphify(g, mems, industries):
    ''' args: p2neo graph, congressmembers_list_(dicts), industries_list_(strings)
    returns: p2neo graph obj.  also commits changes to local neo4j database

    Steps:
      1. create the Industry nodes (add_industries);
      2. for each non-empty member dict, create a Member node whose weight is
         the sum of that member's industry totals, plus one relationship per
         industry carrying weight=<donation>, and commit them together;
      3. write each industry's accumulated total to its node
         (update_node_weight).

    mems is indexed by chamber: index 0 is labelled 'House', anything else
    'Senate'. Members are expected to have 'name', 'party', 'id' and one dict
    per industry with 'name' and 'total' keys.
    '''
    metadata_keys = ('name', 'party', 'id')

    g = add_industries(g, industries) # add industry nodes
    ind_totals = dict.fromkeys(industries, 0) # to keep track of industry totals
    nmatch = NodeMatcher(g)
    ind_nodes = {} # industry name -> matched node, so each is queried only once

    print(f'created {len(industries)} industry nodes')

    for ch, chamber in enumerate(mems):
        chamb = 'House' if ch == 0 else 'Senate'
        for c, mem in enumerate(chamber): # mem is dict
            if not mem: # skip empty records instead of failing on mem['name']
                continue
            member_name = mem['name']
            print(f'starting {c} {member_name}')

            # (industry name, donation) pairs; totalled up front so the Member
            # node can be created with its final weight in one step
            donations = [(val['name'], int(val['total']))
                         for key, val in mem.items()
                         if key not in metadata_keys]
            money_received = sum(amount for _, amount in donations)

            tx = g.begin() # new tx for each node+all its edges
            mem_node = Node('Member', name=member_name, party=mem['party'],
                            cid=mem['id'], chamber=chamb, weight=money_received)
            tx.create(mem_node) # create member node

            for ind_name, donation in donations:
                if ind_name not in ind_nodes:
                    ind_nodes[ind_name] = nmatch.match('Industry', name=ind_name).first()
                ind_node = ind_nodes[ind_name]
                if ind_node is None:
                    # no such Industry node: a relationship cannot be built
                    print(f'[!] no Industry node named {ind_name!r}; skipped edge for {member_name}')
                    continue
                # NOTE(review): no relationship type is passed, so py2neo falls
                # back to its default type -- consider naming it explicitly.
                bribe = Relationship(mem_node, ind_node, weight=donation)
                tx.create(bribe) # add one edge to graph
                ind_totals[ind_name] = ind_totals.get(ind_name, 0) + donation

            tx.commit() # without this the member node and its edges never reach the DB
            print(f"{c}  {member_name} has received {money_received} in total")

    for name, total in ind_totals.items(): # update using ind_totals
        g = update_node_weight(g, name, total)
    print('[o] updated all industry node totals')
    return g # why not
                  
                  
                  

In [23]:
print(f'{len(mems[0]), len(mems[1])}  {len(industries)}  ') # good to go

(439, 100)  82  


In [24]:
g = graphify(g, mems, industries)

created 1 industries . . . 
created 2 industries . . . 
created 3 industries . . . 
created 4 industries . . . 
created 5 industries . . . 
created 6 industries . . . 
created 7 industries . . . 
created 8 industries . . . 
created 9 industries . . . 
created 10 industries . . . 
created 11 industries . . . 
created 12 industries . . . 
created 13 industries . . . 
created 14 industries . . . 
created 15 industries . . . 
created 16 industries . . . 
created 17 industries . . . 
created 18 industries . . . 
created 19 industries . . . 
created 20 industries . . . 
created 21 industries . . . 
created 22 industries . . . 
created 23 industries . . . 
created 24 industries . . . 
created 25 industries . . . 
created 26 industries . . . 
created 27 industries . . . 
created 28 industries . . . 
created 29 industries . . . 
created 30 industries . . . 
created 31 industries . . . 
created 32 industries . . . 
created 33 industries . . . 
created 34 industries . . . 
created 35 industries .

KeyboardInterrupt: 

In [33]:
help(tx.create)

Help on method create in module py2neo.database:

create(subgraph) method of py2neo.database.Transaction instance
    Create remote nodes and relationships that correspond to those in a
    local subgraph. Any entities in *subgraph* that are already bound to
    remote entities will remain unchanged, those which are not will become
    bound to their newly-created counterparts.
    
    For example::
    
        >>> from py2neo import Graph, Node, Relationship
        >>> g = Graph()
        >>> tx = g.begin()
        >>> a = Node("Person", name="Alice")
        >>> tx.create(a)
        >>> b = Node("Person", name="Bob")
        >>> ab = Relationship(a, "KNOWS", b)
        >>> tx.create(ab)
        >>> tx.commit()
        >>> g.exists(ab)
        True
    
    :param subgraph: a :class:`.Node`, :class:`.Relationship` or other
                creatable object

