In [0]:
!pip install py2neo
!pip install python-igraph
!pip install textblob

Collecting python-igraph
[?25l  Downloading https://files.pythonhosted.org/packages/0f/a0/4e7134f803737aa6eebb4e5250565ace0e2599659e22be7f7eba520ff017/python-igraph-0.7.1.post6.tar.gz (377kB)
[K    100% |████████████████████████████████| 378kB 4.1MB/s ta 0:00:01
[?25hBuilding wheels for collected packages: python-igraph
  Building wheel for python-igraph (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/dhanendra/.cache/pip/wheels/41/d6/02/34eebae97e25f5b87d60f4c0687e00523e3f244fa41bc3f4a7
Successfully built python-igraph
Installing collected packages: python-igraph
Successfully installed python-igraph-0.7.1.post6
Collecting textblob
[?25l  Downloading https://files.pythonhosted.org/packages/60/f0/1d9bfcc8ee6b83472ec571406bd0dd51c0e6330ff1a51b2d29861d389e85/textblob-0.15.3-py2.py3-none-any.whl (636kB)
[K    100% |████████████████████████████████| 645kB 33kB/s ta 0:00:01
Installing collected packages: textblob
Successfully installed textblob-0.15.3


In [0]:
# Import py2neo and connect to Neo4j
import os
from getpass import getpass

from py2neo import Graph

# Connection settings are read from the environment so that no host or
# password is stored in the notebook:
#   NEO4J_HOST       (default "localhost")
#   NEO4J_HTTP_PORT  (default 7474)
#   NEO4J_USER       (default "neo4j")
#   NEO4J_PASSWORD   (prompted for interactively when unset)
NEO4J_HOST = os.environ.get("NEO4J_HOST", "localhost")
NEO4J_HTTP_PORT = int(os.environ.get("NEO4J_HTTP_PORT", "7474"))
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# bolt=False: talk to the server over HTTP on the configured port
graph = Graph(bolt=False, host=NEO4J_HOST, http_port=NEO4J_HTTP_PORT, user=NEO4J_USER, password=NEO4J_PASSWORD)

In [0]:
# Sanity check: ask the server how many nodes it currently holds
node_count_query = "MATCH (a) RETURN COUNT(a) AS numberOfNodes"
graph.run(node_count_query).evaluate()

In [0]:
# Schema: every Group node must have a unique id
group_id_constraint = "CREATE CONSTRAINT ON (g:Group) ASSERT g.id IS UNIQUE;"
graph.run(group_id_constraint)

In [0]:
# Schema: every Topic node must have a unique id
topic_id_constraint = "CREATE CONSTRAINT ON (t:Topic) ASSERT t.id IS UNIQUE;"
graph.run(topic_id_constraint)

In [0]:
# Schema: index Group nodes by name (later queries filter on group.name)
group_name_index = "CREATE INDEX ON :Group(name)"
graph.run(group_name_index)

In [0]:
# Schema: index Topic nodes by name (later queries filter on topic.name)
topic_name_index = "CREATE INDEX ON :Topic(name)"
graph.run(topic_name_index)

In [0]:
# Import groups.csv: one Group node per row id; the remaining properties are
# only written when the node is first created.
load_groups_query = '''
LOAD CSV WITH HEADERS
FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/groups.csv"
AS row
MERGE (group:Group { id:row.id })
ON CREATE SET
  group.name = row.name,
  group.urlname = row.urlname,
  group.rating = toInt(row.rating),
  group.created = toInt(row.created)
'''
graph.run(load_groups_query)

In [0]:
# Import groups_topics.csv, first pass: create one Topic node per topic id
load_topics_query = '''
LOAD CSV WITH HEADERS FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/groups_topics.csv"  AS row
MERGE (topic:Topic {id: row.id})
ON CREATE SET topic.name = row.name, topic.urlkey = row.urlkey
'''
graph.run(load_topics_query)

In [0]:
# Import groups_topics.csv, second pass: connect each existing Group to each
# of its existing Topics with a HAS_TOPIC relationship
link_groups_topics_query = '''
LOAD CSV WITH HEADERS FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/groups_topics.csv"  AS row
MATCH (topic:Topic {id: row.id})
MATCH (group:Group {id: row.groupId})
MERGE (group)-[:HAS_TOPIC]->(topic)
'''
graph.run(link_groups_topics_query)

In [0]:
# For groups whose name contains "Graph Database": list the ten other groups
# sharing the most topics with them, together with the shared topic names
similar_groups_query = '''
MATCH (group:Group)-[:HAS_TOPIC]->(topic)<-[:HAS_TOPIC]-(otherGroup)
WHERE group.name CONTAINS "Graph Database"
RETURN otherGroup.name, COUNT(topic) AS topicsInCommon,
       COLLECT(topic.name) AS topics
ORDER BY topicsInCommon DESC, otherGroup.name
LIMIT 10
'''
result = graph.run(similar_groups_query)

# Print one record per line
for row in result:
    print(row)

In [0]:
from igraph import Graph as IGraph

In [0]:

# Topic co-occurrence preview: two topics are paired when one node has a
# HAS_TOPIC relationship to both; weight is the number of such nodes.
# ID(topic) < ID(other) keeps each unordered pair once and excludes a topic
# being paired with itself. Only the ten heaviest pairs are returned.
query = """
MATCH (topic:Topic)<-[:HAS_TOPIC]-()-[:HAS_TOPIC]->(other:Topic)
WHERE ID(topic) < ID(other)
RETURN topic.name, other.name, COUNT(*) AS weight
ORDER BY weight DESC
LIMIT 10
"""

result = graph.run(query)
# Print one record per line
for row in result:
    print(row)


In [0]:
# Same topic co-occurrence query as the preview, but without ORDER BY / LIMIT:
# every (topic name, other topic name, weight) row is fetched.
query = """
MATCH (topic:Topic)<-[:HAS_TOPIC]-()-[:HAS_TOPIC]->(other:Topic)
WHERE ID(topic) < ID(other)
RETURN topic.name, other.name, COUNT(*) AS weight
"""

# Build an igraph graph from the rows; weights=True keeps the third column as
# an edge attribute (read back below as "weight").
ig = IGraph.TupleList(graph.run(query), weights=True)
ig

In [0]:
# Walktrap community detection over the weighted topic graph; the result is
# converted straight away with as_clustering().
clusters = ig.community_walktrap(weights="weight").as_clustering()
len(clusters)

In [0]:

# One dict per topic vertex: its name (used as both id and label) and the
# index of the cluster it was assigned to.
# clusters.membership is indexed by vertex index, so the index of the vertex
# being iterated is used directly instead of searching the graph by name for
# every node.
nodes = [
    {
        "id": vertex["name"],
        "label": vertex["name"],
        "group": clusters.membership[vertex.index],
    }
    for vertex in ig.vs
]

nodes[:5]

In [0]:
# Write the clustering back to Neo4j: for every entry of the `params` list,
# find the Topic whose name equals the entry's id, merge a Cluster node named
# after the entry's group, and link the topic to it with IN_CLUSTER.
query = """
UNWIND {params} AS p 
MATCH (t:Topic {name: p.id}) 
MERGE (cluster:Cluster {name: p.group})
MERGE (t)-[:IN_CLUSTER]->(cluster)
"""

# `nodes` (list of dicts with id / label / group keys) is bound to {params}
graph.run(query, params = nodes)

We can see which clusters the Python-related topics ended up in:
![](http://guides.neo4j.com/bostonmeetup/img/python_cluster.png)

In [0]:
# Clusters (and the IN_CLUSTER relationships) of every topic whose name
# contains "Python"
python_clusters_query = '''
MATCH (cluster:Cluster)<-[inCluster:IN_CLUSTER]-(topic)
WHERE topic.name CONTAINS "Python"
RETURN *
'''
graph.run(python_clusters_query)

In [0]:
# Schema: every Member node must have a unique id
member_id_constraint = '''
CREATE CONSTRAINT ON (m:Member)
ASSERT m.id IS UNIQUE'''
graph.run(member_id_constraint)

In [0]:
# Import members.csv, first pass: one Member node per distinct (id, name)
# pair; the name is only written when the node is first created.
load_members_query = '''
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS
FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/members.csv" AS row
WITH DISTINCT row.id AS id, row.name AS name
MERGE (member:Member {id: id})
ON CREATE SET member.name = name
'''
graph.run(load_members_query)

In [0]:
# Import members.csv, second pass: for rows that carry a `joined` value, link
# the existing Member to the existing Group with MEMBER_OF and store the join
# time on the relationship when it is first created.
load_memberships_query = '''
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS
FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/members.csv" AS row
WITH row WHERE NOT row.joined is null
MATCH (member:Member {id: row.id})
MATCH (group:Group {id: row.groupId})
MERGE (member)-[membership:MEMBER_OF]->(group)
ON CREATE SET membership.joined=toInt(row.joined);
'''
graph.run(load_memberships_query)

In [0]:
# Peek at ten member / group / membership triples to check the import
sample_memberships_query = '''
MATCH (member:Member)-[membership:MEMBER_OF]->(group)
RETURN member, group, membership
LIMIT 10
'''
graph.run(sample_memberships_query)

![](http://guides.neo4j.com/bostonmeetup/img/group_members.png)

In [0]:
# Schema: index Member nodes by name (the next query looks a member up by name)
member_name_index = "CREATE INDEX ON :Member(name)"
graph.run(member_name_index)

### Find groups similar to mine

In [0]:
# Group recommendations for one member: groups sharing topics with the groups
# the member already belongs to, excluding groups the member is already in,
# ranked by the number of topic overlaps (top ten).
recommended_groups_query = '''MATCH (member:Member {name: "Will Lyon"})-[:MEMBER_OF]->()-[:HAS_TOPIC]->()<-[:HAS_TOPIC]-(otherGroup:Group)
WHERE NOT (member)-[:MEMBER_OF]->(otherGroup)
RETURN otherGroup.name,
       COUNT(*) AS topicsInCommon
ORDER BY topicsInCommon DESC
LIMIT 10'''
results = graph.run(recommended_groups_query)

# Print one record per line
for row in results:
    print(row)

In [0]:
# Schema: every Event node must have a unique id
event_id_constraint = "CREATE CONSTRAINT ON (e:Event) ASSERT e.id IS UNIQUE"
graph.run(event_id_constraint)

In [0]:
# Schema: index Event nodes by time (later queries filter and sort on event.time)
event_time_index = "CREATE INDEX ON :Event(time)"
graph.run(event_time_index)

In [0]:
# Import events.csv, first pass: one Event node per row id; name, description,
# time and UTC offset are only written when the node is first created.
load_events_query = '''USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/events.csv" AS row
MERGE (event:Event {id: row.id})
ON CREATE SET event.name = row.name,
              event.description = row.description,
              event.time = toInt(row.time),
              event.utcOffset = toInt(row.utc_offset)
'''
graph.run(load_events_query)
              

In [0]:
# Import events.csv, second pass: connect each existing Group to each of its
# existing Events with a HOSTED_EVENT relationship
link_groups_events_query = '''
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "https://raw.githubusercontent.com/johnymontana/harvard-bar/master/data/events.csv" AS row

WITH distinct row.group_id as groupId, row.id as eventId
MATCH (group:Group {id: groupId})
MATCH (event:Event {id: eventId})
MERGE (group)-[:HOSTED_EVENT]->(event)
'''
graph.run(link_groups_events_query)

In [0]:
# The ten most recent past events hosted by groups whose name contains
# "Graph Database" (event.time earlier than the server's current timestamp)
recent_graph_db_events_query = '''
MATCH (group:Group)-[hosted:HOSTED_EVENT]->(event)
WHERE group.name CONTAINS "Graph Database" AND event.time < timestamp()
RETURN event, group, hosted
ORDER BY event.time DESC
LIMIT 10
'''
graph.run(recent_graph_db_events_query)

In [0]:
# Import rsvps.csv: for every "yes" response, link the existing Member to the
# existing Event with an RSVPD relationship keyed by the RSVP id; the
# timestamps and guest count are only written when the relationship is first
# created.
# NOTE(review): unlike the other imports this one reads a file:/// URL, so
# rsvps.csv presumably has to be available to the Neo4j server locally — confirm.
load_rsvps_query = '''
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///rsvps.csv" AS row
WITH row WHERE row.response = "yes"

MATCH (member:Member {id: row.member_id})
MATCH (event:Event {id: row.event_id})
MERGE (member)-[rsvp:RSVPD {id: row.rsvp_id}]->(event)
ON CREATE SET rsvp.created = toint(row.created),
              rsvp.lastModified = toint(row.mtime),
              rsvp.guests = toint(row.guests)
'''
graph.run(load_rsvps_query)

![](http://guides.neo4j.com/bostonmeetup/img/graph_database_events.png)

In [0]:
from textblob import TextBlob

In [0]:
# Pick one random event that has a description and keep only the description text
random_description_query = "MATCH (e:Event) WHERE e.description IS NOT null WITH e, rand() AS r ORDER BY r RETURN e.description LIMIT 1"
desc = graph.run(random_description_query).evaluate()
desc

In [0]:
# Helper for stripping HTML markup
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MLStripper(HTMLParser):
    """HTML parser that keeps only the text content it is fed.

    Every chunk of character data is appended to ``self.fed``; tags are
    ignored. ``get_data()`` returns the collected text as one string.
    """

    def __init__(self):
        # Run the base initialiser (it also calls reset()). It is called
        # explicitly rather than via super() so the same line works with both
        # the Python 2 and Python 3 parser classes. Skipping it leaves the
        # Python 3 parser without the attributes feed() relies on.
        HTMLParser.__init__(self)
        self.fed = []

    def handle_data(self, d):
        """Collect one chunk of text found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text joined into a single string."""
        return ''.join(self.fed)


def strip_tags(html):
    """Return ``html`` with all markup removed.

    Args:
        html: HTML (or plain) text; ``None`` and ``""`` are accepted.

    Returns:
        The text content as a string, or ``''`` when there is no input.
    """
    if not html:
        return ''
    s = MLStripper()
    s.feed(html)
    # close() makes the parser process any text it is still buffering
    # (e.g. a trailing "&" that could have been the start of an entity).
    s.close()
    return s.get_data()

In [0]:
# Analyse the sampled description with TextBlob, after removing its HTML markup
plain_description = strip_tags(desc)
blob = TextBlob(plain_description)

# Sentiment analysis: report the polarity score
sentiment = blob.sentiment
print("Sentiment: ")
print(sentiment.polarity)
print()

# Keyword extraction: the noun phrases serve as keywords
keywords = blob.noun_phrases
print("Keywords: ")
print(keywords)

In [0]:
# Schema: every Keyword node must have a unique name
keyword_name_constraint = "CREATE CONSTRAINT ON (k:Keyword) ASSERT k.name IS UNIQUE"
graph.run(keyword_name_constraint)

In [0]:
def addKeywords(query):
    """Tag events with keywords extracted from their descriptions.

    Runs ``query`` on the notebook-level ``graph``. Each returned row must
    expose ``e_id`` (the event id) and ``desc`` (the event description, which
    may contain HTML). The description is stripped of markup, its noun
    phrases are extracted with TextBlob, and each phrase is merged as a
    ``Keyword`` node linked from the event by a ``HAS_TAG`` relationship.

    Rows without a description, and descriptions that yield no noun phrases,
    are skipped. The parameters of every write are printed as they are sent.

    Args:
        query: Cypher text returning ``e_id`` and ``desc`` columns.
    """
    # The write statement is the same for every event, so it is built once.
    # It has its own name so the ``query`` argument is not overwritten.
    tag_query = '''
            WITH {kws} AS kws
                MATCH (e:Event) WHERE e.id = {e_id}
                UNWIND kws AS kw
                MERGE (k:Keyword {name: kw})
                MERGE (e)-[:HAS_TAG]->(k)
            '''

    result = graph.run(query)
    for row in result:
        description = row['desc']
        if not description:
            # The caller's query may return events whose description is null;
            # there is nothing to extract from those.
            continue

        blob = TextBlob(strip_tags(description))
        kws = blob.noun_phrases
        if not kws:
            continue

        p = {
            'kws': kws,
            # the id is sent as a string to be compared with e.id
            'e_id': str(row['e_id'])
        }
        print(p)

        graph.run(tag_query, parameters = p)


In [0]:
# Tag every event hosted by the group named "Graph Database - Austin"
austin_events_query = '''
        MATCH (e:Event)<-[:HOSTED_EVENT]-(g:Group {name: "Graph Database - Austin"})
        RETURN e.id AS e_id, e.description AS desc
        '''
addKeywords(austin_events_query)

In [0]:
    # Tag a random sample of up to 100 events that have a description but no
    # HAS_TAG relationship to a Keyword yet; re-run to tag further batches.
    # NOTE(review): this cell is indented one level although nothing encloses
    # it — looks like a leftover; confirm and dedent.
    untagged_sample_query = '''
        MATCH (e:Event) WHERE e.description IS NOT NULL AND NOT exists((e)-[:HAS_TAG]->(:Keyword))
        WITH e, rand() AS r ORDER BY r SKIP 0 LIMIT 100 
        RETURN e.id AS e_id, e.description AS desc
        '''
    addKeywords(untagged_sample_query)