# Indexing data in Neo4j


In [1]:
# Shell command (IPython "!" escape): export the connection_neo4j.ipynb notebook
# as a Python script so that the `graph` object it defines can be imported
# by the import cell that follows.
!jupyter nbconvert --to python connection_neo4j.ipynb

[NbConvertApp] Converting notebook connection_neo4j.ipynb to python
[NbConvertApp] Writing 1171 bytes to connection_neo4j.py


In [40]:
from py2neo import Graph, Node, Relationship, NodeSelector
from connection_neo4j import graph
import pandas as pd
import numpy as np

## Accessing the Processed Dataset

In [42]:
# Read the pre-processed Facebook posts dataset; the file is semicolon-delimited.
dataset_path = '../data/dataset_Facebook_processed.csv'
data = pd.read_csv(dataset_path, sep=";")

# Display the column names as the cell output.
data.columns

Index(['Page total likes', 'Type', 'Category', 'Post Month', 'Post Weekday',
       'Post Hour', 'Paid', 'comment', 'like', 'share', 'Post id',
       'Increase likes'],
      dtype='object')

## Creating nodes
There are some important nodes that only need to be created once, for example:
* The weekdays (Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday)
* Page Likes
* Posts
* Comments
* Likes
* Shares

In [43]:
# Weekday lookup nodes; posts are linked to these by name later in the notebook.
weekdaysList = ['Sunday','Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

print("Creating Weekdays...")

for weekday_name in weekdaysList:
    # MERGE (rather than CREATE) keeps this cell idempotent: re-running it
    # matches the existing Weekday node instead of inserting a duplicate,
    # so later lookups by name stay unambiguous.
    # The names are hard-coded constants, so building the query by string
    # concatenation is safe here.
    query  = "MERGE (n: Weekday {name: '" + weekday_name + "'}) RETURN n.name"
    result = graph.data(query)
    print(result)
    

Creating Weekdays...
[{'n.name': 'Sunday'}]
[{'n.name': 'Monday'}]
[{'n.name': 'Tuesday'}]
[{'n.name': 'Wednesday'}]
[{'n.name': 'Thursday'}]
[{'n.name': 'Friday'}]
[{'n.name': 'Saturday'}]


In [44]:
# Category lookup nodes; posts are linked to these by name later in the notebook.
# The list gets its own name: the original reused `weekdaysList`, which
# silently overwrote the weekday names with category names.
categoriesList = ["Action","Product","Inspiration"]

print("Creating Categories...")

for category_name in categoriesList:
    # MERGE (rather than CREATE) keeps this cell idempotent: re-running it
    # matches the existing Category node instead of inserting a duplicate.
    # The names are hard-coded constants, so building the query by string
    # concatenation is safe here.
    query  = "MERGE (n: Category {name: '" + category_name + "'}) RETURN n.name"
    result = graph.data(query)
    print(result)
    
    

Creating Categories...
[{'n.name': 'Action'}]
[{'n.name': 'Product'}]
[{'n.name': 'Inspiration'}]


In [45]:
print("Creating Page Likes...")
# Single node that accumulates the page's like counter.
# MERGE matches on `name` only, and `total` is initialised to 0 only when the
# node is first created: re-running this cell neither inserts a second
# PageLikes node nor resets a total that has already been accumulated.
query  = "MERGE (n: PageLikes {name: 'Page Likes'}) ON CREATE SET n.total = 0 RETURN n.name"
result = graph.data(query)
print(result)

Creating Page Likes...
[{'n.name': 'Page Likes'}]


In [48]:
selector = NodeSelector(graph)

# The PageLikes node is the same for every post, so fetch it once up front
# instead of re-querying it on every row.
page = selector.select("PageLikes", name="Page Likes").first()
if page is None:
    raise LookupError("PageLikes node 'Page Likes' not found; create it before indexing posts")

# For every dataset row, create the Post node with its Comment / Like / Share
# nodes and link the post to its Category, Weekday and the PageLikes node.
# Each row is written in its own transaction.
for i, row in data.iterrows():
    # Resolve everything that can fail (lookups, column access) BEFORE opening
    # the transaction, so an error does not leave a half-built transaction open.
    category_name = str(row['Category'])
    weekday_name = str(row['Post Weekday'])

    # .first() runs the lookup query itself; no need to materialise the
    # selection with list() first (that only issued an extra, unused query).
    category = selector.select("Category", name=category_name).first()
    if category is None:
        raise LookupError("No Category node named %r (dataset row %s)" % (category_name, i))

    weekday = selector.select("Weekday", name=weekday_name).first()
    if weekday is None:
        raise LookupError("No Weekday node named %r (dataset row %s)" % (weekday_name, i))

    # The dataset has no 'Post total likes' column (it raised KeyError);
    # 'Page total likes' is the column this notebook uses elsewhere for the
    # INCREASED_LIKES relationship.
    # NOTE(review): confirm 'Page total likes' (and not 'Increase likes') is the intended value.
    page_total_likes = row['Page total likes']

    transaction = graph.begin()

    post = Node("Post", type=str(row['Type']), id=str(row['Post id']))
    transaction.create(post)

    transaction.create(Relationship(post, "IS_ABOUT", category))
    transaction.create(Relationship(post, "POSTED_IN", weekday,
                                    month=str(row['Post Month']),
                                    hour=str(row['Post Hour'])))

    # One Comment / Like / Share node per post; each stores the row's count
    # as a string in its `number` property and points at the post.
    comment = Node("Comment", number=str(row['comment']))
    transaction.create(comment)
    transaction.create(Relationship(comment, "HAS_COMMENTED", post))

    like = Node("Like", number=str(row['like']))
    transaction.create(like)
    transaction.create(Relationship(like, "HAS_LIKED", post))

    share = Node("Share", number=str(row['share']))
    transaction.create(share)
    transaction.create(Relationship(share, "HAS_SHARED", post))

    transaction.create(Relationship(post, "INCREASED_LIKES", page, number=page_total_likes))

    transaction.commit()


In [58]:
# The PageLikes node is shared by all rows: fetch it once and keep updating the
# same local node object instead of re-querying it on every iteration.
page = selector.select("PageLikes", name="Page Likes").first()
if page is None:
    raise LookupError("PageLikes node 'Page Likes' not found; create it before running this cell")

# For every dataset row: add the row's 'Page total likes' to the PageLikes
# node's `total` property and link the row's Post to the PageLikes node.
for i, row in data.iterrows():
    post_id = str(row['Post id'])

    # .first() runs the lookup query itself; the previous list(...) call only
    # issued an extra query whose result was never used.
    post = selector.select("Post", id=post_id).first()
    if post is None:
        raise LookupError("No Post node with id %r (dataset row %s); index the posts first" % (post_id, i))

    # Update the running total locally, then write it back to the database.
    # `page` came from the database, so no graph.merge() is needed before
    # updating it, and graph.push(node) replaces the unbound Node.push(page) call.
    page['total'] = page.get("total") + int(row['Page total likes'])
    graph.push(page)

    transaction = graph.begin()
    transaction.create(Relationship(post, "INCREASED_LIKES", page, number=(row['Page total likes'])))
    transaction.commit()