# Indexing data in Neo4j


In [None]:
# Shell command run from Jupyter: export connection_neo4j.ipynb to a Python script so it can be imported below
!jupyter nbconvert --to python connection_neo4j.ipynb

In [None]:
from py2neo import Graph, Node, Relationship, NodeSelector
from connection_neo4j import graph
import pandas as pd
import numpy as np

## Accessing the Processed Dataset

In [None]:
# Load the processed Facebook dataset; the file uses ';' as field separator.
DATASET_PATH = '../data/dataset_Facebook_processed.csv'
data = pd.read_csv(DATASET_PATH, sep=";")

# Last expression of the cell: displays the column names that were loaded.
data.columns

## Creating nodes
There are some important nodes that only need to be created once, for example:
* The weekdays (Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday)
* Page Likes
* Posts
* Comments
* Likes
* Shares

In [None]:
weekdaysList = ['Sunday','Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

print("Creating Weekdays...")

# MERGE (instead of CREATE) keeps this cell idempotent: running it again
# reuses the existing Weekday node rather than adding a duplicate.
# The name is passed as a query parameter instead of being concatenated into
# the Cypher text, so the statement stays constant and needs no quoting.
query = "MERGE (n:Weekday {name: $name}) RETURN n.name"

for day in weekdaysList:
    result = graph.data(query, {"name": day})
    print(result)
    

In [None]:
# Post categories. Kept in its own variable so the weekday list defined in
# the previous cell is not overwritten with unrelated values.
categoriesList = ["Action","Product","Inspiration"]

print("Creating Categories...")

# MERGE (instead of CREATE) keeps this cell idempotent: running it again
# reuses the existing Category node rather than adding a duplicate.
# The name is passed as a query parameter rather than concatenated into the
# Cypher text.
query = "MERGE (n:Category {name: $name}) RETURN n.name"

for categoryName in categoriesList:
    result = graph.data(query, {"name": categoryName})
    print(result)
    
    

In [None]:
print("Creating Page Likes...")
# MERGE matches on the node name only, so re-running the cell never adds a
# second PageLikes node (later cells pick the node with .first(), which would
# become ambiguous with duplicates). `total` is initialised to 0 only when
# the node is first created, so an already-updated total is not reset.
query  = "MERGE (n:PageLikes {name: 'Page Likes'}) ON CREATE SET n.total = 0 RETURN n.name"
result = graph.data(query)
print(result)

In [None]:
print("Indexing data in Neo4j... ")
selector = NodeSelector(graph)

# The PageLikes node is the same for every row, so it is looked up once
# instead of being queried again on each iteration.
page = selector.select("PageLikes", name="Page Likes").first()

# Category and Weekday values repeat across rows: each distinct node is
# fetched once and reused. Keys are (label, name) pairs.
# NOTE(review): .first() returns None when no node matches; confirm that the
# dataset values always correspond to the nodes created in the cells above.
lookupNodes = {}

for i, row in data.iterrows():
    ############## Category Node ##############
    categoryKey = ("Category", str(row['Category']))
    if categoryKey not in lookupNodes:
        lookupNodes[categoryKey] = selector.select("Category", name=categoryKey[1]).first()
    category = lookupNodes[categoryKey]

    ############## Weekday Node ##############
    weekdayKey = ("Weekday", str(row['Post Weekday']))
    if weekdayKey not in lookupNodes:
        lookupNodes[weekdayKey] = selector.select("Weekday", name=weekdayKey[1]).first()
    weekday = lookupNodes[weekdayKey]

    # One transaction per dataset row: the post and everything attached to it
    # are written together.
    transaction = graph.begin()
    try:
        ############## Post Node ##############
        post = Node("Post",type = str(row['Type']), id = str(row['Post id']) )
        transaction.create(post)

        ############## Relationship (Post) - [:IS_ABOUT] -> (Category) ##############
        relationship = Relationship(post, "IS_ABOUT", category)
        transaction.create(relationship)

        ############## Relationship (Post) - [:POSTED_IN] -> (Weekday) ##############
        relationship = Relationship(post, "POSTED_IN", weekday, month=str(row['Post Month']), hour=str(row['Post Hour']))
        transaction.create(relationship)

        ############## Comment Node ##############
        comment = Node("Comment", number  = str(row['comment']))
        transaction.create(comment)

        ############## Relationship (Comment) - [:HAS_COMMENTED] -> (Post) ##############
        relationship = Relationship(comment, "HAS_COMMENTED", post)
        transaction.create(relationship)

        ############## Like Node ##############
        like = Node("Like", number  = str(row['like']))
        transaction.create(like)

        ############## Relationship (Like) - [:HAS_LIKED] -> (Post) ##############
        relationship = Relationship(like, "HAS_LIKED", post)
        transaction.create(relationship)

        ############## Share Node ##############
        share = Node("Share", number  = str(row['share']))
        transaction.create(share)

        ############## Relationship (Share) - [:HAS_SHARED] -> (Post) ##############
        relationship = Relationship(share, "HAS_SHARED", post)
        transaction.create(relationship)

        ############## Relationship (Post) - [:INCREASED_LIKES] -> (PageLikes) ##############
        relationship = Relationship(post, "INCREASED_LIKES", page, number=(row['Increase likes']))
        transaction.create(relationship)
    except Exception:
        # Do not leave a half-built transaction open if a row fails; undo it
        # and let the original error propagate.
        transaction.rollback()
        raise

    transaction.commit()
print("Indexing end!")

## Updating Total Likes - Node PageLikes

In [None]:
# Fetch the PageLikes node (uses the `selector` built in the indexing cell).
page = selector.select("PageLikes", name="Page Likes").first()

# NOTE(review): `page` comes back from the selector already bound to the
# graph, so this merge looks redundant -- kept to preserve behaviour; confirm.
graph.merge(page)
#### Getting the last Total Likes (from dataset) ####
# The last row of the dataset carries the most recent page total.
page['total'] = int(data['Page total likes'].iloc[-1])
graph.push(page)

print("TotalLikes Node Updated!")