# Neo4j graph from xlsx document

## Input

- xlsx file
    - Nodes are taken from the columns:
        - "entity_starting" 
        - "entity_ending"
    - Relationships are taken from the column labeled:
        - "relationship"


## Output

#### An ontological knowledge graph containing all nodes and relationships specified in the document

## Importing document as dataFrame

In [1]:
import pandas
from py2neo import Graph,Subgraph,Node,Relationship

# Locations of the two input workbooks: the ontology (rows that become
# "Class" nodes and their relationships) and the tokens (rows that become
# "Token" nodes).
path = "/home/luca/Documents/ontologia.xlsx"
path2 = "/home/luca/Documents/tokens.xlsx"

# Both workbooks are read with the same settings: cells holding the literal
# text 'NA' are treated as missing values.
excel_options = {"na_values": ['NA']}
dataFrame = pandas.read_excel(path, **excel_options)
dataFrame2 = pandas.read_excel(path2, **excel_options)

In [2]:
import spacy
# Load the spaCy pipeline "en_core_web_sm"; it is used further down to
# lemmatize the text of each relationship before it becomes an edge type.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Group on every column at once so that identical rows fall into the same
# group, then count the rows per group to reveal duplicated entries.
every_column = dataFrame.columns.tolist()
dataFrame3 = dataFrame.groupby(every_column, as_index=False).size()

# Preview the first four groups.
dataFrame3.head(4)

entity_starting  relationship    entity_ending  type_of_relationship
application      affects         user           influence               1
                 by integrating  information    usage                   1
                 concerning      requirement    influence               1
                 determines      purpose        intention               1
dtype: int64

## Importing graph

In [4]:
import os

# Connection settings can be overridden through the environment
# (NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD) so that real credentials do not
# have to be stored in the notebook. When a variable is unset, the value
# this notebook originally hard-coded is used, so existing local setups
# keep working unchanged.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "graph"),
)

## Iterating through values, saving nodes in a dictionary to avoid multiple occurrences

In [5]:
# Start from an empty database so that repeated runs do not pile up
# duplicates: py2neo's Graph.delete_all() removes all stored nodes and
# relationships.
# NOTE(review): this is destructive — confirm the target database holds
# nothing else that must be kept.
graph.delete_all()

In [6]:
# Lookup tables and de-duplication sets shared by the cells below.
# Keying nodes by their source text keeps a single Node object per name.
tokens = {}              # token text  -> "Token" node
classes = {}             # entity name -> "Class" node
relationships = set()    # relationships between class nodes
relationships2 = set()   # "INSTANCE_OF" relationships from tokens to classes

### Adding Class nodes into dictionary

In [7]:
# Build a "Class" node for the entity at each end of every ontology row.
# The raw cell value serves as both the dictionary key and the node's `type`
# property; its capitalized form becomes the node's `name`. When a name shows
# up again, the new node (built from the same value) replaces the earlier one,
# so the dictionary ends up with exactly one node per entity name.
for _, record in dataFrame.iterrows():
    source_name = record['entity_starting']
    starter = Node("Class", type=source_name, name=str.capitalize(source_name))
    target_name = record["entity_ending"]
    ender = Node("Class", type=target_name, name=str.capitalize(target_name))
    classes[source_name] = starter
    classes[target_name] = ender

### Adding relationships to set, pushing data into graph

In [8]:
# Turn every ontology row into a Relationship between the two class nodes
# registered for its entities.
for _, record in dataFrame.iterrows():
    phrase = record["relationship"]
    parsed = nlp(phrase)
    source_node = classes.get(record["entity_starting"])
    # Edge type: lemma of the span covering the whole parsed phrase,
    # upper-cased, with spaces replaced by underscores.
    edge_type = str.upper(parsed[0:].lemma_).replace(" ", "_")
    target_node = classes.get(record["entity_ending"])
    edge_properties = {
        "type": str.capitalize(record["type_of_relationship"]),
        "name": str.capitalize(phrase),
    }
    relationship = Relationship(source_node, edge_type, target_node,
                                **edge_properties)
    # Collected in a set so duplicates are only kept once.
    relationships.add(relationship)

# Send the collected relationships to the graph one at a time.
for pending in relationships:
    graph.create(pending)

### Adding Instance nodes to dictionary

In [9]:
# Create one "Token" node per row of the tokens workbook, keyed by the raw
# token text. `type` holds the lower-cased class name and `name` the
# capitalized token; a repeated token replaces the previously stored node.
for _, record in dataFrame2.iterrows():
    class_name = str.lower(record['class'])
    display_name = str.capitalize(record['token'])
    tokens[record["token"]] = Node("Token", type=class_name, name=display_name)

### Adding hierarchy relationship to set

In [10]:
# Link every token node to the class node named by its `type` property.
# NOTE(review): the token's `type` is lower-cased when the node is built,
# while `classes` is keyed by the raw entity text, and `classes.get` returns
# None for an unknown key — confirm every token class matches a key exactly.
for starter in tokens.values():
    class_key = dict(starter).get("type")
    ender = classes.get(class_key)
    relationships2.add(
        Relationship(starter, "INSTANCE_OF", ender, type="hierarchy")
    )

### Pushing relationships into graph

In [11]:
# Send each collected "INSTANCE_OF" relationship to the graph, one at a time.
for instance_link in relationships2:
    graph.create(instance_link)