#Installing neo4j db and related libraries.
#pip install neo4j 
#pip install ipython-cypher


In [1]:
#Import neo4j and other required liberaries.
from neo4j import GraphDatabase
import configparser
import json

In [3]:
# Create a ConfigParser object
config = configparser.ConfigParser()

# Read the configuration file
config.read('config.ini')

# Getting the credentials from the configuration file
uri = config['Neo4j']['uri']
username = config['Neo4j']['username']
password = config['Neo4j']['password']

# Create a driver instance
driver = GraphDatabase.driver(uri, auth=(username, password))
print("Created driver for neo4j DB. ")

#Function to test the connection to neo4j db
def test_neo4j_connection(driver):
    try:
        with driver.session() as session:
            print(f"Performing a test query (e.g., fetching nodes count from the db)....")
            result = session.run("MATCH (n) RETURN COUNT(n) AS nodeCount")
            record = result.single()
            node_count = record["nodeCount"]
            print(f"Successfully connected to Neo4j db. Total nodes in the database: {node_count}")
    except Exception as e:
        print(f"Error connecting to Neo4j: {str(e)}")

# Testing the connection to neo4j db:
test_neo4j_connection(driver)


Created driver for neo4j DB. 
Performing a test query (e.g., fetching nodes count from the db)....
Successfully connected to Neo4j db. Total nodes in the database: 0


In [4]:
#Function for creating Nodes and Relationships using the json
def create_nodes_and_relationships(tx, data):
    # Extracting the data from the JSON
    _id = data["_id"]
    title = data["title"][0]
    abstract = data["abstract"]
    
    # Creating a node for the PAPER
    tx.run("CREATE (paper:Paper {id: $id, title: $title, abstract: $abstract})", id=_id, title=title, abstract=abstract)
    
    # Creating nodes for AUTHORS and RELATIONSHIPS between AUTHORS and the PAPER
    for author in data["author"]:
        author_name = author["given"] + " " + author["family"]
        affiliation = author["affiliation"][0]["name"]
        tx.run("MERGE (author:Author {name: $name, affiliation: $affiliation})", name=author_name, affiliation=affiliation)
        
        # Creating ORGANIZATION node
        tx.run("MERGE (org:Organization {name: $affiliation})", affiliation=affiliation)

        # Creating the AFFILIATED_WITH relationship between AUTHOR and ORGANIZATION
        tx.run("MATCH (author:Author {name: $name}), (org:Organization {name: $affiliation}) "
           "MERGE (author)-[:AFFILIATED_WITH]->(org)", name=author_name, affiliation=affiliation)
    
        # Creating the AUTHORED_BY relationship between PAPER and AUTHOR
        tx.run("MATCH (paper:Paper {id: $id}), (author:Author {name: $name}) "
           "MERGE (paper)-[:AUTHORED_BY]->(author)", id=_id, name=author_name)

    # Creating nodes for REFERENCES and RELATIONSHIPS between REFERENCES and the PAPER
    for reference in data["reference"]:
        reference_key = reference["key"]
        reference_doi = reference.get("DOI", "N/A")  # Provide a default value if "DOI" is missing
        reference_unstructured = reference["unstructured"]
        tx.run("MERGE (reference:Reference {key: $key, doi: $doi, unstructured: $unstructured})", key=reference_key, doi=reference_doi, unstructured=reference_unstructured)
        tx.run("MATCH (paper:Paper {id: $id}), (reference:Reference {key: $key}) "
               "CREATE (paper)-[:CITES]->(reference)", id=_id, key=reference_key)


In [5]:
# Loading the JSON data from the Sample.json file
with open('Sample.json', 'r') as json_file:
    data = json.load(json_file)
print("Finshed json loading ")

# Creating a session to execute the Cypher queries for creating nodes and relationships to neo4j db
with driver.session() as session:
    print("Starting data insertion to neo4j db nodes and relationships from json")
    session.write_transaction(create_nodes_and_relationships, data)
    print("Finished data insertion")

{'_id': '10.1515/jaiscr-2018-0009', 'abstract': '<jats:title>Abstract</jats:title>\n               <jats:p>Real-life time series have complex and non-linear structures. Artificial Neural Networks have been frequently used in the literature to analyze non-linear time series. High order artificial neural networks, in view of other artificial neural network types, are more adaptable to the data because of their expandable model order. In this paper, a new recurrent architecture for Pi-Sigma artificial neural networks is proposed. A learning algorithm based on particle swarm optimization is also used as a tool for the training of the proposed neural network. The proposed new high order artificial neural network is applied to three real life time series data and also a simulation study is performed for Istanbul Stock Exchange data set.</jats:p>', 'URL': 'http://dx.doi.org/10.1515/jaiscr-2018-0009', 'resource': {'primary': {'URL': 'https://www.sciendo.com/article/10.1515/jaiscr-2018-0009'}},

  session.write_transaction(create_nodes_and_relationships, data)


In [6]:
#Function for calculating the 'Number of Articles' --> Task 1.2
def get_number_of_articles(driver):
    with driver.session() as session:
        result = session.run("MATCH (p:Paper) RETURN count(p) as num_articles")
        num_articles = result.single()["num_articles"]
    return num_articles

num_articles = get_number_of_articles(driver)
print(f"Number of Articles: {num_articles}")

Number of Articles: 1


In [7]:
#Function for calculating the 'Number of Organizations (Deduplicated Affiliations)'--> Task 1.2
def get_number_of_organizations(driver):
    with driver.session() as session:
        result = session.run("MATCH (a:Author)-[:AFFILIATED_WITH]->(o:Organization) "
                             "RETURN count(DISTINCT o) as num_organizations")
        num_organizations = result.single()["num_organizations"]
    return num_organizations

num_organizations = get_number_of_organizations(driver)
print(f"Number of Organizations (Deduplicated Affiliations): {num_organizations}")

Number of Organizations (Deduplicated Affiliations): 3


In [9]:
# Function to get the 'Number of Researchers' --> Task 1.2
def get_number_of_researchers(driver):
    with driver.session() as session:
        result = session.run(
            "MATCH (a:Author) RETURN count(DISTINCT a) AS num_researchers"
        )
        num_researchers = result.single()["num_researchers"]
    return num_researchers

# Get the number of researchers
num_researchers = get_number_of_researchers(driver)

# Print the result
print(f"Number of Researchers: {num_researchers}")

Number of Researchers: 4


In [55]:
'''def get_author_data(driver):
    with driver.session() as session:
        result = session.run("MATCH (a:Author) RETURN a")
        authors = [record['a'] for record in result]
    return authors


# Retrieve author data
authors = get_author_data(driver)

# Print author data
for author in authors:
    print(author)'''

#To delete all records from the database
'''def delete_all_records(driver):
    with driver.session() as session:
        session.run("MATCH (n) DETACH DELETE n")

# Call the function to delete all records
delete_all_records(driver)'''

<Node element_id='4:e74e18a0-0f0c-4631-8a68-fe62c810dfff:1' labels=frozenset({'Author'}) properties={'affiliation': 'Department of Biostatistics, Medical Faculty , Marmara University , Istanbul , Turkey', 'name': 'Esra Akdeniz'}>
<Node element_id='4:e74e18a0-0f0c-4631-8a68-fe62c810dfff:2' labels=frozenset({'Author'}) properties={'affiliation': 'Department of Statistics, Faculty of Arts and Science , Forecast Research Laboratory , Giresun University , Giresun , 28100 , Turkey', 'name': 'Erol Egrioglu'}>
<Node element_id='4:e74e18a0-0f0c-4631-8a68-fe62c810dfff:3' labels=frozenset({'Author'}) properties={'affiliation': 'Department of Statistics, Faculty of Arts and Science , Forecast Research Laboratory , Giresun University , Giresun , 28100 , Turkey', 'name': 'Eren Bas'}>
<Node element_id='4:e74e18a0-0f0c-4631-8a68-fe62c810dfff:4' labels=frozenset({'Author'}) properties={'affiliation': 'Department of Econometrics, Faculty of Economic and Administrative Sciences , Forecast Research Labora