********************* Task 2 *********************

In [1]:
# Import neo4j and other required libraries.
from neo4j import GraphDatabase
import configparser
import json
import logging

# Setting initial logging level to INFO; messages go to researchGraph.log
logging.basicConfig(filename='researchGraph.log', level=logging.INFO)

# Creating ConfigParser object
config = configparser.ConfigParser()

# Reading the configuration file.
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty list means config.ini is missing/unreadable.
# Fail here with a clear message instead of an opaque KeyError: 'Neo4j' below.
if not config.read('config.ini'):
    raise FileNotFoundError("Configuration file 'config.ini' was not found or could not be read.")

# Getting the credentials from the [Neo4j] section of the configuration file
uri = config['Neo4j']['uri']
username = config['Neo4j']['username']
password = config['Neo4j']['password']

# Create a driver instance (shared by every later cell)
driver = GraphDatabase.driver(uri, auth=(username, password))
logging.info("Created driver for Neo4j DB.")

#Function to test the connection to neo4j db
def test_neo4j_connection(driver):
    try:
        with driver.session() as session:
            print(f"Performing a test query (e.g., fetching nodes count from the db)....")
            logging.info(f"Performing a test query (e.g., fetching nodes count from the db)....")
            result = session.run("MATCH (n) RETURN COUNT(n) AS nodeCount")
            record = result.single()
            node_count = record["nodeCount"]
            logging.info(f"Successfully connected to Neo4j db. Total nodes in the database: {node_count}")
            print(f"Successfully connected to Neo4j db. Total nodes in the database: {node_count}")
    except Exception as e:
        logging.error(f"Error connecting to Neo4j due to error: {str(e)}")

# Testing the connection to neo4j db, using the driver created earlier in this cell:
test_neo4j_connection(driver)

Performing a test query (e.g., fetching nodes count from the db)....
Successfully connected to Neo4j db. Total nodes in the database: 0


In [2]:
#Function for creating Nodes and Relationships using the json
def create_nodes_and_relationships(tx, data):
    """Write one paper with its authors, organizations and references to Neo4j.

    Every statement uses MERGE, so re-running the cell on the same input does
    not duplicate the Paper node or its CITES relationships.

    Args:
        tx: transaction-like object; only ``tx.run(query, **params)`` is used.
        data: dict parsed from the JSON file. ``_id`` and ``title`` (a list,
            first item is used) are required. ``abstract``, ``author`` and
            ``reference`` are optional. Each reference must carry ``key``;
            a missing ``DOI`` or ``unstructured`` is stored as ``"N/A"``.

    Raises:
        KeyError: if ``_id``, ``title`` or a reference ``key`` is missing.
    """
    # Extracting the data from the JSON
    _id = data["_id"]
    title = data["title"][0]
    abstract = data.get("abstract")

    # Creating (or updating) the node for the PAPER, keyed by its id
    tx.run("MERGE (paper:Paper {id: $id}) "
           "SET paper.title = $title, paper.abstract = $abstract",
           id=_id, title=title, abstract=abstract)

    # Creating nodes for AUTHORS and RELATIONSHIPS between AUTHORS and the PAPER
    for author in data.get("author", []):
        # Either name part may be absent; join whatever is present
        name_parts = (author.get("given"), author.get("family"))
        author_name = " ".join(part for part in name_parts if part)
        if not author_name:
            # Nothing to key the Author node on; skip this entry
            continue

        # Create or merge the Author node
        tx.run("MERGE (author:Author {name: $name})", name=author_name)

        # Only the first listed affiliation is used; it may be missing or empty
        affiliations = author.get("affiliation") or []
        affiliation = affiliations[0].get("name") if affiliations else None
        if affiliation:
            # Create or merge the Organization node
            tx.run("MERGE (org:Organization {name: $affiliation})", affiliation=affiliation)

            # Create the AFFILIATED_WITH relationship between AUTHOR and ORGANIZATION
            tx.run("MATCH (author:Author {name: $name}), (org:Organization {name: $affiliation}) "
                   "MERGE (author)-[:AFFILIATED_WITH]->(org)", name=author_name, affiliation=affiliation)

        # Create the AUTHORED_BY relationship between PAPER and AUTHOR
        tx.run("MATCH (paper:Paper {id: $id}), (author:Author {name: $name}) "
               "MERGE (paper)-[:AUTHORED_BY]->(author)", id=_id, name=author_name)

    # Creating nodes for REFERENCES and RELATIONSHIPS between REFERENCES and the PAPER
    for reference in data.get("reference", []):
        reference_key = reference["key"]
        # Default values are needed because MERGE cannot match on a null property
        reference_doi = reference.get("DOI", "N/A")
        reference_unstructured = reference.get("unstructured", "N/A")
        tx.run("MERGE (reference:Reference {key: $key, doi: $doi, unstructured: $unstructured})",
               key=reference_key, doi=reference_doi, unstructured=reference_unstructured)
        # MERGE (not CREATE) so a re-run does not add a second CITES edge
        tx.run("MATCH (paper:Paper {id: $id}), (reference:Reference {key: $key}) "
               "MERGE (paper)-[:CITES]->(reference)", id=_id, key=reference_key)


In [3]:
# Loading the JSON data from the Sample.json file.
# Open explicitly as UTF-8 so non-ASCII text in the file decodes the same way
# on every platform instead of depending on the locale's default encoding.
with open('Sample.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)
logging.info("Finshed json loading.")

# Creating a session to execute the Cypher queries for creating nodes and relationships to neo4j db
with driver.session() as session:
    logging.info("Starting data insertion to neo4j db nodes and relationships from json.")
    # write_transaction is deprecated in favour of execute_write; fall back to
    # the old name only when the installed driver does not provide the new one.
    run_write = getattr(session, "execute_write", None) or session.write_transaction
    run_write(create_nodes_and_relationships, data)
    logging.info("Finished data insertion.")
    print("Finished data insertion.")

Finished data insertion.


  session.write_transaction(create_nodes_and_relationships, data)


Function to get the top 10 organizations with the highest degree centrality

In [6]:
# Function to get the top 10 organizations with highest degree of centrality
def get_top_10_organizations_by_centrality(driver):
    """Return up to ten organizations ranked by their number of affiliated authors.

    "Degree" here is the count of Author nodes connected to the Organization
    through an AFFILIATED_WITH relationship.

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``run(query)``.

    Returns:
        list: the query's records in descending ``degree`` order, each with the
        fields ``organization`` and ``degree``. An empty list is returned if
        the query fails (the error is printed).
    """
    with driver.session() as session:
        try:
            result = session.run("""
            MATCH (n:Author)-[:AFFILIATED_WITH]-(o:Organization)
            WITH o, COUNT(n) AS degree
            ORDER BY degree DESC
            LIMIT 10
            RETURN o.name AS organization, degree;
            """)
            # Consume the result inside the try block and while the session is
            # still open, so a failure while fetching records is handled too.
            return list(result)
        except Exception as e:
            print(f"Error while executing Cypher query. Details are as follow: {str(e)}")
            # Without this return the function would go on to use an unbound
            # `result` and raise UnboundLocalError.
            return []

# Get the top 10 organizations by degree centrality
# (the query counts the Author nodes linked to each Organization via AFFILIATED_WITH)
top_organizations = get_top_10_organizations_by_centrality(driver)

# Print one line per organization, in the order returned by the query (degree descending)
for org in top_organizations:
    print(f"Organization: {org['organization']} => Degree of Centrality: {org['degree']}")


Organization: Department of Statistics, Faculty of Arts and Science , Forecast Research Laboratory , Giresun University , Giresun , 28100 , Turkey => Degree of Centrality: 2
Organization: Department of Biostatistics, Medical Faculty , Marmara University , Istanbul , Turkey => Degree of Centrality: 1
Organization: Department of Econometrics, Faculty of Economic and Administrative Sciences , Forecast Research Laboratory , Giresun University , Giresun , 28100 , Turkey => Degree of Centrality: 1


Function to get the top 10 researchers with the highest degree centrality

In [7]:
# Function to get the top 10 researchers with highest degree of centrality
def get_top_10_researchers_by_centrality(driver):
    """Return up to ten authors ranked by their number of affiliated organizations.

    "Degree" here is the count of Organization nodes connected to the Author
    through an AFFILIATED_WITH relationship.

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``run(query)``.

    Returns:
        list: the query's records in descending ``degree`` order, each with the
        fields ``author`` and ``degree``. An empty list is returned if the
        query fails (the error is printed).
    """
    with driver.session() as session:
        try:
            result = session.run("""
            MATCH (a:Author)-[:AFFILIATED_WITH]-(o:Organization)
            WITH a, COUNT(o) AS degree
            ORDER BY degree DESC
            LIMIT 10
            RETURN a.name AS author, degree;
            """)
            # Consume the result inside the try block and while the session is
            # still open, so a failure while fetching records is handled too.
            return list(result)
        except Exception as e:
            print(f"Error while executing Cypher query. Details are as follow {str(e)}")
            # Without this return the function would go on to use an unbound
            # `result` and raise UnboundLocalError.
            return []

# Get the top 10 researchers by degree centrality
# (the query counts the Organization nodes linked to each Author via AFFILIATED_WITH)
top_researchers = get_top_10_researchers_by_centrality(driver)

# Print one line per author, in the order returned by the query (degree descending)
for author in top_researchers:
    print(f"Author: {author['author']} => Degree of Centrality: {author['degree']}")

Author: Esra Akdeniz => Degree of Centrality: 1
Author: Erol Egrioglu => Degree of Centrality: 1
Author: Eren Bas => Degree of Centrality: 1
Author: Ufuk Yolcu => Degree of Centrality: 1
