## Reactions Dataset
### Date: February 19, 2025
### Author: Selin Kaplanoglu

**Goal:** Use Neo4j to query gene-gene interactions and retrieve information about them.

In [2]:
#imports
from neo4j import GraphDatabase
from itertools import combinations
import random
import logging
import time
import json
import requests
import os
from dotenv import load_dotenv

In [3]:
# Set up the Neo4j connection and initialize the driver.
#
# override=True makes values from the .env file win over variables that already
# exist in the process environment. This matters for "USERNAME": Windows (and
# some Linux desktop sessions) pre-populate it with the OS login name, and with
# the default override=False that login would be used as the Neo4j user instead
# of the value configured in .env.
load_dotenv(override=True)
uri = os.getenv("URI")
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Fail early with a readable message rather than handing None to the driver.
if not uri:
    raise RuntimeError(
        "Neo4j connection URI is not set; define URI in the environment or in the .env file."
    )

driver = GraphDatabase.driver(uri, auth=(username, password))


In [None]:
# Query one: list each ENSEMBL-referenced physical entity together with the
# human (taxId 9606) pathways it participates in.
#
# Steps of the Cypher query:
#   1. find every node `n` that belongs to the ENSEMBL reference database;
#   2. find the physical entities `pe` that point at such a node;
#   3. find human pathways whose reaction-like events reach `pe` through a
#      participant relationship.
#
# Step 3 must tie `rle` back to `pe`. Without that second pattern the MATCH is
# disconnected from `pe`, and every entity gets paired with every human
# pathway (a Cartesian product) instead of only the pathways it takes part in.
# NOTE(review): the relationship list below extends the types used by the
# reaction query with hasComponent/hasMember/hasCandidate/repeatedUnit so that
# members of complexes and sets are reached -- confirm against the schema.
#
# The result column `gene_name` carries pe.displayName.
pathway_query = """
MATCH (n)-[:referenceDatabase]->(rd:ReferenceDatabase) 
WHERE toLower(rd.displayName) = toLower("ENSEMBL")
WITH DISTINCT n  
MATCH (pe:PhysicalEntity)-[:referenceEntity|referenceSequence|crossReference|referenceGene*]->(n)
WITH DISTINCT pe 
MATCH (:Species{taxId:"9606"})<-[:species]-(p:Pathway)-[:hasEvent]->(rle:ReactionLikeEvent),
      (rle)-[:input|output|catalystActivity|physicalEntity|entityFunctionalStatus|diseaseEntity|regulatedBy|regulator|hasComponent|hasMember|hasCandidate|repeatedUnit*]->(pe)
WITH DISTINCT pe, p 
RETURN pe.displayName AS gene_name, 
    p.displayName AS pathway_name
ORDER BY gene_name, pathway_name
"""

# One dict per (gene_name, pathway_name) row; later steps add reaction details
# to these same dicts.
genes_results_list = []
with driver.session() as session:
    print("Fetching genes and pathways...")
    # Consume the result while the session is still open.
    genes_results_list.extend(
        {
            "gene_name": record["gene_name"],
            "pathway_name": record["pathway_name"],
        }
        for record in session.run(pathway_query)
    )

# Query two: for one human pathway (selected by display name through the
# $pathway_name parameter), collect the distinct display names of its
# reaction-like events and of the entities attached to those events.
reaction_query = """
MATCH (:Species{taxId:"9606"})<-[:species]-(p:Pathway)-[:hasEvent]->(rle:ReactionLikeEvent)
WHERE p.displayName = $pathway_name
WITH rle
OPTIONAL MATCH (rle)-[:input]->(input:PhysicalEntity)
OPTIONAL MATCH (rle)-[:output]->(output:PhysicalEntity)
OPTIONAL MATCH (rle)-[:catalystActivity]->(catalyst:CatalystActivity)
OPTIONAL MATCH (rle)-[:physicalEntity]->(physicalEntity:PhysicalEntity)
OPTIONAL MATCH (rle)-[:entityFunctionalStatus]->(entityFunctionalStatus:EntityFunctionalStatus)
OPTIONAL MATCH (rle)-[:diseaseEntity]->(diseaseEntity:Disease)
OPTIONAL MATCH (rle)-[:regulatedBy|regulator]->(regulator:Regulation)
RETURN collect(DISTINCT rle.displayName) AS reactions,
         collect(DISTINCT input.displayName) AS inputs, 
         collect(DISTINCT output.displayName) AS outputs, 
         collect(DISTINCT catalyst.displayName) AS catalysts,
         collect(DISTINCT physicalEntity.displayName) AS physical_entities,
         collect(DISTINCT entityFunctionalStatus.displayName) AS entity_functional_status,
         collect(DISTINCT diseaseEntity.displayName) AS diseases,
         collect(DISTINCT regulator.displayName) AS regulators
"""

# Columns returned by reaction_query; each one is copied onto the gene records.
REACTION_FIELDS = (
    "reactions",
    "inputs",
    "outputs",
    "catalysts",
    "physical_entities",
    "entity_functional_status",
    "diseases",
    "regulators",
)

# The reaction query depends only on the pathway name, and several gene rows
# can refer to the same pathway, so each distinct pathway is queried once and
# the answer is reused for every row that mentions it.
reactions_by_pathway = {}

try:
    # A single session serves all queries instead of opening one per row.
    with driver.session() as session:
        for gene_data in genes_results_list:
            pathway_name = gene_data["pathway_name"]

            if pathway_name not in reactions_by_pathway:
                print(f"Fetching reactions for pathway: {pathway_name}")
                details = {}
                for record in session.run(reaction_query, {"pathway_name": pathway_name}):
                    details = {field: record[field] for field in REACTION_FIELDS}
                reactions_by_pathway[pathway_name] = details

            # Copy the lists so rows sharing a pathway do not share list objects.
            for field, values in reactions_by_pathway[pathway_name].items():
                gene_data[field] = list(values)
finally:
    # Release the connection even if a query fails part-way through.
    driver.close()

# Write the enriched gene/pathway records to disk as JSON indented by 4 spaces.
serialized_genes = json.dumps(genes_results_list, indent=4)
with open('genes_output3.json', 'w') as outfile:
    outfile.write(serialized_genes)

print("JSON file saved successfully: genes_output3.json")



In [None]:
# Retrieve all the interactions that exist within the graph database.
# Each row holds an interaction's display name, its score, and the distinct
# {name, function} maps of the nodes it links to through `interactor`.
interaction_query = """
MATCH (n:Interaction)-[:interactor]->(p) 
RETURN 
    n.displayName AS interaction , n.score AS confidence_score,
    collect(DISTINCT{name:p.displayName, function: p.comment}) AS gene_information
"""

# The pathway/reaction cell above ends by closing the shared driver, and a
# closed driver must not be used again, so this cell opens its own connection
# from the same credentials.
driver = GraphDatabase.driver(uri, auth=(username, password))

interactions_results_list = []
try:
    with driver.session() as session:
        # Consume the result while the session is still open.
        interactions_results_list.extend(
            {
                "interaction": record["interaction"],
                "confidence_score": record["confidence_score"],
                "gene_information": record["gene_information"],
            }
            for record in session.run(interaction_query)
        )
finally:
    # Release the connection even if the query fails.
    driver.close()

with open('interactions_output.json', 'w') as outfile:
    json.dump(interactions_results_list, outfile, indent=4)

print("Data successfully written to interactions_output.json")