## Reactions Dataset
### Date: February 19, 2025
### Author: Selin Kaplanoglu

**Goal:** Use Neo4j to query gene–gene interactions and retrieve information about the genes, pathways, and reactions involved.

In [2]:
#imports
from neo4j import GraphDatabase
from itertools import combinations
import random
import logging
import time
import json
import requests
import os
from dotenv import load_dotenv

In [3]:
# Set up the Neo4j connection and initialize the driver.
# Connection settings are read from the environment; load_dotenv() first
# loads any values defined in a local .env file.
# NOTE(review): load_dotenv() does not override variables that already exist
# in the process environment by default, and USERNAME is commonly pre-set by
# the operating system (e.g. on Windows) -- confirm the value picked up here
# is really the Neo4j user.
load_dotenv()
uri = os.getenv("URI")
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# os.getenv returns None for unset variables; fail here with a clear message
# instead of letting the driver raise an obscure error later on.
_missing_settings = [
    name
    for name, value in (("URI", uri), ("USERNAME", username), ("PASSWORD", password))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Neo4j connection setting(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a .env file."
    )

driver = GraphDatabase.driver(uri, auth=(username, password))


In [4]:
print("Executing Neo4j query...")

# For every physical entity that references an ENSEMBL record, collect the
# human (taxId 9606) pathways whose reactions it takes part in, together with
# each reaction's inputs, outputs, catalysts and regulators.
#
# The reaction pattern is anchored on `pe`: an unanchored
# `MATCH (rle:ReactionLikeEvent)` is independent of the preceding clauses, so
# Cypher would pair every gene with every reaction (a cartesian product that
# is both meaningless and extremely slow).
# NOTE(review): only direct input/output participation is matched here;
# entities nested inside complexes or sets are not traversed -- extend the
# relationship list if that is required (confirm against the graph schema).
#
# The RETURN list must not end with a comma before ORDER BY; that is a Cypher
# syntax error.
gene_query = """
MATCH (n)-[:referenceDatabase]->(rd:ReferenceDatabase)
WHERE toLower(rd.displayName) = toLower("ENSEMBL")
WITH DISTINCT n  
MATCH (pe:PhysicalEntity)-[:referenceEntity]->(n)
WITH DISTINCT pe 
MATCH (rle:ReactionLikeEvent)-[:input|output]->(pe)
    OPTIONAL MATCH (rle)-[:input]->(input:PhysicalEntity)
    OPTIONAL MATCH (rle)-[:output]->(output:PhysicalEntity)
    OPTIONAL MATCH (rle)-[:catalystActivity]->(catalyst:CatalystActivity)
    OPTIONAL MATCH (rle)-[:regulatedBy|regulator]->(regulator:Regulation)
MATCH (:Species{taxId:"9606"})<-[:species]-(p:Pathway)-[:hasEvent]->(rle)
WITH DISTINCT pe, rle, input, output, catalyst, regulator, p
RETURN pe.displayName AS gene_name,
       p.displayName AS pathway_name, 
       collect(DISTINCT rle.displayName) AS reactions,
       collect(DISTINCT input.displayName) AS inputs, 
       collect(DISTINCT output.displayName) AS outputs, 
       collect(DISTINCT catalyst.displayName) AS catalysts,
       collect(DISTINCT regulator.displayName) AS regulators
ORDER BY gene_name, pathway_name
"""

# gene name -> {"gene_name": ..., "pathways": [one entry per pathway row]}
genes_results_dict = {}

with driver.session() as session:
    try:
        # Run the query
        genes_results = session.run(gene_query)

        # Debug: append every raw record to a log file as it is processed
        with open('debug_log.txt', 'a') as log_file:
            for record in genes_results:
                log_file.write(f"Record: {dict(record)}\n")

                # A returned column always exists in the record, so the
                # default of .get() never applies; `or` also covers the case
                # where the displayName itself is null.
                gene_name = record.get("gene_name") or "Unknown"

                pathway_data = {
                    "pathway_name": record.get("pathway_name") or "Unknown",
                    "reactions": record.get("reactions") or [],
                    "inputs": record.get("inputs") or [],
                    "outputs": record.get("outputs") or [],
                    "catalysts": record.get("catalysts") or [],
                    "regulators": record.get("regulators") or [],
                    # The query returns no "diseases" column, so this is
                    # always an empty list; the key is kept so the output
                    # schema stays stable for downstream consumers.
                    "diseases": record.get("diseases", [])
                }

                gene_entry = genes_results_dict.setdefault(
                    gene_name, {"gene_name": gene_name, "pathways": []}
                )
                gene_entry["pathways"].append(pathway_data)

    except Exception as e:
        # Report the failure, then re-raise so the cell stops here instead of
        # overwriting the output file with empty or partial results.
        print(f"Error executing query: {e}")
        raise

# Convert dictionary to list for JSON output
genes_results_list = list(genes_results_dict.values())

# Debug: show a bounded preview rather than dumping the full result set into
# the notebook output.
preview_count = 3
print(f"\nFinal JSON Output ({len(genes_results_list)} genes, showing first {preview_count}):")
print(json.dumps(genes_results_list[:preview_count], indent=4))

# Save results to JSON
with open('genes_output3.json', 'w') as outfile:
    json.dump(genes_results_list, outfile, indent=4)

print("JSON file saved successfully: genes_output3.json")


Executing Neo4j query...


KeyboardInterrupt: 

In [None]:
# Fetch every Interaction node in the graph database along with the
# entities it points to through the `interactor` relationship.
interaction_query = """
MATCH (n:Interaction)-[:interactor]->(p) 
RETURN 
    n.displayName AS interaction , n.score AS confidence_score,
    collect(DISTINCT{name:p.displayName, function: p.comment}) AS gene_information
"""

# Columns copied verbatim from each result record into the output rows.
fields = ("interaction", "confidence_score", "gene_information")

interactions_results_list = []
with driver.session() as session:
    results = session.run(interaction_query)
    # One plain dict per record, in the order the server returns them.
    interactions_results_list.extend(
        {field: record[field] for field in fields} for record in results
    )

# The driver is closed once this query has been consumed.
driver.close()

# Persist the collected interactions as pretty-printed JSON.
with open('interactions_output.json', 'w') as outfile:
    outfile.write(json.dumps(interactions_results_list, indent=4))

print("Data successfully written to interactions_output.json")