## Reactions Dataset
### Date: February 19, 2025
### Author: Selin Kaplanoglu

**Goal:** Use Neo4j to query the Reactome Graph Database and retrieve, for every pathway, its summary and all of its reactions together with each reaction's inputs, outputs, catalysts, and regulators.

In [3]:
#imports
from neo4j import GraphDatabase
from itertools import combinations
import random
import logging
import time
import json
import requests
import os
from dotenv import load_dotenv

In [4]:
# Set up the Neo4j connection and initialise the driver.
# Connection settings are read from the environment; load_dotenv() first
# loads any values from a local .env file into the process environment.
load_dotenv()
uri = os.getenv("URI")
# NOTE(review): by default load_dotenv() does not override variables that are
# already set in the environment, and USERNAME is commonly predefined by the
# OS (e.g. on Windows), so the .env value may be silently ignored. Consider a
# project-specific name such as NEO4J_USERNAME -- confirm with the .env owner.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")

# Fail fast with a readable message instead of letting the driver choke on a
# missing URI. Credentials are deliberately not validated here because a
# server may be configured without authentication.
if not uri:
    raise RuntimeError(
        "Neo4j connection URI is not set: define URI in the environment or in .env"
    )

driver = GraphDatabase.driver(uri, auth=(username, password))


In [3]:
# Cypher: one row per (pathway name, summation text) with every reaction
# reachable through one or more hasEvent hops. Each reaction is returned as a
# {stId, name} map so that the per-reaction lookup below can key on the stable
# identifier while the export keeps the human-readable name. The CASE keeps
# pathways without reactions from contributing a map full of nulls (collect()
# skips NULL values).
pathway_query = """
MATCH (p:Pathway)
OPTIONAL MATCH (p)-[:hasEvent*]->(r:ReactionLikeEvent)
OPTIONAL MATCH (p)-[:summation]->(s)
RETURN
    p.displayName AS pathway_name,
    collect(DISTINCT CASE WHEN r IS NULL THEN NULL
                          ELSE {stId: r.stId, name: r.displayName} END) AS reactions,
    s.text AS summary
"""

# Cypher: participants of a single reaction, looked up by stable identifier.
reaction_query = """
MATCH (r:ReactionLikeEvent {stId: $reaction_id})
OPTIONAL MATCH (r)-[:input]->(input:PhysicalEntity)
OPTIONAL MATCH (r)-[:output]->(output:PhysicalEntity)
OPTIONAL MATCH (r)-[:catalystActivity]->(catalyst:CatalystActivity)
OPTIONAL MATCH (r)-[:regulatedBy|regulator]->(regulator:Regulation)
RETURN 
    r.displayName AS reaction_name, 
    collect(DISTINCT input.displayName) AS inputs, 
    collect(DISTINCT output.displayName) AS outputs, 
    collect(DISTINCT catalyst.displayName) AS catalysts,
    collect(DISTINCT regulator.displayName) AS regulators
"""

# Cypher: components of a complex, looked up by stable identifier.
# Not executed in this cell; kept for use elsewhere in the notebook.
participant_query = """
MATCH (p:Complex {stId: $participant_id})
OPTIONAL MATCH (p)-[:hasComponent|hasMember|hasCandidate*]->(component:PhysicalEntity)
RETURN p.displayName AS complex_name, collect(DISTINCT component.displayName) AS components
"""

# Fields copied from each reaction_query row into the exported reaction entry.
_reaction_fields = ("inputs", "outputs", "catalysts", "regulators")

# One dict per pathway row: {"pathway_name", "summary", "reactions": [...]}.
combined_results = []

try:
    with driver.session() as session:
        # Materialise the pathway rows up front so the per-reaction queries
        # below do not run while the outer result stream is still open.
        pathway_records = list(session.run(pathway_query))

        for record in pathway_records:
            # summary is NULL for pathways without a summation node.
            summary = record["summary"] or ""

            pathway_data = {
                "pathway_name": record["pathway_name"],
                # Replace non-breaking spaces with plain spaces in the text.
                "summary": summary.replace('\u00a0', ' '),
                "reactions": []
            }

            for reaction in record["reactions"]:
                reaction_data = {
                    "reaction_name": reaction["name"],
                    "inputs": [],
                    "outputs": [],
                    "catalysts": [],
                    "regulators": []
                }

                # Look the reaction up by stId. Passing the display name here
                # (as the earlier version did) never matches the stId filter
                # and leaves every participant list empty.
                reaction_result = session.run(
                    reaction_query, {"reaction_id": reaction["stId"]}
                )

                for rec in reaction_result:
                    for field in _reaction_fields:
                        reaction_data[field].extend(rec[field])

                pathway_data["reactions"].append(reaction_data)

            combined_results.append(pathway_data)
finally:
    # Release the driver's connections even if a query fails part-way.
    driver.close()

with open('pathway_reactions_output.json', 'w', encoding='utf-8') as outfile:
    json.dump(combined_results, outfile, indent=4)

print("Data successfully written to pathway_reactions_output.json")


Data successfully written to pathway_reactions_output.json
