In [None]:
'''
author: Irsyad Adam
'''

In [None]:
import os

import pandas as pd
import requests

def extract_pathway(unid, timeout = 30) -> list:
    """
    grabs all reactome pathways associated with protein
    @param unid is the uniprot id that is going to be grabbed
    @param timeout is the number of seconds to wait for the server before giving up
    @return is a list with all of the reactome pathway ids; the list is empty when
            the entry lists none or when the request does not return status 200
    """
    #get the url
    url = 'https://www.uniprot.org/uniprot/' + unid + '.txt'

    #check the response; the timeout keeps a stalled connection from hanging the notebook
    response = requests.get(url=url, timeout=timeout)

    #if not successful
    if response.status_code != 200:
        #report the error and hand back an empty list so callers can still iterate the result
        print('Error, Status Code: %s' % response.status_code)
        return []

    #if successful
    pathway_list = []
    #search every line of the entry for pathways
    for line in response.text.splitlines():
        #if found
        if 'Reactome;' in line:
            #the third whitespace-separated field is the id followed by one trailing character
            fields = line.split()
            if len(fields) > 2:
                pathway_list.append(str(fields[2][:-1]))
    return pathway_list


def add_pathways(df) -> any:
    """
    adds a 'pathway' column holding the extract_pathway result of every protein
    @param df is dataframe with the uniprot ids in its 'protein' column
    @return df is the same dataframe object, modified in place with the new column
    """
    #one lookup per uniprot id, in row order
    df['pathway'] = [extract_pathway(unid) for unid in df['protein']]
    return df


In [None]:
#load the caseolap scores and attach the reactome pathways of every protein (80s)
caseolap_df = add_pathways(pd.read_csv('caseolap.csv'))

In [None]:
#display the dataframe, which now carries the 'pathway' column added by add_pathways
caseolap_df

In [None]:
from neo4j import GraphDatabase

#use your own default url (override it with the NEO4J_URI environment variable)
url = os.environ.get("NEO4J_URI", "neo4j://localhost:11007")

#credentials come from the environment so that no password is stored in the notebook:
#NEO4J_USER defaults to neo4j, NEO4J_PASSWORD must be set before running this cell
driver = GraphDatabase.driver(url, auth=(os.environ.get("NEO4J_USER", "neo4j"), os.environ["NEO4J_PASSWORD"]))

#confirms the connection
def confirmation_query(tx) -> str:
    """
    transaction function that asks the server for its database info
    @param tx is the transaction handed in by the session that calls the function
    @return is whatever single() yields for the "CALL db.info()" result
    """
    return tx.run("CALL db.info()").single()

def server_confirmation() -> str:
    """
    runs confirmation_query through a session of the module level driver
    @return is the value produced by confirmation_query
    """
    with driver.session() as session:
        server_reply = session.write_transaction(confirmation_query)
    #the session is already closed here; only the fetched value leaves the function
    return server_reply

#run the connectivity check once and keep the returned value
server_info = server_confirmation()

#show the returned value together with its python type
print("server info: \n", server_info, '\n', type(server_info))



In [None]:
from neo4j import GraphDatabase
import pandas as pd

def edit_entity_count(text = 'entitycount.txt') -> pd.DataFrame:
    '''
    turns entitycount.txt to a pandas df
    each non-blank line is split on whitespace: the first field is the pmid and
    every following field is an id, of which only the part before '|' is kept
    @param text is the path of the entitycount file
    @return is a df with a 'pmid' column (strings) and a 'unid' column (lists of strings)
    '''
    pmid = []
    unid = []

    #read data; the with-block closes the file even when parsing fails
    with open(text, 'r') as handle:
        for line in handle:
            fields = line.split()
            #skip blank lines, they carry no pmid
            if not fields:
                continue
            pmid.append(fields[0])
            #partition keeps the whole id when it has no '|' (find() would return -1 and cut a character)
            unid.append([field.partition('|')[0] for field in fields[1:]])

    #load in everything into a dataframe
    return pd.DataFrame({'pmid' : pmid, 'unid' : unid})


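#Requires drug-category-target.csv in the Neo4j import folder (it is read with LOAD CSV FROM 'file:///...');
#entitycount.txt is opened from the working directory and the caseolap data is passed in as a dataframe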
class graph_creation:
    """
    deploys the drug / category / protein / pathway / pmid graph into neo4j

    the classmethods that take tx are transaction functions holding one cypher
    query each; the run_* methods execute them through the driver and close
    every session they open
    """

    def __init__(self, caseolap_df, uri = None, user = None, password = None) -> None:
        """
        loads the pmid table and opens the neo4j driver
        @param caseolap_df is the dataframe with the 'protein' column, the score columns
               (CM, ARR, CHD, VD, IHD, CCS, VOO, OHD) and the 'pathway' column of lists
        @param uri is the server url, defaults to $NEO4J_URI or neo4j://localhost:11007
        @param user is the username, defaults to $NEO4J_USER or neo4j
        @param password is the password, defaults to $NEO4J_PASSWORD
        @raises RuntimeError when no password is given by argument or environment
        """
        uri = uri or os.environ.get("NEO4J_URI", "neo4j://localhost:11007")
        user = user or os.environ.get("NEO4J_USER", "neo4j")
        #the password is never stored in the notebook; it comes from the caller or the environment
        password = password or os.environ.get("NEO4J_PASSWORD")
        if not password:
            raise RuntimeError("Neo4j password missing: pass password=... or set NEO4J_PASSWORD")

        #read the file before opening the driver so a missing file does not leave a driver open
        self.pmid_df = edit_entity_count()
        self.df = []
        self.caseolap_df = caseolap_df
        self.driver = GraphDatabase.driver(uri, auth=(user, password))


    def close(self) -> None:
        """
        closes the driver
        @param self
        @return none
        """
        self.driver.close()

    def _run_write(self, work, *args):
        """
        runs one transaction function in its own session and closes the session afterwards
        @param work is the transaction function
        @param args are forwarded to the transaction function after tx
        @return is the value returned by the transaction function
        """
        with self.driver.session() as session:
            return session.write_transaction(work, *args)

    @staticmethod
    def _as_list(value) -> list:
        """
        @param value is one cell of a list-valued column
        @return is value as a list when it is a list or tuple, otherwise an empty list
                (so a missing value such as None is skipped instead of raising)
        """
        return list(value) if isinstance(value, (list, tuple)) else []

    ###################################################################################################
    #constraints
    @classmethod
    def create_drug_contraints(cls, tx) -> None:
        """
        drug constraint query (unique Drug.name)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (d:Drug) ASSERT d.name IS UNIQUE")
        tx.run(query)


    @classmethod
    def create_category_contraints(cls, tx) -> None:
        """
        category constraint query (unique Category.category)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (e:Category) ASSERT e.category IS UNIQUE")
        tx.run(query)


    @classmethod
    def create_protein_contraints(cls, tx) -> None:
        """
        protein constraint query (unique Protein.protein)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (p:Protein) ASSERT p.protein IS UNIQUE")
        tx.run(query)

    @classmethod
    def create_pathway_contraints(cls, tx) -> None:
        """
        pathway constraint query (unique Pathway.ReactomeID)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (a:Pathway) ASSERT a.ReactomeID IS UNIQUE")
        tx.run(query)

    @classmethod
    def create_root_contraints(cls, tx) -> None:
        """
        root constraint query (unique CVD_Drugs.name)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (a:CVD_Drugs) ASSERT a.name IS UNIQUE")
        tx.run(query)

    @classmethod
    def create_pmid_contraints(cls, tx) -> None:
        """
        pmid constraint query (unique PMID.id)
        @param cls is the class
        @param tx is the transaction
        @returns none
        """
        query = ("CREATE CONSTRAINT ON (a:PMID) ASSERT a.id IS UNIQUE")
        tx.run(query)


    def run_constraints(self) -> None:
        """
        run all class constraints, one transaction each, inside a single session
        @param self
        @return none
        """
        constraints = (
            self.create_category_contraints,
            self.create_drug_contraints,
            self.create_protein_contraints,
            self.create_pathway_contraints,
            self.create_root_contraints,
            self.create_pmid_contraints,
        )
        with self.driver.session() as session:
            for constraint in constraints:
                session.write_transaction(constraint)

    ###############################################################################################
    #root node deployment

    @classmethod
    def create_cvd_node(cls, tx) -> None:
        '''
        query to create main node
        @param cls is the class
        @param tx is the transaction
        @returns none
        '''
        #NOTE(review): CREATE adds a new node on every call; re-running presumably collides with
        #the CVD_Drugs name constraint once that constraint is in place - confirm before re-running
        query = ("CREATE (d: CVD_Drugs {name : 'CVD Drugs'})")
        tx.run(query)

    def run_cvd_node(self) -> None:
        """
        create root cvd node
        @param self
        @return none
        """
        self._run_write(self.create_cvd_node)

    ###############################################################################################
    #protein pathway deployment

    @classmethod
    def create_proteins(cls, tx, protein, CM, ARR, CHD, VD, IHD, CCS, VOO, OHD) -> None:
        """
        creates one protein node carrying the values of its caseolap row
        @param cls is the class
        @param tx is the transaction
        @param protein is the value stored in the protein property
        @param CM, ARR, CHD, VD, IHD, CCS, VOO, OHD are stored as properties of the same name
        @return none
        """
        query = ("CREATE (:Protein {protein: $protein, CM: $CM, ARR: $ARR, CHD: $CHD, VD: $VD, IHD: $IHD, CCS: $CCS, VOO: $VOO, OHD: $OHD})")
        tx.run(query, protein = protein, CM = CM, ARR = ARR, CHD = CHD, VD = VD, IHD = IHD, CCS = CCS, VOO = VOO, OHD = OHD)

    @classmethod
    def create_pathways(cls, tx, pathway) -> None:
        """
        merges one pathway node
        @param cls is the class
        @param tx is the transaction
        @param pathway is the value stored in the ReactomeID property
        @return none
        """
        query = ("MERGE (:Pathway {ReactomeID: $pathway})")
        tx.run(query, pathway = pathway)

    def run_proteins_pathways(self) -> None:
        """
        run the protein and pathway creation from self.caseolap_df
        @param self
        @return none
        """
        score_columns = ['CM', 'ARR', 'CHD', 'VD', 'IHD', 'CCS', 'VOO', 'OHD']
        with self.driver.session() as session:
            #use the dataframe stored on the instance, not a notebook global of the same name
            for i in range(len(self.caseolap_df)):
                #positional row lookup, done once per row
                row = self.caseolap_df.iloc[i]
                session.write_transaction(self.create_proteins, row['protein'], *[row[column] for column in score_columns])

            for pathways in self.caseolap_df['pathway']:
                for pathway in self._as_list(pathways):
                    session.write_transaction(self.create_pathways, pathway)

    ###############################################################################################
    #drug, category node deployment

    @classmethod
    def create_drug_categories(cls, tx) -> None:
        """
        drug and category node creation from drug-category-target.csv
        @param cls is the class
        @param tx is the transaction
        @return none
        """
        query = ("LOAD CSV WITH HEADERS FROM 'file:///drug-category-target.csv' AS row "
                 "MERGE (d:Drug {name: row.Name}) "
                 "MERGE (c:Category {category: row.Category})")
        tx.run(query)


    def run_drug_categories(self) -> None:
        """
        run drug and category deployment
        @param self
        @return none
        """
        self._run_write(self.create_drug_categories)

    ###############################################################################################
    #pmid node deployment

    @classmethod
    def create_pmid(cls, tx, pmid) -> None:
        '''
        query to merge one pmid node
        @param cls is class
        @param tx is transaction used for in neo4j library
        @param pmid is the pmid number
        '''
        query = ('MERGE (p:PMID {id: $pmid})')
        tx.run(query, pmid = pmid)


    def run_pmid(self) -> None:
        '''
        deploys the pmids
        @param self
        @return none
        '''
        #iterate through the pmids, one transaction each, inside a single session
        with self.driver.session() as session:
            for pmid in self.pmid_df['pmid']:
                session.write_transaction(self.create_pmid, pmid)

    ###############################################################################################
    #data preprocessing

    @classmethod
    def get_protein_list(cls, tx) -> list:
        """
        reads name, category and uniprot ids of every row of drug-category-target.csv
        @param cls is the class
        @param tx is the transaction
        @return is the result of data() on the query result, with the keys
                name, category and UniProtID
        """
        query = ("LOAD CSV WITH HEADERS FROM 'file:///drug-category-target.csv' AS row "
                 "RETURN row.Name as name, row.Category AS category, row.UniprotID AS UniProtID")
        result = tx.run(query)
        return result.data()

    def run_protein_list(self) -> None:
        """
        stores the drug table in self.df with the UniProtID strings split into lists
        @param self
        @return None
        """
        #grab the table from the query
        records = self._run_write(self.get_protein_list)
        #create dataframe; naming the columns keeps them present when no rows come back
        result = pd.DataFrame(records, columns=['name', 'category', 'UniProtID'])

        #turn the comma separated string into a list of stripped ids; the column is
        #replaced as a whole because element-wise chained assignment may write to a copy
        result['UniProtID'] = result['UniProtID'].map(
            lambda raw: [token.strip() for token in str(raw).split(',')]
        )
        self.df = result

    ###############################################################################################
    #relationships deployment

    @classmethod
    def create_root_category_relationships(cls, tx) -> None:
        """
        query for relationships of the root node and every category
        @param cls is the class
        @param tx is the transaction
        @return none
        """
        query = ("MATCH (d:CVD_Drugs) WHERE d.name = 'CVD Drugs' " 
                 "MATCH (c:Category) "
                 "MERGE (d)-[r:CATEGORY]->(c)")
        tx.run(query)

    @classmethod
    def create_drug_category_relationships(cls, tx, drug, category) -> None:
        """
        query for relationships of the drugs and categories
        @param cls is the class
        @param tx is the transaction
        @param drug is the drug name to match
        @param category is the category to match
        @return none
        """
        query = ("MATCH (d:Drug) WHERE d.name = $drug " 
                 "MATCH (c:Category) WHERE c.category = $category "
                 "MERGE (d)-[r:CANDIDATE_OF]->(c)")
        tx.run(query, drug = drug, category = category)


    @classmethod
    def create_drug_protein_relationships(cls, tx, drug, protein) -> None:
        """
        query for relationships of drugs and proteins
        @param cls is the class
        @param tx is the transaction
        @param drug is the drug name to match
        @param protein is the protein id to match
        @return none
        """
        query = ("MATCH (c:Drug) WHERE c.name = $drug "
                 "MATCH (p:Protein) WHERE p.protein = $protein "
                 "MERGE (c)-[r:TARGET]->(p)")
        tx.run(query, drug = drug, protein = protein)

    @classmethod
    def create_protein_pathway_relationships(cls, tx, protein, pathway) -> None:
        """
        query for relationships of proteins and pathways
        @param cls is the class
        @param tx is the transaction
        @param protein is the protein id to match
        @param pathway is the ReactomeID to match
        @return none
        """
        query = ("MATCH (p:Protein) WHERE p.protein = $protein "
                 "MATCH (a:Pathway) WHERE a.ReactomeID = $pathway "
                 "MERGE (p)-[r:ASSOCIATED_WITH]-(a)")
        tx.run(query, protein = protein, pathway = pathway)

    def run_relationships(self) -> None:
        """
        runs all relationships
        @param self is the object of the class
        """
        #process uniprot ids in drug, category, targets
        self.run_protein_list()

        with self.driver.session() as session:
            #create root relationship
            session.write_transaction(self.create_root_category_relationships)

            for drug, category, proteins in zip(self.df['name'], self.df['category'], self.df['UniProtID']):
                #create drug category relationship
                session.write_transaction(self.create_drug_category_relationships, drug, category)
                for protein in proteins:
                    #create drug protein relationship
                    session.write_transaction(self.create_drug_protein_relationships, drug, protein)

            for protein, pathways in zip(self.caseolap_df['protein'], self.caseolap_df['pathway']):
                for pathway in self._as_list(pathways):
                    #create protein pathway relationship
                    session.write_transaction(self.create_protein_pathway_relationships, protein, pathway)


    ###############################################################################################
    #pmid-proteins relationships deployment

    @classmethod
    def create_protein_pmid_relationships(cls, tx, protein, pmid) -> None:
        """
        query for relationships of pmid and proteins
        @param cls is the class
        @param tx is the transaction
        @param protein is the protein id to match
        @param pmid is the pmid to match
        @return none
        """
        query = ("MATCH (p:Protein) WHERE p.protein = $protein "
                 "MATCH (a:PMID) WHERE a.id = $pmid "
                 "MERGE (p)-[r:PMID_TARGET]->(a)")
        tx.run(query, protein = protein, pmid = pmid)

    def run_pmid_protein_relationships(self) -> None:
        '''
        runs protein pmid relationships
        @param self 
        @return none
        '''
        with self.driver.session() as session:
            for pmid, proteins in zip(self.pmid_df['pmid'], self.pmid_df['unid']):
                for protein in proteins:
                    session.write_transaction(self.create_protein_pmid_relationships, protein, pmid)


            


In [25]:
graph = graph_creation(caseolap_df)
try:
    #deploys the constraints
    graph.run_constraints()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [26]:
graph = graph_creation(caseolap_df)
try:
    #deploys the main cardiovascular drug node
    graph.run_cvd_node()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [27]:
graph = graph_creation(caseolap_df)
try:
    #deploys the proteins as nodes and pathways as nodes (30s for completion)
    graph.run_proteins_pathways()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [28]:
graph = graph_creation(caseolap_df)
try:
    #deploys the drugs and categories
    graph.run_drug_categories()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [29]:
graph = graph_creation(caseolap_df)
try:
    #deploys the pmids (120s for completion)
    graph.run_pmid()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [30]:
graph = graph_creation(caseolap_df)
try:
    #deploys the pmids-protein relationships (~250s for completion)
    graph.run_pmid_protein_relationships()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()

In [31]:
graph = graph_creation(caseolap_df)
try:
    #deploys relationships
    graph.run_relationships()
finally:
    #close the driver created for this cell, even when the deployment fails
    graph.close()
