# Installs and Imports

## Installs

In [2]:
%pip install neo4j

Collecting neo4j
  Downloading neo4j-4.4.4.tar.gz (90 kB)
     ---------------------------------------- 90.8/90.8 KB 2.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pytz
  Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)
     -------------------------------------- 503.5/503.5 KB 7.8 MB/s eta 0:00:00
Using legacy 'setup.py install' for neo4j, since package 'wheel' is not installed.
Installing collected packages: pytz, neo4j
  Running setup.py install for neo4j: started
  Running setup.py install for neo4j: finished with status 'done'
Successfully installed neo4j-4.4.4 pytz-2022.1
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\nelso\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


## Imports

In [1]:
import logging
import sys

from neo4j import GraphDatabase, basic_auth
from neo4j.exceptions import ServiceUnavailable

# Database connection

URL, username, and password used to open the database connection. Replace the placeholders below with real values before running.

In [2]:
# MDB sandbox
# Connection settings. The three strings below are placeholders and must be
# replaced before running the notebook.
# NOTE(review): avoid committing real credentials here; consider reading them
# from environment variables or prompting for them instead.
url = "<URL for database>"
user = "<Username for database>"
password = "<Password for database>"
# Single driver object shared by every cell and function further down.
driver = GraphDatabase.driver(url, auth=(user, password))

# Functions

## Helper Functions

### Check existence of Term

In [3]:
def check_term_exists(tx, term_val):
    """
    Report whether a Term node with the given value is present.

    Runs a parameterized MATCH on ``term`` nodes through ``tx`` and returns
    True when the query yields at least one record, False otherwise.
    """
    query = (
        "MATCH (t:term {value: $term_val}) "
        "RETURN t.value AS term"
    )
    found_values = [row["term"] for row in tx.run(query, term_val=term_val)]
    return len(found_values) > 0

In [5]:
# Example: look up a Term by value using the shared driver.
# The driver is deliberately NOT closed here: later cells open new sessions on
# the same `driver` object, and a closed driver should not be reused. Close it
# once, when the notebook is finished.
with driver.session() as session:
    term_exists = session.read_transaction(check_term_exists, "Epithelioma, benign")
    if term_exists:
        print("Term with that value found in DB")
    else:
        print("Term with that value not found in DB")

Term with that value found in DB


### Find existing Concept from Term

In [5]:
def get_concept(tx, term_val):
    """
    Return the nanoids of all Concepts that a Term represents.

    Runs a parameterized MATCH following ``represents`` relationships from
    ``term`` nodes with the given value to ``concept`` nodes, and collects
    the ``concept`` column of every returned record into a list (empty when
    there are no records).
    """
    query = (
        "MATCH (t:term {value: $term_val})-[:represents]->(c:concept) "
        "RETURN c.nanoid AS concept"
    )
    return [row["concept"] for row in tx.run(query, term_val=term_val)]

In [None]:
# Example: list the Concepts represented by a Term.
# The shared driver is left open because later cells keep using it.
with driver.session() as session:
    concepts = session.read_transaction(get_concept, "Epithelioma, benign")

    # A Term may exist without being linked to any Concept; indexing an empty
    # list would raise IndexError, so handle that case explicitly.
    if concepts:
        print(concepts[0])

        for concept in concepts:
            print(f"concept: {concept}")
    else:
        print("No Concept found for that Term")

### Create new Term

In [6]:
def create_term(tx, term_val):
    """
    MERGE a Term node with the given value and origin_name 'NDC'.

    The statement is sent through ``tx`` with ``term_val`` as a query
    parameter; afterwards a confirmation line is printed. The message is
    printed unconditionally after the query is issued. Returns None.
    """
    merge_query = "MERGE (n:term {value: $term_val, origin_name: 'NDC'})"
    tx.run(merge_query, term_val=term_val)
    message = "Created new Term with value: {}".format(term_val)
    print(message)

In [7]:
# Example: create the Terms used by the later cells, one write transaction
# per Term and in this order.
# The shared driver is left open because later cells keep using it.
with driver.session() as session:
    for example_term in (
        "Epithelioma, benign",
        "Epithelial tumor, benign",
        "Spindle cell carcinoma, NOS",
        "Sarcomatoid Carcinoma",
        "Minimally Invasive Lung Adenocarcinoma",
        "Undifferentiated Carcinoma",
    ):
        session.write_transaction(create_term, example_term)

Created new Term with value: Epithelioma, benign
Created new Term with value: Epithelial tumor, benign
Created new Term with value: Spindle cell carcinoma, NOS
Created new Term with value: Sarcomatoid Carcinoma
Created new Term with value: Minimally Invasive Lung Adenocarcinoma
Created new Term with value: Undifferentiated Carcinoma


### Generate unique Concept nanoid

In [8]:
# generate nanoid
import random
import string

def generate_nanoid():
    """Return a random 6-character string of ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(6):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

In [9]:
def generate_unique_nanoid(tx):
    """
    Generate a nanoid that no node currently carries.

    Candidates come from ``generate_nanoid()``. Each one is checked with a
    parameterized MATCH on any node whose ``nanoid`` property equals the
    candidate, and generation repeats until a candidate yields no record.

    Args:
        tx: transaction object exposing ``run(query, **parameters)``.

    Returns:
        The first candidate nanoid for which the lookup returned no records.
    """
    query = ("MATCH (n {nanoid: $nanoid}) "
             "RETURN n.nanoid")
    # Retry in a loop so the accepted candidate is always handed back to the
    # caller; a recursive retry whose result is not returned yields None on
    # the first collision.
    while True:
        nanoid = generate_nanoid()
        result = tx.run(query, nanoid=nanoid)
        if not [record["n.nanoid"] for record in result]:
            return nanoid

In [81]:
# Example: generate a nanoid that is not yet used by any node and show it.
# The shared driver is left open because later cells keep using it.
with driver.session() as session:
    print(session.read_transaction(generate_unique_nanoid))

hjpBQV


### Create Concept

In [10]:
# create concept
def create_concept(tx, concept_nanoid):
    """
    MERGE a Concept node with the given nanoid.

    The statement is sent through ``tx`` with ``concept_nanoid`` as a query
    parameter; afterwards a confirmation line is printed. Returns None.
    """
    merge_query = "MERGE (n:concept {nanoid: $concept_nanoid})"
    tx.run(merge_query, concept_nanoid=concept_nanoid)
    message = "Created new Concept with nanoid: {}".format(concept_nanoid)
    print(message)

In [12]:
# Example: create two Concepts with freshly generated nanoids.
# id_1 and id_2 are kept at notebook level because the linking cell further
# down uses them. The shared driver is left open for the same reason: later
# cells open new sessions on it.
with driver.session() as session:
    id_1 = session.read_transaction(generate_unique_nanoid)
    session.write_transaction(create_concept, id_1)
    id_2 = session.read_transaction(generate_unique_nanoid)
    session.write_transaction(create_concept, id_2)

Created new Concept with nanoid: H6X2Ph
Created new Concept with nanoid: p2IbFv


### Link Term and Concept

In [14]:
# link term and concept
def create_relationship(tx, term_val, concept_nanoid):
    """
    MERGE a 'represents' relationship from a Term to a Concept.

    The query MATCHes the ``term`` node by value and the ``concept`` node by
    nanoid (both passed as query parameters) and MERGEs the relationship
    between them. A confirmation line is printed after the query is issued.
    Returns None.
    """
    link_query = (
        "MATCH (t:term {value: $term_val}), "
        "(c:concept {nanoid: $concept_nanoid}) "
        "MERGE (t)-[r:represents]->(c) "
    )
    params = {"term_val": term_val, "concept_nanoid": concept_nanoid}
    tx.run(link_query, **params)
    print("{} Term now represents {} Concept".format(term_val, concept_nanoid))

In [15]:
# Example: attach Terms to the two Concepts created above (id_1, id_2), one
# write transaction per link and in this order.
# The shared driver is left open because later cells keep using it.
with driver.session() as session:
    for linked_term, linked_concept in (
        ("Epithelioma, benign", id_1),
        ("Epithelial tumor, benign", id_1),
        ("Minimally Invasive Lung Adenocarcinoma", id_2),
    ):
        session.write_transaction(create_relationship, linked_term, linked_concept)

Epithelioma, benign Term now represents H6X2Ph Concept
Epithelial tumor, benign Term now represents H6X2Ph Concept
Minimally Invasive Lung Adenocarcinoma Term now represents p2IbFv Concept


## Link Two Terms

In [17]:
def link_two_terms(term_val_1: str, term_val_2: str) -> None:
    """
    Link two Term nodes in the MDB via a Concept node.

    This function takes two synonymous Term values as input strings and
    ensures they are present in the MDB and connected to each other via
    a Concept node and a 'represents' relationship.

    Behaviour by case:
      * both Terms exist and share a Concept: nothing is written, the shared
        Concept is reported;
      * both Terms exist without a shared Concept: a new Concept is created
        and both Terms are linked to it;
      * exactly one Term exists: the missing Term is created and linked to
        the existing Term's first Concept, or, if the existing Term has no
        Concept, to a newly created Concept that both Terms then represent;
      * neither Term exists: both Terms and a new Concept are created and
        linked.

    The module-level ``driver`` is used to open a session and is left open
    afterwards so the function can be called repeatedly; close the driver
    once, when all work with the database is finished.

    Args:
        term_val_1: value of the first Term.
        term_val_2: value of the second Term.
    """

    with driver.session() as session:

        term_1_exists = session.read_transaction(check_term_exists, term_val_1)
        term_2_exists = session.read_transaction(check_term_exists, term_val_2)

        if term_1_exists and term_2_exists:
            term_1_concepts = set(session.read_transaction(get_concept, term_val_1))
            term_2_concepts = set(session.read_transaction(get_concept, term_val_2))
            shared_concepts = term_1_concepts.intersection(term_2_concepts)

            if shared_concepts:
                # Terms are already connected by a Concept
                print(f"{term_val_1} and {term_val_2} are already connected via Concept {list(shared_concepts)[0]}")
            else:
                # Terms are not already connected by a Concept
                new_nanoid = session.read_transaction(generate_unique_nanoid)
                session.write_transaction(create_concept, new_nanoid)
                session.write_transaction(create_relationship, term_val_1, new_nanoid)
                session.write_transaction(create_relationship, term_val_2, new_nanoid)

        elif term_1_exists or term_2_exists:
            # Exactly one Term exists; work out which one has to be created.
            if term_1_exists:
                existing_term = term_val_1
                new_term = term_val_2
            else:
                existing_term = term_val_2
                new_term = term_val_1

            existing_term_concepts = session.read_transaction(get_concept, existing_term)

            if existing_term_concepts:
                # Reuse the first Concept the existing Term already represents.
                session.write_transaction(create_term, new_term)
                existing_concept_nanoid = existing_term_concepts[0]
                session.write_transaction(create_relationship, new_term, existing_concept_nanoid)

            else:
                # The existing Term has no Concept yet: create one for both.
                new_nanoid = session.read_transaction(generate_unique_nanoid)
                session.write_transaction(create_concept, new_nanoid)
                session.write_transaction(create_term, new_term)
                session.write_transaction(create_relationship, existing_term, new_nanoid)
                session.write_transaction(create_relationship, new_term, new_nanoid)

        else:
            # Neither Term exists: create both and a Concept to join them.
            session.write_transaction(create_term, term_val_1)
            session.write_transaction(create_term, term_val_2)
            new_nanoid = session.read_transaction(generate_unique_nanoid)
            session.write_transaction(create_concept, new_nanoid)
            session.write_transaction(create_relationship, term_val_1, new_nanoid)
            session.write_transaction(create_relationship, term_val_2, new_nanoid)

# Testing

In [18]:
# Scenario 1: both Terms exist and are already connected via a Concept.
# Expected: nothing is written; the shared Concept is only reported.
link_two_terms("Epithelioma, benign", "Epithelial tumor, benign")

Epithelioma, benign and Epithelial tumor, benign are already connected via Concept H6X2Ph


In [19]:
# Scenario 2: both Terms exist but are NOT connected via a Concept.
# Expected: a new Concept is created and both Terms are linked to it.
link_two_terms("Spindle cell carcinoma, NOS", "Sarcomatoid Carcinoma")

Created new Concept with nanoid: Zith4U
Spindle cell carcinoma, NOS Term now represents Zith4U Concept
Sarcomatoid Carcinoma Term now represents Zith4U Concept


In [20]:
# Scenario 3: one Term exists and already represents a Concept.
# Expected: the missing Term is created and linked to that existing Concept.
link_two_terms("Minimally Invasive Lung Adenocarcinoma", "Alveolar adenocarcinoma")

Created new Term with value: Alveolar adenocarcinoma
Alveolar adenocarcinoma Term now represents p2IbFv Concept


In [21]:
# Scenario 4: one Term exists but does not represent any Concept yet.
# Expected: a new Concept and the missing Term are created, then both Terms
# are linked to the new Concept.
link_two_terms("Undifferentiated Carcinoma", "Carcinoma, anaplastic, NOS")

Created new Concept with nanoid: dBSPWS
Created new Term with value: Carcinoma, anaplastic, NOS
Undifferentiated Carcinoma Term now represents dBSPWS Concept
Carcinoma, anaplastic, NOS Term now represents dBSPWS Concept


In [22]:
# Scenario 5: neither Term exists.
# Expected: both Terms and a new Concept are created, then both Terms are
# linked to the new Concept.
link_two_terms("Epithelioma, malignant", "Carcinoma")

Created new Term with value: Epithelioma, malignant
Created new Term with value: Carcinoma
Created new Concept with nanoid: 4CneRp
Epithelioma, malignant Term now represents 4CneRp Concept
Carcinoma Term now represents 4CneRp Concept
