# SemNet Training Notebook

For any questions regarding this notebook, please contact Kevin McCoy <kmccoy8@gatech.edu> or Stephen Allegri <sallegri3@gatech.edu>

# [0] Instructions
1. Read the docstring and comment at the top of each cell.
    * Docstrings detail what each cell does.
    * Comments below the docstring detail what you need to edit in that cell.
2. Run each cell sequentially and edit what you like.
3. That's it! Have fun! :)

* API keys and URLs have been removed; replace them with your own. This is optional: if you skip it, just ignore the email and CUI retrieval functions.

# [1] Construct Email Notification System

In [None]:
'''Construct email notification system.'''
# Run cell

import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
MY_ADDRESS = ''
PASSWORD = ''


def send_notif(destination, subject, msg_text):
    '''Send a plain-text notification email from MY_ADDRESS.

    Args:
        destination: Recipient email address (used as the "To" header).
        subject: Subject line of the email.
        msg_text: Plain-text body of the email.

    The SMTP host and port below are blank placeholders; fill them in,
    together with MY_ADDRESS and PASSWORD, before calling this function.
    '''
    # Build the message first so no connection is opened if this step fails.
    msg = MIMEMultipart()
    msg['From'] = MY_ADDRESS
    msg['To'] = destination
    msg['Subject'] = subject
    msg.attach(MIMEText(msg_text, 'plain'))

    # The context manager sends QUIT and closes the socket even when
    # starttls/login/send_message raises, so a failed send cannot leak
    # the connection.
    with smtplib.SMTP(host='', port=0000) as s:
        s.starttls()
        s.login(MY_ADDRESS, PASSWORD)
        s.send_message(msg)

In [None]:
'''Copy and paste this cell wherever you like to notify you of code completion.'''
# Edit below variables and then run cell

destination = '' # Put your own email here
subject = 'subject' # Edit subject line and email body how you like
msg_text = 'Hello world!'

send_notif(destination, subject, msg_text)

# [2] Get CUIs for Concepts of Interest

In [None]:
'''UMLS API.'''
# Run cell

import lxml.html as lh
from lxml.html import fromstring
import requests
import json

uri = ''
auth_endpoint = ''

class Authentication:
    '''Helper that obtains tickets from the authentication endpoint.

    Usage is two-step: call gettgt() once to obtain a ticket URL, then call
    getst(tgt) to obtain a ticket string that is sent with an API request.
    '''

    def __init__(self, apikey):
        '''Store the API key; the service identifier is left blank to fill in.'''
        self.apikey = apikey
        self.service = ''

    def gettgt(self):
        '''POST the API key and return the form action URL from the response.

        Raises:
            requests.HTTPError: if the endpoint answers with a 4xx/5xx status
                (for example a rejected API key).
            IndexError: if the response contains no <form action=...>.
        '''
        params = {'apikey': self.apikey}
        h = {'Content-type': 'application/x-www-form-urlencoded', 'Accept': 'text/plain', 'User-Agent':'python' }
        r = requests.post(uri+auth_endpoint,data=params,headers=h)
        # Fail loudly on an HTTP error instead of trying to parse an error page,
        # which would otherwise surface as an unrelated IndexError below.
        r.raise_for_status()
        response = fromstring(r.text)
        ## extract the entire URL needed from the HTML form (action attribute) returned
        ## we make a POST call to this URL in the getst method
        tgt = response.xpath('//form/@action')[0]
        return tgt

    def getst(self,tgt):
        '''POST to the URL returned by gettgt() and return the response text.

        Args:
            tgt: URL returned by gettgt().

        Raises:
            requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        '''
        params = {'service': self.service}
        h = {'Content-type': 'application/x-www-form-urlencoded', 'Accept': 'text/plain', 'User-Agent':'python' }
        r = requests.post(tgt,data=params,headers=h)
        # Without this check the body of an error response would be returned
        # and later used as if it were a valid ticket.
        r.raise_for_status()
        st = r.text
        return st
    

def get_cuis_for_concept(concept_name, page_size=1000, return_names=False):
    '''
    Get CUIs matching a query concept in UMLS.

    Args:
        concept_name: Search string sent as the 'string' query parameter.
        page_size: Value sent as the 'pageSize' query parameter.
        return_names: If True, return [ui, name] pairs instead of bare ui values.

    Returns:
        A list with one entry per item in the response's result list: the
        'ui' value, or a [ui, name] list when return_names is True.

    Raises:
        requests.HTTPError: if the search request returns a 4xx/5xx status.

    API_KEY, VERSION and url are blank placeholders to be filled in.
    '''
    API_KEY = ''
    VERSION = ''
    AuthClient = Authentication(API_KEY)

    tgt = AuthClient.gettgt()
    url = ''

    content_endpoint = f'search/{VERSION}'

    query = {'ticket':AuthClient.getst(tgt), 'string':concept_name, 'pageSize':page_size}

    r = requests.get(url=url+content_endpoint, params=query)
    # Surface HTTP failures directly rather than as a JSON decoding error
    # on an error page.
    r.raise_for_status()
    r.encoding = 'utf-8'
    results = json.loads(r.text)['result']['results']

    if return_names:
        return [[res['ui'], res['name']] for res in results]
    return [res['ui'] for res in results]


def get_concept_for_cui(concept_cui, page_size=1000, return_names=False):
    '''
    Get the concept name for a CUI.

    Args:
        concept_cui: CUI to look up; placed in the request path and also sent
            as the 'CUI' query parameter.
        page_size: Value sent as the 'pageSize' query parameter.
        return_names: Accepted for signature parity with get_cuis_for_concept;
            it is not used by this function.

    Returns:
        The 'name' field of the response's 'result' object.

    Raises:
        requests.HTTPError: if the request returns a 4xx/5xx status
            (for example an unknown CUI).

    API_KEY, VERSION and url are blank placeholders to be filled in.
    '''

    API_KEY = ''
    VERSION = ''
    AuthClient = Authentication(API_KEY)

    tgt = AuthClient.gettgt()
    url = ''

    content_endpoint = f'/content/{VERSION}/CUI/{concept_cui}'

    query = {'ticket':AuthClient.getst(tgt), 'CUI':concept_cui, 'pageSize':page_size}

    r = requests.get(url=url+content_endpoint, params=query)
    # Surface HTTP failures directly rather than as a JSON decoding error
    # or a KeyError on an error payload.
    r.raise_for_status()
    r.encoding = 'utf-8'
    items = json.loads(r.text)

    return items['result']['name']

In [None]:
'''Search for CUIs relating to concept.'''
# Edit saved variables and run cell

search_term = 'cardiovascular disease' # Edit
num_results = 5 # Edit

get_cuis_for_concept(search_term, page_size=num_results, return_names=True)

In [None]:
'''Search for CUIs relating to concept.'''
# Edit saved variables and run cell

search_term = 'stem cell' # Edit
num_results = 5 # Edit

get_cuis_for_concept(search_term, page_size=num_results, return_names=True)

In [None]:
'''Save CUIs of interest.'''
# Edit cui variables

# Target node
cvd_cui = 'C0007222' # Edit

# Source nodes
embryonic_stemcells_cui = 'C0596508' # Edit
adult_stemcells_cui = 'C1171322' # Edit
source_node_cuis = [embryonic_stemcells_cui, adult_stemcells_cui] # Edit

Note: In this notebook, I will be conducting an example experiment that compares the relationship between cardiovascular disease (CVD) and adult stem cells with the relationship between CVD and embryonic stem cells. Make your guesses now; I bet the results will surprise you! :)

# [3] Load SemNet

In [None]:
'''Import semnet and other necessary libraries.'''
# Run cell

from semnet import offline, offline_hetesim, randomized_hetesim
import pandas as pd

In [None]:
'''Load SemNet data in pandas dataframe.'''
# Run cell (may return warning)

# Load in semnet database (should take no more than 20 seconds)
# semnet_df = pd.read_csv('/mitchell/semnet_related_data/edges_updated.csv', index_col=0)
semnet_df = pd.read_csv('sample_data.csv', index_col=0)

In [None]:
'''Convert dataframe to dictionary.'''
# Run cell. Should take ~100s.

edgelist = semnet_df.to_dict(orient='records')

In [None]:
'''Define SemNet relationships.'''
# Run cell

# Maps each relation name to the name used for its inverse relation.
# Relations that map to themselves are treated as their own inverse.
# Inverse names use underscores only (no spaces), matching the relation names.
rel2inv = {'PHYSICALLY_RELATED_TO': 'PHYSICALLY_RELATED_TO',
 'PART_OF': 'HAS_PART',
 'CONTAINS': 'CONTAINED_IN',
 'LOCATION_OF': 'HAS_LOCATION',
 'TEMPORALLY_RELATED_TO': 'TEMPORALLY_RELATED_TO',
 'CO-OCCURS_WITH': 'CO-OCCURS_WITH',
 'PRECEDES': 'FOLLOWS',
 'FUNCTIONALLY_RELATED_TO': 'FUNCTIONALLY_RELATED_TO',
 'PROCESS_OF': 'HAS_PROCESS',
 'CARRIES_OUT': 'CARRIED_OUT_BY',
 'INTERACTS_WITH': 'INTERACTS_WITH',
 'PRACTICES': 'PRACTICED_BY',
 'PRODUCES': 'PRODUCED_BY',
 'EXHIBITS': 'EXHIBITED_BY',
 'DISRUPTS': 'DISRUPTED_BY',
 'CAUSES': 'CAUSED_BY',
 'PREVENTS': 'PREVENTED_BY',
 'COMPLICATES': 'COMPLICATED_BY',
 'MANIFESTATION_OF': 'HAS_MANIFESTATION',
 'AFFECTS': 'AFFECTED_BY',
 'OCCURS_IN': 'HAS_OCCURRENCE',
 'MANAGES': 'MANAGED_BY',
 'TREATS': 'TREATED_BY',
 'USES': 'USED_BY',
 'INDICATES': 'INDICATED_BY',
 'RESULT_OF': 'HAS_RESULT',
 'CONCEPTUALLY_RELATED_TO': 'CONCEPTUALLY_RELATED_TO',
 'PROPERTY_OF': 'HAS_PROPERTY',
 'CONCEPTUAL_PART_OF': 'HAS_CONCEPTUAL_PART',
 'EVALUATION_OF': 'HAS_EVALUATION',
 'MEASURES': 'MEASURED_BY',
 'DIAGNOSES': 'DIAGNOSED_BY',
 'ASSESSES_EFFECT_OF': 'ASSESSED_FOR_EFFECT_BY',
 'ISSUE_IN': 'HAS_ISSUE',
 'ASSOCIATED_WITH': 'ASSOCIATED_WITH',
 'CONSISTS_OF': 'CONSTITUTES',
 'ADJACENT_TO': 'ADJACENT_TO',
 'CONNECTED_TO': 'CONNECTED_TO',
 'INTERCONNECTS': 'INTERCONNECTED_BY',
 'SURROUNDS': 'SURROUNDED_BY',
 'TRAVERSES': 'TRAVERSED_BY',
 'DERIVATIVE_OF': 'HAS_DERIVATIVE',
 'DEVELOPMENTAL_FORM_OF': 'HAS_DEVELOPMENTAL_FORM',
 'DEGREE_OF': 'HAS_DEGREE',
 'MEASUREMENT_OF': 'HAS_MEASUREMENT',
 'METHOD_OF': 'HAS_METHOD',
 'ISA': 'INVERSE_ISA',
 'BRINGS_ABOUT': 'BROUGHT_ABOUT_BY',
 'PERFORMS': 'PERFORMED_BY',
 'SPATIALLY_RELATED_TO': 'SPATIALLY_RELATED_TO',
 'ANALYZES': 'ANALYZED_BY',
 'BRANCH_OF': 'HAS_BRANCH',
 'TRIBUTARY_OF': 'HAS_TRIBUTARY',
 'INGREDIENT_OF': 'HAS_INGREDIENT',
 'COMPARED_WITH': 'COMPARED_WITH',
 'INHIBITS': 'INHIBITED_BY',
 # Was 'STIMULATED BY' (with a space), unlike every other inverse name
 # and unlike 'NEG_STIMULATED_BY' below.
 'STIMULATES': 'STIMULATED_BY',
 'CONVERTS_TO': 'CONVERTS_FROM',
 'NEG_ASSOCIATED_WITH': 'NEG_ASSOCIATED_WITH',
 'COEXISTS_WITH': 'COEXISTS_WITH',
 'NEG_CAUSES': 'NEG_CAUSED_BY',
 'PREDISPOSES': 'PREDISPOSED_BY',
 'HIGHER_THAN': 'LOWER_THAN',
 'LOWER_THAN': 'HIGHER_THAN',
 'NEG_TREATS': 'NEG_TREATED_BY',
 'AUGMENTS': 'AUGMENTED_BY',
 'ADMINISTERED_TO': 'ADMINISTERED_BY',
 'NEG_PROCESS_OF': 'NEG_HAS_PROCESS',
 'NEG_STIMULATES': 'NEG_STIMULATED_BY',
 'NEG_PART_OF': 'NEG_HAS_PART',
 'NEG_AFFECTS': 'NEG_AFFECTED_BY',
 'NEG_ADMINISTERED_TO': 'NEG_ADMINISTERED_BY',
 'NEG_PRODUCES': 'NEG_PRODUCED_BY',
 'NEG_COEXISTS_WITH': 'NEG_COEXISTS_WITH',
 'NEG_INTERACTS_WITH': 'NEG_INTERACTS_WITH',
 'NEG_AUGMENTS': 'NEG_AUGMENTED_BY',
 'NEG_LOCATION_OF': 'NEG_HAS_LOCATION',
 # NOTE(review): 'ISA' maps to 'INVERSE_ISA' but 'NEG_ISA' maps to itself;
 # confirm whether this should be 'NEG_INVERSE_ISA'. Left unchanged here.
 'NEG_ISA': 'NEG_ISA',
 'SAME_AS': 'SAME_AS',
 'NEG_INHIBITS': 'NEG_INHIBITED_BY',
 'NEG_DISRUPTS': 'NEG_DISRUPTED_BY',
 'NEG_USES': 'NEG_USED_BY',
 'NEG_MEASURES': 'NEG_MEASURED_BY',
 'NEG_PREDISPOSES': 'NEG_PREDISPOSED_BY',
 'NEG_PREVENTS': 'NEG_PREVENTED_BY',
 'NEG_OCCURS_IN': 'NEG_HAS_OCCURRENCE',
 'NEG_DIAGNOSES': 'NEG_DIAGNOSED_BY',
 'NEG_METHOD_OF': 'NEG_HAS_METHOD',
 'NEG_HIGHER_THAN': 'NEG_LOWER_THAN',
 'NEG_SAME_AS': 'NEG_SAME_AS',
 'NEG_PRECEDES': 'NEG_PRECEDED_BY',
 'NEG_CONVERTS_TO': 'NEG_CONVERTS_FROM',
 'NEG_MANIFESTATION_OF': 'NEG_HAS_MANIFESTATION',
 'NEG_COMPLICATES': 'NEG_COMPLICATED_BY',
 'NEG_LOWER_THAN': 'NEG_HIGHER_THAN',
 # Was 'NEG_HAS_MEASURMENT' (misspelled); now parallels 'HAS_MEASUREMENT'.
 'NEG_MEASUREMENT_OF': 'NEG_HAS_MEASUREMENT'}

In [None]:
'''Load SemNet dictionary into HetGraph object.'''
# Run cell. Should take ~4min.

semnet_graph = offline.HetGraph(edgelist, rel2inv)

# [4] Search for available functions and their docstrings

In [None]:
'''Investigate available attributes and methods of semnet_graph.'''
# Run cell

dir(semnet_graph)

In [None]:
'''Investigate available attributes and methods of offline_hetesim.py.'''
# Run cell

dir(offline_hetesim)

In [None]:
'''Investigate available attributes and methods of randomized_hetesim.py.'''
# Run cell

dir(randomized_hetesim)

In [None]:
'''Display docstring of desired function.'''
# Edit below function to include function of choice

help(randomized_hetesim.randomized_pruned_hetesim_all_metapaths)

For more information on SemNet, see [SemNet GitHub Repo](https://github.com/pathology-dynamics/semnet-2)

# [5] Example Tests

### [5a] Graph Attributes

In [None]:
'''List outgoing edges.'''

node = cvd_cui
relation = 'COMPLICATES'

semnet_graph.outgoing_edges[node][relation]

In [None]:
'''List incoming edges.'''

node = cvd_cui
relation = 'COMPLICATES'

semnet_graph.incoming_edges[node][relation]

In [None]:
'''Get outgoing edge weights of certain CUIs.'''

node = cvd_cui
relation = 'COMPLICATES'

semnet_graph.outgoing_edge_weights[node][relation]

In [None]:
'''Get incoming edge weights of certain CUIs.'''

node = cvd_cui
relation = 'COMPLICATES'

semnet_graph.incoming_edge_weights[node][relation]

In [None]:
'''Find outgoing relations from certain node type.'''

node_type = 'AAPP'
relation = 'COMPLICATES'

semnet_graph.schema_outgoing_edges[node_type][relation]

In [None]:
'''Find incoming relations from certain node type.'''

node_type = 'AAPP'
relation = 'PART_OF'

semnet_graph.schema_incoming_edges[node_type][relation]

In [None]:
'''Lists CUIs of nodes of certain type.'''

type_ = 'AAPP'

semnet_graph.type2nodes[type_]

In [None]:
'''Gets type counts.'''

semnet_graph.type_counts[cvd_cui]

In [None]:
'''Find type of node of interest.'''

node = cvd_cui

semnet_graph.node2type[node]

In [None]:
'''List relation types.'''

semnet_graph.relations

In [None]:
'''Max one sided k of graph.'''

semnet_graph.max_one_sided_k

### [5b] Graph Methods

In [None]:
'''Get max one sided k.'''

semnet_graph.get_max_one_sided_k()

In [None]:
'''Reset max one sided k.'''

semnet_graph.reset_max_one_sided_k()

In [None]:
'''Compute paths.'''

source_node = cvd_cui
target_node = embryonic_stemcells_cui

for obj in (semnet_graph.compute_fixed_length_paths(source_node, target_node, 2)):
    print(obj)

In [None]:
'''Compute fixed length schema walks.'''

source_node = cvd_cui
target_node = embryonic_stemcells_cui

for obj in semnet_graph.compute_fixed_length_schema_walks(source_node, target_node, 2):
    print(obj)

In [None]:
'''Compute metapaths.'''

source_node = cvd_cui
tail_node = embryonic_stemcells_cui

for obj in (semnet_graph.compute_fixed_length_metapaths(source_node, tail_node, 2)):
    print(obj)

In [None]:
'''Find metapath reachable nodes.'''

source_node = cvd_cui
metapath = ['DSYN', 'TREATED_BY', 'LIPD', 'COMPARED_WITH', 'ORCH', 'ISA', 'NSBA', 'INTERACTS_WITH', 'CELL']

semnet_graph.compute_metapath_reachable_nodes(source_node, metapath)

In [None]:
'''Reach all nodes with outgoing relationships depth away start node.'''

start_node = cvd_cui
depth = 2

list(semnet_graph._fan_out(start_node, depth))

In [None]:
'''Reach all nodes with outgoing relationships depth away start node. Returns types.'''

start_node = cvd_cui
depth = 2

[obj for obj in semnet_graph._schema_fan_out(start_node, depth=depth)]

In [None]:
'''Reach all nodes with incoming relationships depth away start node.'''

start_node = cvd_cui
depth = 2

list(semnet_graph._fan_in(start_node, depth=depth))

In [None]:
'''Schema-level fan-in: follow incoming relationships depth away from the start node.'''
# NOTE(review): this calls _schema_fan_in, so it walks incoming (not outgoing)
# relationships; presumably it yields node types like _schema_fan_out does - confirm.

start_node = cvd_cui
depth = 2

list(semnet_graph._schema_fan_in(start_node, depth=depth))

In [None]:
path = ['C0007222', 'COEXISTS_WITH', 'C0024623', 'HAS_LOCATION', 'C0596508']

In [None]:
'''Creates iterator of edges to neighbors.'''

node = cvd_cui

list(semnet_graph._get_edges_to_nbhrs(node))

In [None]:
'''Convert a path to a metapath.'''

semnet_graph._path_to_metapath(path)

In [None]:
'''Convert a path to a string.'''

semnet_graph._path_to_string(path)

### [5c] Offline HeteSim

In [None]:
end_nodes = list(semnet_graph.compute_metapath_reachable_nodes(source_node, metapath))

In [None]:
'''Hetesim on particular metapath.'''

start_nodes = source_node_cuis

offline_hetesim.hetesim(semnet_graph, start_nodes, end_nodes, [metapath])

In [None]:
'''HeteSim all metapaths.'''

start_nodes = source_node_cuis

offline_hetesim.hetesim_all_metapaths(semnet_graph, start_nodes, end_nodes, 2)

In [None]:
'''Just another way to find all metapaths'''

start_nodes = source_node_cuis

offline_hetesim.find_all_metapaths(semnet_graph, start_nodes, end_nodes, 2)

In [None]:
'''Take mean of hetesim across all metapaths.'''

start_nodes = source_node_cuis
end_node = adult_stemcells_cui

offline_hetesim.mean_hetesim_scores(semnet_graph, start_nodes, end_node, 2)

In [None]:
'''Approximate mean hetesim across all metapaths.'''

start_nodes = source_node_cuis

offline_hetesim.approximate_mean_hetesim_scores(semnet_graph, start_nodes, end_node, 2, 0.05, 0.95)

In [None]:
'''Example CVD experiment.'''

# Score each source node (embryonic and adult stem cells) against the CVD target.
# The target is named explicitly as cvd_cui. The previous code passed `tail_node`,
# a variable leaked from the metapath demo cell that holds the embryonic stem cell
# CUI, which is itself one of the source nodes, so CVD was never the target.
result_dict = offline_hetesim.mean_hetesim_scores(semnet_graph, source_node_cuis, cvd_cui, 2)

# One row per source CUI with its mean HeteSim score.
df = pd.DataFrame(list(result_dict.items()), columns = ['cui','hetesim'])

In [None]:
df.head()

### [5d] Randomized HeteSim

In [None]:
start_nodes = cvd_cui
end_nodes = source_node_cuis
metapaths = [['DSYN', 'TREATED_BY', 'AAPP', 'INTERACTS_WITH', 'CELL'],
['DSYN', 'TREATED_BY', 'GNGM', 'INTERACTS_WITH', 'CELL'],
['DSYN', 'TREATED_BY', 'AAPP', 'STIMULATES', 'CELL'],
['DSYN', 'TREATED_BY', 'BACS', 'PART_OF', 'CELL']]
kmax = 2
epsilon = 0.05
r = 0.95

N = 100

In [None]:
'''Randomized pruned hetesim.'''

randomized_hetesim.randomized_pruned_hetesim(semnet_graph, start_nodes, end_nodes, metapaths, kmax, epsilon, r)

In [None]:
'''Randomized pruned hetesim with a given N.'''
# Same inputs as the previous cell, but N is passed in place of epsilon and r.

randomized_hetesim.randomized_pruned_hetesim_given_N(semnet_graph, start_nodes, end_nodes, metapaths, kmax, N)

In [None]:
'''Randomized pruned hetesim over all metapaths.'''

path_len = 2

randomized_hetesim.randomized_pruned_hetesim_all_metapaths(semnet_graph, start_nodes, end_nodes, path_len, epsilon, r)

In [None]:
'''Approximation of mean pruned hetesim.'''

randomized_hetesim.approximate_mean_pruned_hetesim(semnet_graph, start_nodes, end_node, path_len, epsilon, r)

# [6] Save any desired results to .csv or .xlsx

In [None]:
'''Insert node names into df.'''

# Look up the concept name for every CUI. Each lookup calls get_concept_for_cui,
# which performs network requests, so this is slow for large frames.
# Series.map does not depend on the index being 0..n-1 and still creates the
# 'name' column when df is empty, so the column selection below cannot fail.
df['name'] = df['cui'].map(get_concept_for_cui)

df = df[['cui', 'name', 'hetesim']]

In [None]:
'''Display df.'''

df.head()

In [None]:
'''Save df to .csv.'''

output_fn = 'output_file.csv'
df.to_csv(output_fn)

# [7] Other methods of calculating HeteSim

In [None]:
'''Save search parameters.'''
# Edit every line of this cell

targets = ['C0002395', 'C0020676']

sn_types = ['DSYN', 'AAPP'] # leave empty for all sn types
sn_search_depth = 1
hetesim_metapath_length = 2

email_toggle = False
output_toggle = True
joint_output_toggle = True
output_identifier = ''
destination = '' # enter email here

epsilon = 0.05 # error tolerance
r = 0.95 # probability of achieving error tolerance

In [None]:
# Lookup table from CUI string to a human-readable concept name. The scoring
# cells below read it to label every source node and every target.
# The single entry appears to be a placeholder; replace it with real
# CUI -> name pairs (including the CUIs listed in `targets`).
cui_dict = {'cui': 'name'}

In [None]:
'''Find source nodes related to target node(s).'''
# Run cell

# For each target, collect the nodes yielded by the fan-in whose type is
# listed in sn_types (or every type when sn_types is empty).
sn_set_list = [
    {
        node
        for in_set, _ in semnet_graph._fan_in(target, depth=sn_search_depth)
        for node_type in in_set
        if len(sn_types) == 0 or node_type in sn_types
        for node in in_set[node_type]
    }
    for target in targets
]

# Keep only the source nodes that were found for every target.
sn_list = list(set.intersection(*sn_set_list))

In [None]:
len(sn_list)

In [None]:
%%time

'''Calculate HeteSim scores (No Multiprocessing).'''
# Run cell. Will take a while.

# One results frame per target, in the order of `targets`.
results_df_list = []

for target_idx, target in enumerate(targets):
    result_dict = offline_hetesim.mean_hetesim_scores(semnet_graph, sn_list, target, hetesim_metapath_length)
    # Highest-scoring source nodes first.
    sorted_result_dict = dict(sorted(result_dict.items(), key=lambda item: item[1], reverse=True))

    df = pd.DataFrame(list(sorted_result_dict.items()), columns = ['source_node', 'hetesim_score'])
    df['target_node'] = target

    # Vectorized name lookup. The previous row loop reused `i`, clobbering the
    # enumerate index that the progress email below depends on, and left the
    # column missing when there were no results.
    df['source_name'] = df['source_node'].map(lambda cui: cui_dict.get(cui, 'NA'))

    # Fall back to 'NA' like the source names do, instead of raising KeyError
    # when the target has no entry in cui_dict.
    df['target_name'] = cui_dict.get(target, 'NA')
    df = df[['source_node', 'source_name', 'target_node', 'target_name', 'hetesim_score']]

    results_df_list.append(df)

    if output_toggle:
        if output_identifier != '':
            output_fn = 'SemNet_results_target={}_{}.csv'.format(target, output_identifier)
        else:
            output_fn = 'SemNet_results_target={}.csv'.format(target)
        df.to_csv(output_fn)

    if email_toggle:
        # Percentage of targets processed so far.
        send_notif(destination, 'SemNet Run ' + str(((target_idx + 1)/len(targets)) * 100) + '% Complete!', 'target: ' + str(target))

CPU times: user 7min 43s, sys: 7.78 s, total: 7min 50s
Wall time: 11min 27s


In [None]:
%%time

'''Calculate approximate mean HeteSim scores (No Multiprocessing).'''
# Run cell. Will take a while.

# One results frame per target, in the order of `targets`.
results_df_list = []

for target_idx, target in enumerate(targets):
    result_dict = offline_hetesim.approximate_mean_hetesim_scores(semnet_graph, sn_list, target, hetesim_metapath_length, epsilon, r)
    # Highest-scoring source nodes first.
    sorted_result_dict = dict(sorted(result_dict.items(), key=lambda item: item[1], reverse=True))

    df = pd.DataFrame(list(sorted_result_dict.items()), columns = ['source_node', 'approximate_mean_hetesim_score'])
    df['target_node'] = target

    # Vectorized name lookup. The previous row loop reused `i`, clobbering the
    # enumerate index that the progress email below depends on, and left the
    # column missing when there were no results.
    df['source_name'] = df['source_node'].map(lambda cui: cui_dict.get(cui, 'NA'))

    # Fall back to 'NA' like the source names do, instead of raising KeyError
    # when the target has no entry in cui_dict.
    df['target_name'] = cui_dict.get(target, 'NA')
    df = df[['source_node', 'source_name', 'target_node', 'target_name', 'approximate_mean_hetesim_score']]

    results_df_list.append(df)

    if output_toggle:
        if output_identifier != '':
            output_fn = 'approximate_mean_SemNet_results_target={}_{}.csv'.format(target, output_identifier)
        else:
            output_fn = 'approximate_mean_SemNet_results_target={}.csv'.format(target)
        df.to_csv(output_fn)

    if email_toggle:
        # Percentage of targets processed so far.
        send_notif(destination, 'SemNet Run ' + str(((target_idx + 1)/len(targets)) * 100) + '% Complete!', 'target: ' + str(target))

CPU times: user 7min 1s, sys: 4.75 s, total: 7min 6s
Wall time: 10min 36s


In [None]:
'''Multiprocessing Setup'''
# Run cell.

num_cpus = 40

from pathos.multiprocessing import ProcessingPool as Pool

def divide(lst, n):
    '''Split lst into consecutive chunks, one per worker, for up to n workers.

    Chunk sizes are chosen greedily: each chunk takes
    len(remaining) // workers_left items and the last chunk takes whatever is
    left, so later chunks may be one item longer than earlier ones.

    Args:
        lst: Sequence to split (anything that supports len() and slicing).
        n: Number of chunks requested; must be a positive integer.

    Returns:
        A list of chunks whose concatenation equals lst. When n exceeds
        len(lst) the leading chunks are empty; an empty lst yields [lst].

    Raises:
        ValueError: if n is less than 1.
    '''
    if n < 1:
        # Previously n == 0 raised ZeroDivisionError and a negative n
        # recursed until the interpreter's recursion limit was hit.
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    chunks = []
    remaining = lst
    for parts_left in range(n, 0, -1):
        size = len(remaining) // parts_left
        # Once a chunk would consume everything that is left (always true
        # when parts_left == 1), stop and emit the remainder as the final chunk.
        if len(remaining) - size <= 0:
            break
        chunks.append(remaining[:size])
        remaining = remaining[size:]

    chunks.append(remaining)
    return chunks

source_nodes = divide(sn_list, num_cpus)

results_df_list = []

In [None]:
%%time

'''Calculate mean HeteSim scores (WITH multiprocessing).'''
# Run cell. Will take a while.
# NOTE(review): this cell was titled "approximate mean", and the column name and
# output filenames below still say "approximate_mean", but the worker calls
# mean_hetesim_scores (the exact variant). Confirm which is intended. The
# filenames also collide with those written by the approximate-mean cell.

# One results frame per target, in the order of `targets`.
results_df_list = []

for target_idx, target in enumerate(targets):

    def myFunc(sources):
        # Scores one chunk of source nodes against the current target.
        return offline_hetesim.mean_hetesim_scores(semnet_graph, sources, target, hetesim_metapath_length)

    # Use the configured worker count (source_nodes was split with num_cpus)
    # instead of a second hard-coded 40.
    pool = Pool(num_cpus)
    results = pool.map(myFunc, source_nodes)
    pool.clear()

    # Merge the per-chunk result dicts into one.
    result_dict = {k:v for x in results for k,v in x.items()}

    # Highest-scoring source nodes first.
    sorted_result_dict = dict(sorted(result_dict.items(), key=lambda item: item[1], reverse=True))

    df = pd.DataFrame(list(sorted_result_dict.items()), columns = ['source_node', 'approximate_mean_hetesim_score'])
    df['target_node'] = target

    # Vectorized name lookup. The previous row loop reused `i`, clobbering the
    # enumerate index that the progress email below depends on, and left the
    # column missing when there were no results.
    df['source_name'] = df['source_node'].map(lambda cui: cui_dict.get(cui, 'NA'))

    # Fall back to 'NA' like the source names do, instead of raising KeyError
    # when the target has no entry in cui_dict.
    df['target_name'] = cui_dict.get(target, 'NA')
    df = df[['source_node', 'source_name', 'target_node', 'target_name', 'approximate_mean_hetesim_score']]

    results_df_list.append(df)

    if output_toggle:
        if output_identifier != '':
            output_fn = 'approximate_mean_SemNet_results_target={}_{}.csv'.format(target, output_identifier)
        else:
            output_fn = 'approximate_mean_SemNet_results_target={}.csv'.format(target)
        df.to_csv(output_fn)

    if email_toggle:
        # Percentage of targets processed so far.
        send_notif(destination, 'SemNet Run ' + str(((target_idx + 1)/len(targets)) * 100) + '% Complete!', 'target: ' + str(target))

CPU times: user 1min 14s, sys: 21.1 s, total: 1min 35s
Wall time: 5min 40s
