In [None]:
# Connect notebook to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Navigate to the shared drive folder
%cd /content/drive/'My Drive'/'Cardiovascular Knowledge Graph'
!pwd
!ls

/content/drive/My Drive/Cardiovascular Knowledge Graph
/content/drive/My Drive/Cardiovascular Knowledge Graph
cardiac_conduction.ipynb  muscle_contraction_graph.ipynb  pathway_links.ipynb
data			  muscle_contraction.ipynb
filter_organism.ipynb	  output


In [None]:
%cd output

/content/drive/My Drive/Cardiovascular Knowledge Graph/output


In [None]:
!ls

cardiac_cond_name_filtered.csv	      graphs
cardiac_conduction_all_organisms.csv  muscle_contraction_filtered.csv
cardiac_conduction_filtered.csv       muscle_contraction_filtered.json
cardiac_conduction_filtered.json      protein_name_cardiac_conduction.csv
cardiac_conduction.json


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from collections import Mapping

  """


In [None]:
def sort_contraction(full_table, cardiac_table):
    """
    Split muscle-contraction proteins into cardiac-conduction and other proteins.

    Parameters
    ----------
    full_table : table with a 'Protein ID' column (e.g. a pandas DataFrame).
    cardiac_table : table with 'Protein ID' and 'Reactome Pathway ID' columns
        that supports boolean-mask row selection (e.g. a pandas DataFrame).

    Returns
    -------
    dict
        {'Muscle Contraction': {'Cardiac Conduction': {pathway ID: [protein IDs]},
                                'Not CC': [protein IDs]}}
        'Not CC' lists the proteins present in full_table but absent from
        cardiac_table, each once, in order of first appearance.
    """
    cardiac_proteins = set(cardiac_table['Protein ID'])

    # dict.fromkeys de-duplicates while keeping first-appearance order, so the
    # result is the same on every run (iterating a plain set of strings is not).
    non_cardiac_proteins = [
        protein
        for protein in dict.fromkeys(full_table['Protein ID'])
        if protein not in cardiac_proteins
    ]

    pathway_column = cardiac_table['Reactome Pathway ID']
    proteins_by_pathway = {}
    for pathway_id in dict.fromkeys(pathway_column):
        pathway_rows = cardiac_table[pathway_column == pathway_id]
        proteins_by_pathway[pathway_id] = list(pathway_rows['Protein ID'])

    return {'Muscle Contraction': {'Cardiac Conduction': proteins_by_pathway,
                                   'Not CC': non_cardiac_proteins}}

def sort_cardiac_pathway_protein(cardiac_table):
    """
    Group cardiac conduction protein IDs by the pathway they belong to.

    Parameters
    ----------
    cardiac_table : table with 'Reactome Pathway ID' and 'Protein ID' columns
        (e.g. a pandas DataFrame).

    Returns
    -------
    dict
        {pathway ID: [protein IDs in that pathway]}, with proteins in row order
        and duplicates kept.
    """
    proteins_by_pathway = {}

    # Walk the two columns in lockstep instead of re-materialising each column
    # as a list on every row.
    rows = zip(cardiac_table['Reactome Pathway ID'], cardiac_table['Protein ID'])
    for pathway_id, protein_id in rows:
        proteins_by_pathway.setdefault(pathway_id, []).append(protein_id)

    return proteins_by_pathway

def sort_cardiac_pathway_name(cardiac_table):
    """
    Map each pathway ID to its pathway name.

    Parameters
    ----------
    cardiac_table : table with 'Reactome Pathway ID' and 'Pathway Name' columns
        (e.g. a pandas DataFrame).

    Returns
    -------
    dict
        {pathway ID: pathway name}. If an ID occurs with more than one name,
        the name from the last such row is kept.
    """
    # One pass over the paired columns; later rows overwrite earlier ones.
    return dict(zip(cardiac_table['Reactome Pathway ID'],
                    cardiac_table['Pathway Name']))

def combine_sorted_data(cardiac_table):
    """
    Merge the per-pathway protein lists and pathway names into one lookup.

    Returns a dict shaped as {pathway ID: ([protein IDs], pathway name)}.
    """
    names_by_pathway = sort_cardiac_pathway_name(cardiac_table)
    proteins_by_pathway = sort_cardiac_pathway_protein(cardiac_table)

    return {
        pathway_id: (proteins_by_pathway[pathway_id], pathway_name)
        for pathway_id, pathway_name in names_by_pathway.items()
    }

In [None]:
!ls

sample_data


In [None]:
# Load the name-annotated, filtered cardiac conduction table.
# The path is relative, so this only works when the working directory is the
# `output` folder that the notebook `%cd`s into earlier.
# NOTE(review): the preceding `!ls` output lists only `sample_data`, which
# suggests the working directory was different on that run — confirm before
# running. The displayed frame also carries 'Unnamed: 0.1' and 'Unnamed: 0'
# columns, presumably indexes written out on earlier saves — confirm.
cardiac_table = pd.read_csv('cardiac_cond_name_filtered.csv')
cardiac_table

Unnamed: 0.1,Unnamed: 0,Protein ID,Reactome Pathway ID,Pathway Name
0,0,A2AGL3,R-MMU-5578775,Ion homeostasis
1,1,E9PZQ0,R-MMU-5578775,Ion homeostasis
2,2,E9Q401,R-MMU-5578775,Ion homeostasis
3,3,E9Q9K5,R-MMU-5578775,Ion homeostasis
4,4,G5E829,R-MMU-5578775,Ion homeostasis
...,...,...,...,...
514,514,Q9NY47,R-HSA-5576893,Phase 2 - plateau phase
515,515,Q9UBN1,R-HSA-5576893,Phase 2 - plateau phase
516,516,Q9UJ90,R-HSA-5576893,Phase 2 - plateau phase
517,517,Q9Y6H6,R-HSA-5576893,Phase 2 - plateau phase


In [None]:
# Build the {pathway ID: ([protein IDs], pathway name)} lookup and echo it.
# NOTE(review): the bare expression displays the entire dictionary; with a few
# hundred rows that is a long output — consider showing only a few entries.
combined_sorted = combine_sorted_data(cardiac_table)
combined_sorted

## **Using PyVis**

In [None]:
!pip install pyvis

In [None]:
from pyvis.network import Network
from IPython.core.display import display, HTML

In [None]:
def cardiac_conduction_linked(cardiac_table):
    """
    Build and display an interactive pyvis graph of the cardiac conduction data.

    Nodes are the Reactome pathway IDs and the protein IDs. Every pathway is
    linked to each of its proteins, and two *different* pathway IDs are linked
    whenever they appear with the same pathway name.

    Parameters
    ----------
    cardiac_table : table with 'Protein ID', 'Reactome Pathway ID' and
        'Pathway Name' columns (e.g. a pandas DataFrame).

    Returns
    -------
    None
        The graph is saved as 'cardiac_conduction_pathway_link.html' in the
        current working directory and rendered in the cell output.
    """
    d = combine_sorted_data(cardiac_table)
    # NOTE(review): recent pyvis releases default show() to notebook mode and
    # may need Network(..., notebook=True, cdn_resources=...) to render in
    # Colab — confirm against the installed pyvis version.
    g = Network(height = 750, width = 1500)

    # Pathway nodes first, then the protein nodes of every pathway.
    g.add_nodes(list(d))
    for proteins, _name in d.values():
        g.add_nodes(proteins)

    # Pathway <-> protein edges.
    for pathway_id, (proteins, _name) in d.items():
        for protein_id in proteins:
            title_display = "from " + protein_id + " to " + pathway_id
            g.add_edge(pathway_id, protein_id, title = title_display)

    # Collect, per pathway name, the distinct pathway IDs in order of first
    # appearance (an inner dict is used as an ordered set).
    ids_by_name = {}
    rows = zip(cardiac_table['Reactome Pathway ID'], cardiac_table['Pathway Name'])
    for pathway_id, name in rows:
        if name != name:
            # A value that does not equal itself (NaN) can never match another
            # row's name, so it links nothing.
            continue
        ids_by_name.setdefault(name, {})[pathway_id] = None

    # Link each pair of distinct pathway IDs that share a name exactly once.
    # Comparing every row with every row (itself included) would also give
    # each pathway a self-loop edge and is quadratic in the number of rows.
    for same_name_ids in ids_by_name.values():
        ordered_ids = list(same_name_ids)
        for position, id_ref in enumerate(ordered_ids):
            for id_compare in ordered_ids[position + 1:]:
                title_display = str(id_ref) + " is connected to " + str(id_compare)
                g.add_edge(id_ref, id_compare, title = title_display)

    t = "cardiac_conduction_pathway_link.html"
    g.show(t)
    # Load the saved page by file name explicitly rather than relying on HTML()
    # guessing that the string is a path.
    display(HTML(filename = t))


In [None]:
# Render the pathway/protein graph for the loaded cardiac conduction table.
# The function saves its output as 'cardiac_conduction_pathway_link.html'
# (relative to the current working directory) and displays it below.
cardiac_conduction_linked(cardiac_table)

In [None]:
# Spot-check: positional lookup of the protein ID stored at position 19.
# NOTE(review): looks like a leftover debugging probe — confirm whether it is
# still needed in the final notebook.
cardiac_table['Protein ID'].values[19]

'P70170'