<a href="https://colab.research.google.com/github/niranjana2222/Drug-Interaction-Network-Analysis/blob/main/Drug_Interaction_Network_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Packages that are not guaranteed to be present in the runtime
!pip3 install selenium 
!pip3 install pyvis

# Standard library
import json
import os
import re
import urllib
import zipfile
from operator import itemgetter

# Data handling, graph analysis and plotting
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from pyvis.network import Network

# Selenium
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting selenium
  Downloading selenium-4.8.0-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m75.0 MB/s[0m eta [36m0:00:00[0m
Collecting urllib3[socks]~=1.26
  Downloading urllib3-1.26.14-py2.py3-none-any.whl (140 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trio-websocket~=0.9
  Downloading trio_websocket-0.9.2-py3-none-any.whl (16 kB)
Collecting trio~=0.17
  Downloading trio-0.22.0-py3-none-any.whl (384 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m384.9/384.9 KB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
Collecting async-generator>=1.9
  Downloading async_generator-1.10-py3-none-any.whl (18 kB)
Collecting exceptiongroup>=1.0.0rc9
  Downloading exceptiongroup-1.1.0-py3-none-any.whl (14 kB)
Collecting out

In [None]:
# read data
# Unpack every zip archive found in the data folder. Entries that are not zip
# archives (sub-folders, or files extracted by an earlier run of this cell) are
# skipped, so the cell can be re-run without raising zipfile.BadZipFile.
zip_files_list = [
    entry for entry in os.listdir('data')
    if zipfile.is_zipfile(os.path.join('data', entry))
]

for file in zip_files_list:
    with zipfile.ZipFile(os.path.join('data', file), 'r') as zip_ref:
        zip_ref.extractall('data')

# Show the folder contents after extraction
os.listdir('data')

In [None]:
# Load the DrugBank code -> drug name lookup
with open('data/DB_mapping.json', 'r') as mapping_file:
    db_mapping = json.load(mapping_file)

# Read the raw DrugBank interaction pairs (tab-separated, no header row)
df_db_int = pd.read_csv("data/ChCh-Miner_durgbank-chem-chem.tsv", sep='\t', header=None)
df_db_int = df_db_int.set_axis(['drug_1_code', 'drug_2_code'], axis=1)

# Translate each drug code into a name via the lookup
df_db_int = df_db_int.assign(
    drug_1_name=df_db_int['drug_1_code'].map(db_mapping),
    drug_2_name=df_db_int['drug_2_code'].map(db_mapping),
)

# Drop every pair in which either drug maps to the "revoked" placeholder name
revoked = ['This record has been revoked']
keep = ~(df_db_int['drug_1_name'].isin(revoked) | df_db_int['drug_2_name'].isin(revoked))
df_db_int = df_db_int[keep]

# Reorder columns so each code sits next to its name
new_cols = ['drug_1_code', 'drug_1_name', 'drug_2_code', 'drug_2_name']
df_db_int = df_db_int[new_cols]
df_db_int.head()

In [None]:
# Give every interaction a constant weight of 1 and keep only the drug name
# columns plus that weight.
# `assign` returns a new frame rather than writing a column into a frame that
# was itself produced by slicing, which avoids pandas' SettingWithCopyWarning.
df_db_int = df_db_int.assign(weight=1)[['drug_1_name', 'drug_2_name', 'weight']]
df_db_int.head()

In [None]:
# Build the interaction graph: one node per drug name, one edge per listed pair
G = nx.from_pandas_edgelist(df_db_int, 'drug_1_name', 'drug_2_name')

# Give the graph a name
G.name = 'Drug Interactions Network'

# Check whether graph is directed or undirected (False = undirected)
print(G.is_directed())

# Obtain general information of graph.
# nx.info() was removed in NetworkX 3.0, so the basics are printed explicitly
# using accessors that exist in both 2.x and 3.x.
print(f"Name: {G.name}")
print(f"Type: {type(G).__name__}")
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")

# Get graph density
density = nx.density(G)
print("Network density:", density)

In [None]:
# Degree of every node, also stored on the graph as the 'degree' node attribute
degree_dict = dict(G.degree())
nx.set_node_attributes(G, degree_dict, name='degree')

# Rank (drug, degree) pairs from the highest degree to the lowest
sorted_degree = sorted(degree_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 drugs by degree:")
for drug_and_degree in sorted_degree[:20]:
    print(drug_and_degree)

In [None]:
sorted_degree_values = [pair[1] for pair in sorted_degree]
min_degree, max_degree = min(sorted_degree_values), max(sorted_degree_values)

# Set fixed bin size. The upper edge is derived from the data instead of being
# hard-coded, so the highest-degree nodes can never fall outside the last bin.
bin_width = 2
bins = np.arange(0, max_degree + 2 * bin_width, bin_width)

# Plot histogram
fig, ax = plt.subplots()
ax.hist(sorted_degree_values, bins=bins, alpha=0.5)
ax.set_xlim(min_degree - 15, max_degree + 15)
ax.set_title('Node degree distribution (fixed bin size)')
ax.set_xlabel('degree')
ax.set_ylabel('count')
plt.show()

In [None]:
# Compute the degree centrality of every node in G; the result is used as a
# mapping (its .items() are sorted in the next statement).
degree_centrality = nx.degree_centrality(G)
degree_centrality = dict(sorted(degree_centrality.items(), key=lambda item: i

In [None]:
def generate_network_viz(df, source_col, target_col, weights, 
                         layout='barnes_hut',
                         central_gravity=0.15,
                         node_distance=420,
                         spring_length=100,
                         spring_strength=0.15,
                         damping=0.96
                         ):
    """Build an interactive PyVis network from an edge-list DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list; one row per edge.
    source_col, target_col : str
        Names of the columns holding the two endpoints of each edge.
    weights : str
        Name of the column passed to NetworkX as the edge attribute.
    layout : str, default 'barnes_hut'
        'repulsion' selects the repulsion solver on a dark background; any
        other value selects the Barnes-Hut solver on a white background.
    central_gravity, node_distance, spring_length, spring_strength, damping
        Physics settings forwarded to the repulsion solver. They are not used
        for the Barnes-Hut layout.

    Returns
    -------
    pyvis.network.Network
        The configured network, ready for ``.show(<html file>)``.
    """
    
    # Generate a networkx graph
    G = nx.from_pandas_edgelist(df, source_col, target_col, edge_attr=weights)
    
    # Dark theme for the repulsion layout, light theme otherwise
    if layout == 'repulsion':
        bgcolor, font_color = '#222222', 'white'
    else:
        bgcolor, font_color = 'white', 'black'
    
    # Initiate PyVis network object
    drug_net = Network(
                       height='700px', 
                       width='100%',
                       bgcolor=bgcolor, 
                       font_color=font_color, 
                       notebook=True
                      )
    
    # Take Networkx graph and translate it to a PyVis graph format
    drug_net.from_nx(G)
    
    # Create different network layout (repulsion or Barnes Hut)
    if layout == 'repulsion':
        drug_net.repulsion(
                            node_distance=node_distance, 
                            central_gravity=central_gravity, 
                            spring_length=spring_length, 
                            spring_strength=spring_strength, 
                            damping=damping
                           )
    else:
        # The keyword defaults above are only forwarded to the repulsion
        # solver, so Barnes-Hut runs with PyVis's own default settings.
        drug_net.barnes_hut()

    # Callers chain .show(...) on the result, so the network must be returned
    return drug_net

In [None]:
# Define list of oral medications for the common diseases
# (each drug is listed once; a repeated 'Dapagliflozin' entry was removed)
dm_meds = ['Metformin', 'Linagliptin', 'Sitagliptin', 'Glibenclamide', 'Gliclazide',
          'Glimepiride', 'Glipizide', 'Tolbutamide', 'Canagliflozin', 'Dapagliflozin',
          'Empagliflozin', 'Acarbose', 'Liraglutide']

cardio_meds = ['Captopril', 'Enalapril', 'Lisinopril', 'Perindopril', 'Irbesartan',
               'Losartan', 'Telmisartan', 'Valsartan', 'Candesartan', 'Atenolol', 
               'Bisoprolol', 'Carvedilol', 'Propranolol', 'Amlodipine', 'Diltiazem', 
               'Nifedipine', 'Verapamil', 'Bumetanide', 'Frusemide', 'Isosorbide',
               'Hydrochlorothiazide', 'Spironolactone', 'Isosorbide Dinitrate', 
               'Isosorbide Mononitrate', 'Aspirin', 'Clopidogrel', 'Ticagrelor', 
               'Atorvastatin', 'Rosuvastatin', 'Simvastatin', 'Pravastatin']  

# List of meds with the most interactions
subset_meds_1 = ['Phenytoin', 'Mifepristone', 'Paroxetine']

# Sample drugs for diabetes (Metformin), hyperlipidemia (Simvastatin), and hypertension (Lisinopril)
subset_meds_2 = ['Metformin', 'Simvastatin', 'Lisinopril']

In [None]:
# Generate a networkx graph based on subset data
db_subset_net = generate_network_viz(df_db_int_sm, 'drug_1_name', 'drug_2_name', 'weight', layout='repulsion')

# Display interactive graph
db_subset_net.show('drug_interactions_network_subset_repulsion.html')

In [None]:
# Generate the PyVis network for the subset data using the Barnes-Hut layout.
# 'barnes_hut' is the layout name generate_network_viz declares as its default;
# the previous value 'barnes' only worked because every non-'repulsion' value
# is treated the same way.
# NOTE(review): df_db_int_sm is not created in any cell visible here - confirm
# the subset frame is defined before this cell runs.
db_subset_net_barnes = generate_network_viz(df_db_int_sm, 'drug_1_name', 'drug_2_name', 'weight', layout='barnes_hut')
db_subset_net_barnes.show('drug_interactions_network_subset_barnes.html')
