# Drug Interactions Network Analysis
**Part 2 - Network Analysis and Visualization with NetworkX and PyVis**

- https://programminghistorian.org/en/lessons/exploring-and-analyzing-network-data-with-python
- https://datapane.com/getting-started/
- https://towardsdatascience.com/introduction-to-datapane-a-python-library-to-build-interactive-reports-4593fd3cb9c8

___
### 1. Import dependencies

In [1]:
import json
import os
import re
import zipfile
from operator import itemgetter

import networkx as nx
import numpy as np
import pandas as pd
from networkx.algorithms import community  # This part of networkx, for community detection, needs to be imported separately.
from pyvis.network import Network

# Widen the notebook container so wide tables and network plots can use the
# full page width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import altair as alt
import datapane as dp

# Datapane demo: download the OWID vaccination data, chart three randomly
# chosen countries with Altair, and upload the result as a Datapane report.
df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv')

# Sample from the distinct country names. Sampling the raw column draws rows,
# so the same country could be picked more than once.
countries = df['location'].drop_duplicates().sample(3)
country_names = ', '.join(countries)

subset = df[df.location.isin(countries)]
base_plot = alt.Chart(subset).encode(x='date:T', color='location').mark_line(size=5,opacity=0.75).interactive()

# generate the Report and upload it
dp.Report(
    dp.Text(f"## Covid vaccinations in {country_names}"),
    dp.Group(base_plot.encode(y='total_vaccinations_per_hundred'),
             base_plot.encode(y='daily_vaccinations_per_million'),
             columns=2),
    base_plot.encode(y='people_vaccinated'),
    # Use the joined names in the caption; interpolating the Series itself
    # would embed its multi-line repr (index, dtype) in the caption.
    dp.DataTable(subset, caption=f'Dataset for {country_names}'),
).upload(name='My Covid Report', open=True)

Bokeh version 2.3.1 is not supported, your plots may not display correctly, please install version ~=2.2.0


Publishing document and associated data - *please wait...*

Report successfully uploaded, click [here](https://datapane.com/u/kenneth3/reports/my-covid-report/) to view your report and optionally share it with the Datapane Community

___
### 2. Data preparation

In [2]:
# Extract every .zip archive found in the data folder into that same folder.
# Only .zip files are handled here; tar archives are not.
data_dir = 'data'

# Sorted so the extraction order does not depend on the directory listing order.
zip_files_list = sorted(name for name in os.listdir(data_dir) if name.endswith('.zip'))

for file_name in zip_files_list:
    with zipfile.ZipFile(os.path.join(data_dir, file_name), 'r') as zip_ref:
        zip_ref.extractall(data_dir)

<IPython.core.display.Javascript object>

#### 2(a) - Drug Interactions (Drugbank)

In [22]:
# Load the lookup table used to translate drug codes into drug names
with open('data/DB_mapping.json', 'r') as mapping_file:
    db_mapping = json.load(mapping_file)

# The raw interaction file is read without a header row; name its two columns
df_db_int = pd.read_csv("data/ChCh-Miner_durgbank-chem-chem.tsv", sep='\t', header=None)
df_db_int.columns = ['drug_1_code', 'drug_2_code']

# Translate both code columns into name columns
for side in ('drug_1', 'drug_2'):
    df_db_int[f'{side}_name'] = df_db_int[f'{side}_code'].map(db_mapping)

new_cols = ['drug_1_code', 'drug_1_name', 'drug_2_code', 'drug_2_name']

# Drop every interaction in which either drug name is the revoked-record placeholder
revoked_label = 'This record has been revoked'
is_revoked = df_db_int[['drug_1_name', 'drug_2_name']].isin([revoked_label]).any(axis=1)
df_db_int = df_db_int[~is_revoked][new_cols]

df_db_int.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name
0,DB00862,Vardenafil,DB00966,Telmisartan
1,DB00575,Clonidine,DB00806,Pentoxifylline
2,DB01242,Clomipramine,DB08893,Mirabegron
3,DB01151,Desipramine,DB08883,Perampanel
4,DB01235,Levodopa,DB01275,Hydralazine


#### 2(b) - Polypharmacy side effects

In [5]:
# Load the lookup table used to translate CID drug codes into drug names
with open('data/CID_mapping.json', 'r') as mapping_file:
    cid_mapping = json.load(mapping_file)

In [6]:
# Load the polypharmacy side-effect table and give its columns descriptive names
df_poly_se = pd.read_csv("data/ChChSe-Decagon_polypharmacy.csv")
df_poly_se.columns = ['drug_1_code', 'drug_2_code', 'side_effect_code', 'side_effect_description']

# Translate both drug code columns into name columns
for side in ('drug_1', 'drug_2'):
    df_poly_se[f'{side}_name'] = df_poly_se[f'{side}_code'].map(cid_mapping)

# Put each drug name next to its code, followed by the side-effect columns
new_cols = [
    'drug_1_code', 'drug_1_name',
    'drug_2_code', 'drug_2_name',
    'side_effect_code', 'side_effect_description',
]
df_poly_se = df_poly_se[new_cols]

df_poly_se.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name,side_effect_code,side_effect_description
0,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0151714,hypermagnesemia
1,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0035344,retinopathy of prematurity
2,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004144,atelectasis
3,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0002063,alkalosis
4,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004604,Back Ache


#### 2(c) - Monopharmacy side effects

In [7]:
# Import dataset (read once; the CSV was previously loaded twice and the first
# result discarded)
df_mono_se = pd.read_csv("data/ChSe-Decagon_monopharmacy.csv")
df_mono_se.columns = ['drug_code', 'side_effect_code', 'side_effect_description']

# Perform code-name mapping
df_mono_se['drug_name'] = df_mono_se['drug_code'].map(cid_mapping)

# Rearrange columns so the drug name sits next to its code
new_cols = ['drug_code', 'drug_name', 'side_effect_code', 'side_effect_description']

df_mono_se = df_mono_se[new_cols]
df_mono_se.head()

Unnamed: 0,drug_code,drug_name,side_effect_code,side_effect_description
0,CID003062316,Dasatinib,C1096328,central nervous system mass
1,CID003062316,Dasatinib,C0162830,Photosensitivity reaction
2,CID003062316,Dasatinib,C1611725,leukaemic infiltration brain
3,CID003062316,Dasatinib,C0541767,platelet adhesiveness abnormal
4,CID003062316,Dasatinib,C0242973,Ventricular dysfunction


### 3. Network Analysis (with NetworkX)

#### Drug Bank Drug Interactions

In [23]:
# Keep only the drug name columns and give every interaction the same edge
# weight of 1.
# assign() builds a new frame rather than adding a column in place to a frame
# derived from a filtered slice, which avoids pandas' SettingWithCopyWarning.
df_db_int = df_db_int[['drug_1_name', 'drug_2_name']].assign(weight=1)

df_db_int.head()

Unnamed: 0,drug_1_name,drug_2_name,weight
0,Vardenafil,Telmisartan,1
1,Clonidine,Pentoxifylline,1
2,Clomipramine,Mirabegron,1
3,Desipramine,Perampanel,1
4,Levodopa,Hydralazine,1


In [24]:
# Build the interaction graph: one node per drug name, one edge per row,
# with the 'weight' column stored as an edge attribute
G = nx.from_pandas_edgelist(
    df_db_int,
    source='drug_1_name',
    target='drug_2_name',
    edge_attr='weight',
)

In [30]:
# General information of graph.
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the summary
# is built from Graph methods instead.
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()

print(f"Type: {type(G).__name__}")
print(f"Number of nodes: {n_nodes}")
print(f"Number of edges: {n_edges}")
if n_nodes:  # the average degree is undefined for an empty graph
    avg_degree = sum(deg for _, deg in G.degree()) / n_nodes
    print(f"Average degree: {avg_degree:.4f}")

Name: 
Type: Graph
Number of nodes: 1505
Number of edges: 48224
Average degree:  64.0850


In [31]:
# Compute the density of G with NetworkX and print it
density = nx.density(G)
print("Network density:", density)

Network density: 0.042609740581041916


In [25]:
# Get most connected node (i.e. drug with most drug interactions).
# G.degree() yields (node, degree) pairs, so the pair with the largest degree
# can be picked directly; ties resolve to the first such node in graph order.
max(G.degree(), key=itemgetter(1))

('Phenytoin', 442)

In [37]:
# Map every drug to its number of interaction partners and store that count
# on the graph as the 'degree' node attribute
degree_dict = dict(G.degree())
nx.set_node_attributes(G, degree_dict, 'degree')

# Rank (drug, degree) pairs from most to least connected
sorted_degree = sorted(degree_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 drugs by degree:")
for drug_and_degree in sorted_degree[:20]:
    print(drug_and_degree)

Top 20 drugs by degree:
('Phenytoin', 442)
('Mifepristone', 377)
('Paroxetine', 368)
('Tranylcypromine', 358)
('Phenelzine', 342)
('Warfarin', 334)
('Deferasirox', 329)
('Nelfinavir', 321)
('Dronabinol', 312)
('Hydrocodone', 311)
('Clozapine', 311)
('Primidone', 307)
('Ritonavir', 306)
('Aripiprazole', 304)
('Fosphenytoin', 300)
('Saquinavir', 293)
('Acenocoumarol', 292)
('Carbamazepine', 290)
('Citalopram', 288)
('Dabrafenib', 285)


In [None]:
# Betweenness centrality not very relevant for drug-drug interactions
# betweenness_dict = nx.betweenness_centrality(G) # Run betweenness centrality
# eigenvector_dict = nx.eigenvector_centrality(G) # Run eigenvector centrality

# # Assign each to an attribute in your network
# nx.set_node_attributes(G, betweenness_dict, 'betweenness')
# nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

# sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)

# print("Top 20 nodes by betweenness centrality:")
# for b in sorted_betweenness[:20]:
#     print(b)

___
### 4. Network Visualization (with Pyvis)

In [None]:
# # Loads entire network for all drug interactions (Will take very long to load)
# db_net = Network(height='700px', width='100%', 
#                  bgcolor='white', font_color='blue',
#                  notebook=True)

# # set the physics layout of the network
# db_net.barnes_hut()

# sources = df_db_int['drug_1_name']
# targets = df_db_int['drug_2_name']
# weights = df_db_int['weight']

# edge_data = zip(sources, targets, weights)

# for e in edge_data:
#     src = e[0]
#     dst = e[1]
#     w = e[2]

#     db_net.add_node(src, src, title=src)
#     db_net.add_node(dst, dst, title=dst)
#     db_net.add_edge(src, dst, value=w)

# # Add neighbor data to node hover data
# # neighbor_map = db_net.get_adj_list()

# # for node in db_net.nodes:
# #     node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
# #     node['value'] = len(neighbor_map[node['id']])

# db_net.show('drug_interactions_db.html')

___
- Narrow down data for network visualization

In [43]:
# Define lists of oral medications for common diseases

# dm_meds = ['Metformin', 'Linagliptin', 'Sitagliptin', 'Glibenclamide', 'Gliclazide',
#           'Glimepiride', 'Glipizide', 'Tolbutamide', 'Canagliflozin', 'Dapagliflozin',
#           'Empagliflozin', 'Dapagliflozin', 'Acarbose', 'Liraglutide']

# cardio_meds = ['Captopril', 'Enalapril', 'Lisinopril', 'Perindopril', 'Irbesartan',
#                'Losartan', 'Telmisartan', 'Valsartan', 'Candesartan', 'Atenolol', 
#                'Bisoprolol', 'Carvedilol', 'Propranolol', 'Amlodipine', 'Diltiazem', 
#                'Nifedipine', 'Verapamil', 'Bumetanide', 'Frusemide', 'Isosorbide',
#                'Hydrochlorothiazide', 'Spironolactone', 'Isosorbide Dinitrate', 
#                'Isosorbide Mononitrate', 'Aspirin', 'Clopidogrel', 'Ticagrelor', 
#                'Atorvastatin', 'Rosuvastatin', 'Simvastatin', 'Pravastatin']  

# Candidate drug subsets for the focused visualization.
# subset_meds_1 is currently disabled.
# subset_meds_1 = ['Phenytoin', 'Warfarin', 'Paroxetine']
# subset_meds_2 is the list used by the filtering cell that follows.
subset_meds_2 = ['Warfarin', 'Simvastatin', 'Lisinopril']
# Empty placeholder; not referenced in the visible cells.
subset_meds_3 = []

In [44]:
# Keep only the interactions in which at least one of the two drugs is in the
# chosen subset, then renumber the rows from zero
involves_subset = df_db_int[['drug_1_name', 'drug_2_name']].isin(subset_meds_2).any(axis=1)
df_db_int_sm = df_db_int.loc[involves_subset].reset_index(drop=True)
df_db_int_sm

Unnamed: 0,drug_1_name,drug_2_name,weight
0,Progesterone,Warfarin,1
1,Warfarin,Dabigatran etexilate,1
2,Simvastatin,Siltuximab,1
3,Warfarin,Dronedarone,1
4,Floxuridine,Warfarin,1
...,...,...,...
480,Warfarin,Tibolone,1
481,Warfarin,Pentosan polysulfate,1
482,Warfarin,Cefpodoxime,1
483,Warfarin,Dabrafenib,1


In [58]:
# Interactive PyVis canvas for the subset of drug interactions
db_net_sm = Network(
    height='700px',
    width='100%',
    bgcolor='white',
    font_color='black',
    notebook=True,
)

# set the physics layout of the network
db_net_sm.barnes_hut()

# Walk the three edge columns in lockstep: (source drug, target drug, weight)
edge_columns = (
    df_db_int_sm['drug_1_name'],
    df_db_int_sm['drug_2_name'],
    df_db_int_sm['weight'],
)

for src, dst, w in zip(*edge_columns):
    # Register both endpoints (id, label and hover title are all the drug name)
    # before connecting them
    for drug in (src, dst):
        db_net_sm.add_node(drug, drug, title=drug)
    db_net_sm.add_edge(src, dst, value=w)

# Adjacency list of the network; only the disabled hover-data loop below uses it
neighbor_map = db_net_sm.get_adj_list()

# for node in db_net_sm.nodes:
#     node['title'] += ' <b>Neighbors:</b><br>' + '<br>'.join(neighbor_map[node['id']])
#     node['value'] = len(neighbor_map[node['id']])

# Pass the vis.js edge, interaction and physics options to PyVis
db_net_sm.set_options("""
var options = {
  "edges": {
    "color": {
      "inherit": true
    },
    "font": {
      "size": 65,
      "strokeWidth": 3
    },
    "scaling": {
      "max": 14
    },
    "smooth": false
  },
  "interaction": {
    "tooltipDelay": 100
  },
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -80000,
      "springLength": 250,
      "springConstant": 0.001
    },
    "minVelocity": 0.75
  }
}
""")

# Write the network to an HTML file and show it in the notebook
db_net_sm.show('drug_interactions_db.html')