In [38]:
import os
import re
from operator import itemgetter
from urllib.parse import quote

import networkx as nx
import pandas as pd
import requests as rqs

In [2]:
# Load the polypharmacy table: each row pairs two STITCH compound IDs with a
# side-effect code and its name (see the preview below).
df = pd.read_csv("data/ChChSe-Decagon_polypharmacy.csv")
df.head()

Unnamed: 0,# STITCH 1,STITCH 2,Polypharmacy Side Effect,Side Effect Name
0,CID000002173,CID000003345,C0151714,hypermagnesemia
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity
2,CID000002173,CID000003345,C0004144,atelectasis
3,CID000002173,CID000003345,C0002063,alkalosis
4,CID000002173,CID000003345,C0004604,Back Ache


In [3]:
# Load the per-side-effect "Perceived fear score" table; the "Id" column is discarded.
weights = pd.read_csv("data/weighted_side_effects.csv").drop(
    columns=["Id"]
)
weights.head()

Unnamed: 0,Side effect,Perceived fear score
0,hypermagnesemia,0.471916
1,retinopathy of prematurity,0.45746
2,atelectasis,0.693081
3,alkalosis,0.401998
4,Back Ache,0.289684


In [4]:
# Attach a fear score to every polypharmacy row by matching on the side-effect name.
# how="left" keeps rows that have no matching score; their "Perceived fear score" is NaN.
# NOTE(review): duplicated names in `weights` would duplicate rows here — confirm they are unique.
weighted_data = pd.merge(
    df, weights, left_on="Side Effect Name", right_on="Side effect", how="left"
)
weighted_data.head()

Unnamed: 0,# STITCH 1,STITCH 2,Polypharmacy Side Effect,Side Effect Name,Side effect,Perceived fear score
0,CID000002173,CID000003345,C0151714,hypermagnesemia,hypermagnesemia,0.471916
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity,retinopathy of prematurity,0.45746
2,CID000002173,CID000003345,C0004144,atelectasis,atelectasis,0.693081
3,CID000002173,CID000003345,C0002063,alkalosis,alkalosis,0.401998
4,CID000002173,CID000003345,C0004604,Back Ache,Back Ache,0.289684


In [5]:
def cid_to_name(cid, timeout=10):
    """Resolve a STITCH-style compound ID to its PubChem title.

    Args:
        cid: Identifier made of ``"CID"`` followed by digits, optionally
            zero-padded (e.g. ``"CID000002173"``).
        timeout: Seconds to wait for PubChem before ``requests`` raises.

    Returns:
        The response text with surrounding whitespace stripped, or the
        sentinel string ``"Errore nella richiesta"`` when the HTTP status
        is not 200.

    Raises:
        ValueError: If ``cid`` does not contain a ``CID<digits>`` identifier.
    """
    # ``0*`` rather than ``0+`` so that IDs without leading zeros are accepted too.
    match = re.search(r"CID0*(\d+)", cid)
    if match is None:
        raise ValueError(f"Not a valid CID identifier: {cid!r}")
    norm_cid = match.group(1)

    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{norm_cid}/property/Title/TXT"
    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = rqs.get(url, timeout=timeout)

    if response.status_code == 200:
        return response.text.strip()  # drop surrounding spaces / trailing newline
    return "Errore nella richiesta"


def name_to_cid(drug_name, timeout=10):
    """Look up a drug name on PubChem and return a zero-padded ``CID`` string.

    Args:
        drug_name: Name to search for.
        timeout: Seconds to wait for PubChem before ``requests`` raises.

    Returns:
        ``"CID"`` followed by the first CID in the response, left-padded with
        zeros to nine digits (e.g. ``"CID000002244"``). When the HTTP status
        is not 200, or the response contains no CID, a message string that
        embeds the status code and ``drug_name`` is returned instead.
    """
    # Percent-encode the name so characters such as "/", "?" or "#" cannot alter the URL path.
    encoded_name = quote(str(drug_name), safe="")
    url = (
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded_name}/cids/TXT"
    )

    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = rqs.get(url, timeout=timeout)

    if response.status_code == 200:
        # The body may list several CIDs, one per line; keep only the first.
        cids = response.text.split()
        if cids:
            return f"CID{cids[0].zfill(9)}"
    return f"Skibidi status - {response.status_code}: {drug_name} got no rizz fr fr"

### construct the graph

In [6]:
# Build a MultiGraph from `weighted_data`: every row becomes one edge between its two
# STITCH IDs, so a drug pair gets one parallel edge per row. The side-effect code and
# the fear score are stored as edge attributes.
G = nx.from_pandas_edgelist(
    weighted_data,
    source="# STITCH 1",
    target="STITCH 2",
    edge_attr=["Polypharmacy Side Effect", "Perceived fear score"],
    create_using=nx.MultiGraph(),
)

#### Degree centrality

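In a MultiGraph, the degree of a node counts every edge attached to it, including all parallel edges. `nx.degree_centrality` divides that degree by (n − 1), where n is the number of nodes, so with parallel edges the score can exceed 1.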



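What it measures: the number of recorded drug-pair side effects that involve a given drug (one parallel edge per side effect), scaled by the number of other drugs in the network. It is not the number of distinct drugs the drug interacts with.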

In [26]:
# Rank the nodes by degree centrality, highest first, and show the five top drugs by name.
degree_centrality = nx.degree_centrality(G)
sorted_degree_centrality = sorted(
    degree_centrality.items(), key=itemgetter(1), reverse=True
)
for cid, score in sorted_degree_centrality[:5]:
    print(cid_to_name(cid), score)

Omeprazole 123.19099378881987
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 121.46739130434781
Lorazepam 109.8447204968944
DL-Thyroxine 108.74223602484471
Rofecoxib 108.71894409937887


#### edge multiplicity

In [27]:
# Number of parallel edges for every connected pair of nodes, keyed by the (u, v) pair.
edge_multiplicity = {pair: G.number_of_edges(*pair) for pair in G.edges()}

# Pairs with the most parallel edges come first.
sorted_edge_multiplicity = sorted(
    edge_multiplicity.items(), key=itemgetter(1), reverse=True
)
for nodes, score in sorted_edge_multiplicity[:5]:
    cid_1, cid_2 = nodes
    print(cid_to_name(cid_1), cid_to_name(cid_2), score)

Lansoprazole Omeprazole 540
Lansoprazole Celecoxib 524
Rofecoxib Omeprazole 494
Rofecoxib Zolpidem 478
Gabapentin Celecoxib 476


#### degree per node

In [28]:
# Raw (unnormalized) degree of every node; parallel edges are counted individually.
degree_per_node = {node: degree for node, degree in G.degree()}
sorted_degree_per_node = sorted(
    degree_per_node.items(), key=itemgetter(1), reverse=True
)

# Five highest-degree drugs, resolved to their names.
for cid, score in sorted_degree_per_node[:5]:
    print(cid_to_name(cid), score)

Omeprazole 79335
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 78225
Lorazepam 70740
DL-Thyroxine 70030
Rofecoxib 70015


#### degree per node weighted

In [None]:
# Sum of the "Perceived fear score" over all edges touching each node.
weighted_degrees = {}
for node in G.nodes:
    # One (u, v, score) triple per parallel edge of `node`; an edge without
    # the attribute contributes 0.
    weighted_degree = sum(
        fear_score
        for _, _, fear_score in G.edges(
            node, data="Perceived fear score", default=0
        )
    )
    weighted_degrees[node] = weighted_degree

# Highest total fear score first.
sorted_weighted_degrees = sorted(
    weighted_degrees.items(), key=itemgetter(1), reverse=True
)

In [None]:
# Show the five nodes with the largest summed fear score, resolved to drug names.
for cid, score in sorted_weighted_degrees[:5]:
    print(cid_to_name(cid), score)

Omeprazole 32807.673254430054
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 32716.54036732952
Lorazepam 29996.598305576954
Zolpidem 29128.420431400013
Acetaminophen 29060.579033013142


#### Weighted degree per node, normalized (mean fear score per edge)

In [32]:
# Mean "Perceived fear score" per edge for every node: the summed score of the
# node's edges divided by how many edges it has.
normalized_weighted_degrees = {}

for node in G.nodes:
    edges = list(G.edges(node, keys=True))

    # An edge without the attribute contributes 0 to the sum.
    fear_scores = [
        G[u][v][key].get("Perceived fear score", 0) for u, v, key in edges
    ]
    weighted_degree = sum(fear_scores)

    # Every node of G comes from an edge-list row, so `edges` is expected to be non-empty.
    normalized_weighted_degree = weighted_degree / len(edges)

    normalized_weighted_degrees[node] = normalized_weighted_degree

# Highest average fear score first.
sorted_normalized_weighted_degrees = sorted(
    normalized_weighted_degrees.items(), key=itemgetter(1), reverse=True
)

for cid, score in sorted_normalized_weighted_degrees[:5]:
    print(cid, score)

CID000145068 0.5192658859389547
CID000004675 0.5051913038387866
CID000036339 0.5051303354118789
CID000036811 0.49290672432451305
CID000002232 0.4828852222989256


#### Analysis on most used drugs

In [23]:
# Match the ranked drug list against the drugs present in the network.
# Each name needs a PubChem lookup, so the matches are cached on disk and
# reloaded from there when the file already exists.
if not os.path.isfile("data/ranked_drugs_matching.csv"):
    rank_drugs = pd.read_csv("data/top_300_drugs.csv")

    # Every distinct CID that appears on either side of a drug pair.
    drugs_in_nw = list(
        set(df["# STITCH 1"].unique().tolist() + df["STITCH 2"].unique().tolist())
    )

    drugs_present_in_both = []
    for idx, row in rank_drugs.iterrows():
        drug = row["Drug Name"]
        rank = row["Rank"]
        cid = name_to_cid(drug)
        # Failed lookups return a message string, which never matches a network CID.
        if cid not in drugs_in_nw:
            continue
        drugs_present_in_both.append([rank, drug, cid])

    most_used = pd.DataFrame(
        drugs_present_in_both, columns=["rank", "drug_name", "cid"]
    )
    most_used.to_csv("data/ranked_drugs_matching.csv", index=False)
else:
    most_used = pd.read_csv("data/ranked_drugs_matching.csv")
most_used.head()

Unnamed: 0,rank,drug_name,cid
0,2,Metformin,CID000004091
1,5,Amlodipine,CID000002162
2,6,Metoprolol,CID000004171
3,7,Albuterol,CID000002083
4,8,Losartan,CID000003961


In [49]:
# For every drug in `most_used`, record its (position, value) in each of the three
# node rankings: degree centrality, summed fear score and mean fear score per edge.
# The table is always bound to `top_300_measures` (never to `most_used`), and the
# same table is what gets written to the cache file.


def get_tuple_and_index(data, value):
    """Return ``(position, score)`` for ``value`` in a ranked list.

    Args:
        data: Sequence of ``(cid, score)`` pairs.
        value: CID to look for.

    Returns:
        ``(index of the pair in data, score)`` for the first pair whose CID
        equals ``value``, or ``None`` when there is no such pair.
    """
    # Compare against the CID only; `value in t` would also test the score.
    return next(((i, t[1]) for i, t in enumerate(data) if t[0] == value), None)


measure_columns = ["degree", "weighted_degree", "averaged_weighted_degree"]
top_300_measures = None

if os.path.isfile("data/top_300_measures.csv"):
    cached_measures = pd.read_csv("data/top_300_measures.csv")
    # Accept the cache only if it really holds the measures; a file lacking
    # these columns (e.g. one written from `most_used`) is recomputed and replaced.
    # NOTE: after the CSV round-trip the (position, value) cells are strings, not tuples.
    if set(measure_columns).issubset(cached_measures.columns):
        top_300_measures = cached_measures

if top_300_measures is None:
    most_used_score = {
        "cid": [],
        "drug_name": [],
        "rank": [],
        "degree": [],
        "weighted_degree": [],
        "averaged_weighted_degree": []
    }

    for idx, row in most_used.iterrows():
        cid = row["cid"]
        most_used_score["cid"].append(cid)
        most_used_score["drug_name"].append(row["drug_name"])
        most_used_score["rank"].append(row["rank"])
        # "degree" holds the degree-centrality ranking entry.
        most_used_score["degree"].append(get_tuple_and_index(sorted_degree_centrality, cid))
        most_used_score["weighted_degree"].append(
            get_tuple_and_index(sorted_weighted_degrees, cid)
        )
        most_used_score["averaged_weighted_degree"].append(
            get_tuple_and_index(sorted_normalized_weighted_degrees, cid)
        )

    top_300_measures = pd.DataFrame(most_used_score)
    top_300_measures.to_csv("data/top_300_measures.csv", index=False)

In [44]:
# Number of distinct nodes in the graph.
G.number_of_nodes()

645

In [50]:
# Display the per-drug table; each measure cell is a (position in the ranking, value) pair.
top_300_measures

Unnamed: 0,cid,drug_name,rank,degree,weighted_degree,averaged_weighted_degree
0,CID000004091,Metformin,2,"(41, 76.04658385093167)","(40, 20634.244997340862)","(190, 0.42133060393965904)"
1,CID000002162,Amlodipine,5,"(81, 55.754658385093165)","(73, 15244.516577953209)","(159, 0.42456738645221437)"
2,CID000004171,Metoprolol,6,"(16, 92.35093167701862)","(15, 25002.451203504716)","(202, 0.4203929650520348)"
3,CID000002083,Albuterol,7,"(6, 108.17701863354037)","(6, 28841.80992762025)","(250, 0.4140012334226201)"
4,CID000003961,Losartan,8,"(56, 67.1475155279503)","(57, 17884.952621380642)","(257, 0.41359185582361635)"
...,...,...,...,...,...,...
100,CID000003385,Fluorouracil,270,"(238, 20.062111801242235)","(235, 5480.533973272665)","(161, 0.4241899360118162)"
101,CID000010631,Medroxyprogesterone,276,"(126, 41.296583850931675)","(130, 10339.906544410607)","(491, 0.3887913722282612)"
102,CID000002725,Chlorpheniramine,291,"(558, 1.2593167701863353)","(562, 312.8056885316162)","(504, 0.38570368499582763)"
103,CID000003516,Guaifenesin,295,"(186, 28.71894409937888)","(195, 7297.636632122773)","(448, 0.39457348646243706)"
