In [3]:
import re
from urllib.parse import quote

import networkx as nx
import pandas as pd
import requests as rqs

In [9]:
# Read the Decagon polypharmacy table and preview its first rows.
df = pd.read_csv(filepath_or_buffer="data/ChChSe-Decagon_polypharmacy.csv")
df.head(n=5)

Unnamed: 0,# STITCH 1,STITCH 2,Polypharmacy Side Effect,Side Effect Name
0,CID000002173,CID000003345,C0151714,hypermagnesemia
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity
2,CID000002173,CID000003345,C0004144,atelectasis
3,CID000002173,CID000003345,C0002063,alkalosis
4,CID000002173,CID000003345,C0004604,Back Ache


In [10]:
def what_we_poppin_boyz(cid, timeout=10):
    """Return the PubChem title of a compound given its zero-padded ID.

    Parameters
    ----------
    cid : str
        Identifier of the form ``"CID000002173"`` ("CID", one or more
        zeros, then the numeric PubChem CID).
    timeout : float, optional
        Seconds to wait for PubChem before giving up (default 10).
        Without a timeout a stalled connection would block forever.

    Returns
    -------
    str
        The compound title on HTTP 200; the string
        ``"Errore nella richiesta"`` on any other status code or when the
        request itself fails (connection error, timeout).

    Raises
    ------
    ValueError
        If ``cid`` does not match the expected ``CID0...<digits>`` format.
    """
    match = re.search(r"CID0+(\d+)", cid)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Unrecognized compound ID format: {cid!r}")
    norm_cid = match.group(1)

    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{norm_cid}/property/Title/TXT"
    try:
        response = rqs.get(url, timeout=timeout)
    except rqs.RequestException:
        # Treat transport failures like a non-200 answer: same error string.
        return "Errore nella richiesta"

    if response.status_code == 200:
        return response.text.strip()  # drop surrounding whitespace / trailing newline
    return "Errore nella richiesta"

In [11]:
# Build an undirected multigraph from the table: every row becomes one edge
# between its two drugs, carrying the side-effect code as an edge attribute.
G = nx.from_pandas_edgelist(
    df,
    "# STITCH 1",
    "STITCH 2",
    edge_attr=["Polypharmacy Side Effect"],
    create_using=nx.MultiGraph(),
)

In [9]:
# How many parallel edges connect this one drug pair?
parallel_edges = G["CID000002173"]["CID000003345"]
print(len(parallel_edges))

56


In [12]:
# Degree centrality for every node of the MultiGraph G (node -> score).
degree_centrality = nx.degree_centrality(G)


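In a MultiGraph, a node's degree counts every edge connected to it, including all parallel edges. `nx.degree_centrality` divides that degree by n − 1 (the largest degree possible in a simple graph), so on a MultiGraph the scores can be greater than 1.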



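What it measures: each parallel edge is one reported side effect for a pair of drugs, so this score reflects the total number of (partner drug, side effect) associations a drug has, not the number of distinct drugs it interacts with. To count distinct interacting drugs, compute the centrality on the simple graph `nx.Graph(G)` instead.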

In [13]:
# Order nodes by degree centrality, highest first, and keep the first five.
top_5 = sorted(
    degree_centrality.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)[:5]

In [13]:
# Show the five (CID, degree centrality) pairs with the highest scores.
top_5

[('CID000004594', 123.19099378881987),
 ('CID000004900', 121.46739130434781),
 ('CID000003958', 109.8447204968944),
 ('CID000000853', 108.74223602484471),
 ('CID000005090', 108.71894409937887)]

In [14]:
# Print each top-ranked compound's PubChem title next to its score.
for drug_cid, centrality in top_5:
    drug_title = what_we_poppin_boyz(drug_cid)
    print(drug_title, centrality)

Omeprazole 123.19099378881987
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 121.46739130434781
Lorazepam 109.8447204968944
DL-Thyroxine 108.74223602484471
Rofecoxib 108.71894409937887


Betweenness centrality measures how often a node appears on the shortest paths between other nodes.

•	A high betweenness centrality indicates a drug acts as a “bridge” between different groups of interacting drugs.

•	Critical for: Identifying drugs that can spread side-effect risks across otherwise disconnected groups. These could be key targets for safety interventions.

In [15]:
# Betweenness centrality for every node, then the five highest-scoring nodes.
betweenness_centrality = nx.betweenness_centrality(G)
top_5_between = sorted(
    betweenness_centrality.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)
top_5_between = top_5_between[:5]

In [16]:
# Print each high-betweenness compound's PubChem title next to its score.
for drug_cid, betweenness in top_5_between:
    drug_title = what_we_poppin_boyz(drug_cid)
    print(drug_title, betweenness)

DL-Thyroxine 0.022849657819124902
Omeprazole 0.022660226525834916
Acetaminophen 0.01894759085405763
Ranitidine 0.012737688029971331
Citalopram 0.012228708176325582


In [17]:
# Number of parallel edges for each distinct drug pair.
# G.edges() on a MultiGraph yields every parallel edge, so iterating it would
# recompute the same count once per parallel edge. Walking the adjacency
# structure visits each pair exactly once and keeps the same (u, v) key order.
edge_multiplicity = {}
_visited_nodes = set()
for u, neighbors in G.adjacency():
    for v, parallel_edges in neighbors.items():
        if v not in _visited_nodes:  # undirected: (v, u) was already recorded as (u', v')
            edge_multiplicity[(u, v)] = len(parallel_edges)
    _visited_nodes.add(u)

In [19]:
# Drug pairs ordered by number of parallel edges, largest first; keep five.
top_5_multiplicity = sorted(
    edge_multiplicity.items(),
    key=lambda pair_count: pair_count[1],
    reverse=True,
)
top_5_multiplicity = top_5_multiplicity[:5]
top_5_multiplicity

[(('CID000003883', 'CID000004594'), 540),
 (('CID000003883', 'CID000002662'), 524),
 (('CID000005090', 'CID000004594'), 494),
 (('CID000005090', 'CID000005732'), 478),
 (('CID000003446', 'CID000002662'), 476)]

In [21]:
# Print both drug titles of each top pair together with its edge count.
for (first_cid, second_cid), n_edges in top_5_multiplicity:
    first_title = what_we_poppin_boyz(first_cid)
    second_title = what_we_poppin_boyz(second_cid)
    print(first_title, second_title, n_edges)

Lansoprazole Omeprazole 540
Lansoprazole Celecoxib 524
Rofecoxib Omeprazole 494
Rofecoxib Zolpidem 478
Gabapentin Celecoxib 476


In [23]:
# Raw (unnormalized) degree of every node in the MultiGraph.
edge_count_per_node = {node: degree for node, degree in G.degree()}

# Five nodes with the highest degree, largest first.
top_5_count = sorted(
    edge_count_per_node.items(),
    key=lambda node_degree: node_degree[1],
    reverse=True,
)[:5]

In [26]:
# Print each high-degree compound's PubChem title next to its edge count.
for drug_cid, n_edges in top_5_count:
    drug_title = what_we_poppin_boyz(drug_cid)
    print(drug_title, n_edges)

Omeprazole 79335
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 78225
Lorazepam 70740
DL-Thyroxine 70030
Rofecoxib 70015


In [1]:
def name_to_cid(drug_name, timeout=10):
    """Look up the PubChem CID text for a drug name.

    Parameters
    ----------
    drug_name : str
        Name to search for, e.g. ``"Levothyroxine"``.
    timeout : float, optional
        Seconds to wait for PubChem before giving up (default 10).
        Without a timeout a stalled connection would block forever.

    Returns
    -------
    str
        The stripped response body on HTTP 200, otherwise a message
        containing the status code and the drug name.
        NOTE(review): PubChem may answer with several CIDs, one per line,
        for a single name; confirm callers can handle a multi-line result.
    """
    # Percent-encode the name so characters such as '/', '?' or '#' cannot
    # change the structure of the request URL.
    encoded_name = quote(drug_name, safe="")
    url = (
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded_name}/cids/TXT"
    )
    response = rqs.get(url, timeout=timeout)

    if response.status_code == 200:
        return response.text.strip()
    return f"Skibidi status - {response.status_code}: {drug_name} got no rizz fr fr"

In [12]:
# Build the zero-padded node ID ("CID" followed by 9 digits) for a drug name.
# The CID is looked up once and reused, because every name_to_cid call is a
# network request. str.zfill replaces the hand-assembled run of zeros and
# avoids nesting double quotes inside the f-string (a SyntaxError before
# Python 3.12).
levothyroxine_cid = name_to_cid("Levothyroxine")

f"CID{levothyroxine_cid.zfill(9)}"

'CID000005819'

In [5]:
# Sanity check: a node ID is 12 characters long ("CID" followed by 9 digits).
len("CID000003883")

12