In [1]:
import networkx as nx
import pandas as pd
import requests as rqs
import re
import os

In [2]:
# Load the Decagon polypharmacy table and preview its first rows.
df = pd.read_csv(filepath_or_buffer="data/ChChSe-Decagon_polypharmacy.csv")
df.head(n=5)

Unnamed: 0,# STITCH 1,STITCH 2,Polypharmacy Side Effect,Side Effect Name
0,CID000002173,CID000003345,C0151714,hypermagnesemia
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity
2,CID000002173,CID000003345,C0004144,atelectasis
3,CID000002173,CID000003345,C0002063,alkalosis
4,CID000002173,CID000003345,C0004604,Back Ache


In [38]:
def cid_to_name(cid):
    """Return the PubChem title for a zero-padded compound identifier.

    Args:
        cid: Identifier of the form ``CID`` followed by zero-padded digits,
            e.g. ``"CID000002173"``.

    Returns:
        The body of the PubChem ``property/Title/TXT`` response with
        surrounding whitespace stripped, or the string
        ``"Errore nella richiesta"`` when the HTTP status is not 200.

    Raises:
        ValueError: If ``cid`` does not contain ``CID0...<digits>``.
    """
    # Drop the "CID" prefix and the zero padding to get the bare numeric id.
    match = re.search(r"CID0+(\d+)", str(cid))
    if match is None:
        raise ValueError(f"Unrecognised compound identifier: {cid!r}")
    norm_cid = match.group(1)

    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{norm_cid}/property/Title/TXT"
    # Without a timeout a stalled connection would block the kernel forever.
    response = rqs.get(url, timeout=30)

    if response.status_code == 200:
        return response.text.strip()  # Remove any extra spaces or newlines
    else:
        return "Errore nella richiesta"


def name_to_cid(drug_name):
    """Resolve a drug name to a zero-padded ``CID`` identifier via PubChem.

    Args:
        drug_name: Name to look up; it is percent-encoded before being placed
            in the request path.

    Returns:
        ``"CID"`` followed by the first CID in the response, left-padded with
        zeros to nine digits (e.g. ``"CID000002244"``). When the request does
        not return status 200, or the body contains no CID, a diagnostic
        string is returned instead.
    """
    from urllib.parse import quote

    # Encode every reserved character (including "/") so that the name cannot
    # alter the structure of the request path.
    encoded_name = quote(str(drug_name), safe="")
    url = (
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded_name}/cids/TXT"
    )

    # Without a timeout a stalled connection would block the kernel forever.
    response = rqs.get(url, timeout=30)

    if response.status_code == 200:
        # The body may list several CIDs, one per line; keep only the first so
        # the padded identifier never contains embedded newlines.
        cids = response.text.split()
        if cids:
            return f"CID{cids[0].zfill(9)}"

    return f"Skibidi status - {response.status_code}: {drug_name} got no rizz fr fr"

### Construct the graph

In [5]:
# Build an undirected multigraph from the edge list: every row of `df` becomes
# its own edge, so a pair of drugs listed on several rows is joined by several
# parallel edges, each carrying that row's side-effect code.
G = nx.from_pandas_edgelist(
    df=df,
    create_using=nx.MultiGraph(),
    edge_attr=["Polypharmacy Side Effect"],
    target="STITCH 2",
    source="# STITCH 1",
)

#### Degree centrality

In a MultiGraph, degree centrality counts the total number of edges connected to a node, including all parallel edges.



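What it measures: the total number of interaction edges a drug takes part in (one parallel edge per reported side effect with each partner drug), divided by (n − 1), where n is the number of drugs. Because parallel edges are counted, the score can exceed 1 and is not the number of distinct drugs a given drug interacts with.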

In [43]:
# Rank the drugs by degree centrality and show the five highest-scoring ones.
degree_centrality = nx.degree_centrality(G)
sort_degree_centrality = sorted(
    degree_centrality.items(),
    key=lambda entry: entry[1],
    reverse=True,
)
for node_cid, centrality in sort_degree_centrality[:5]:
    # Each name lookup is one HTTP request to PubChem.
    print(cid_to_name(node_cid), centrality)

Omeprazole 123.19099378881987
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 121.46739130434781
Lorazepam 109.8447204968944
DL-Thyroxine 108.74223602484471
Rofecoxib 108.71894409937887


#### Edge multiplicity

In [44]:
# Count the parallel edges joining each connected pair of drugs, then show the
# five pairs with the most parallel edges.
edge_multiplicity = {
    pair: len(G[pair[0]][pair[1]]) for pair in G.edges()
}

sort_edge_multiplicity = sorted(
    edge_multiplicity.items(),
    key=lambda entry: entry[1],
    reverse=True,
)
for (first_cid, second_cid), n_parallel in sort_edge_multiplicity[:5]:
    print(cid_to_name(first_cid), cid_to_name(second_cid), n_parallel)

Lansoprazole Omeprazole 540
Lansoprazole Celecoxib 524
Rofecoxib Omeprazole 494
Rofecoxib Zolpidem 478
Gabapentin Celecoxib 476


#### Degree per node

In [46]:
# Raw (unnormalised) degree of every drug, parallel edges included; show the
# five drugs with the highest degree.
degree_per_node = {node: degree for node, degree in G.degree()}
sorted_degree_per_node = sorted(
    degree_per_node.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

for node_cid, node_degree in sorted_degree_per_node[:5]:
    print(cid_to_name(node_cid), node_degree)

Omeprazole 79335
1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione 78225
Lorazepam 70740
DL-Thyroxine 70030
Rofecoxib 70015


In [47]:
# Enumerate every maximal clique of the drug graph. The whole result is held in
# memory at once, which is expensive on a graph of this size.
cliques = [*nx.find_cliques(G)]

In [49]:
# Number of cliques collected by the clique-enumeration cell.
len(cliques)

23043739

#### Analysis on most used drugs

In [42]:
# Match the ranked list in data/top_300_drugs.csv against the drugs present in
# the network. Every name lookup is a PubChem HTTP request, so the matches are
# cached on disk and reloaded on later runs instead of being recomputed.
matching_csv = "data/ranked_drugs_matching.csv"

if os.path.isfile(matching_csv):
    most_used = pd.read_csv(matching_csv)
else:
    rank_drugs = pd.read_csv("data/top_300_drugs.csv")
    # Keep the network's drug identifiers in a set so that each membership
    # test below is a hash lookup rather than a scan over a list.
    drugs_in_nw = set(df["# STITCH 1"]) | set(df["STITCH 2"])

    drugs_present_in_both = []
    for rank, drug in zip(rank_drugs["Rank"], rank_drugs["Drug Name"]):
        cid = name_to_cid(drug)
        # A failed lookup returns a diagnostic string that matches no
        # identifier in the network, so such drugs are simply left out.
        if cid in drugs_in_nw:
            drugs_present_in_both.append([rank, drug, cid])

    most_used = pd.DataFrame(drugs_present_in_both, columns=["rank", "drug_name", "cid"])
    most_used.to_csv(matching_csv, index=False)

In [40]:
# Preview the first rows of the ranked-drug / network match table.
# NOTE(review): the saved output of this cell shows no `cid` column although
# the matching cell writes one, so it looks stale; re-run after that cell.
most_used.head()

Unnamed: 0,rank,drug_name
0,2,Metformin
1,5,Amlodipine
2,6,Metoprolol
3,7,Albuterol
4,8,Losartan


In [None]:
most_used_score = {}
for idx, row in most_used.iterrows():
   row["drug_name"]