In [2]:
import networkx as nx
import pandas as pd
import requests as rqs
import re
import os
from operator import itemgetter

In [3]:
# Load the polypharmacy table. Per the preview below, each row holds two
# STITCH compound ids plus one side-effect code and its human-readable name.
df = pd.read_csv("data/ChChSe-Decagon_polypharmacy.csv")
df.head()

Unnamed: 0,# STITCH 1,STITCH 2,Polypharmacy Side Effect,Side Effect Name
0,CID000002173,CID000003345,C0151714,hypermagnesemia
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity
2,CID000002173,CID000003345,C0004144,atelectasis
3,CID000002173,CID000003345,C0002063,alkalosis
4,CID000002173,CID000003345,C0004604,Back Ache


In [4]:
# Load the per-side-effect scores and discard the "Id" column; the columns
# kept (see preview) are the side-effect name and its perceived fear score.
weights = pd.read_csv("data/weighted_side_effects.csv").drop(
    columns=["Id"]
)
weights.head()


Unnamed: 0,Side effect,Perceived fear score
0,hypermagnesemia,0.471916
1,retinopathy of prematurity,0.45746
2,atelectasis,0.693081
3,alkalosis,0.401998
4,Back Ache,0.289684


In [5]:
# Reuse the cached merge when it exists; otherwise build it once and cache it.
# NOTE(review): the cache is never invalidated -- delete
# data/merged_side_effect.csv after changing either input file.
if os.path.exists("data/merged_side_effect.csv"):
    weighted_data = pd.read_csv("data/merged_side_effect.csv")
else:
    # Left join keeps every polypharmacy row; a side effect with no entry in
    # `weights` ends up with NaN in "Perceived fear score".
    weighted_data = pd.merge(
        df, weights, left_on="Side Effect Name", right_on="Side effect", how="left"
    ).drop(columns=["Side effect"])
    weighted_data.to_csv("data/merged_side_effect.csv", index=False)

In [6]:
def cid_to_name(cid):
    """
    Look up the PubChem title of a compound from its zero-padded CID string.

    Parameters:
    cid (str): Identifier of the form "CID" + zero padding + digits,
        e.g. "CID000002173".

    Returns:
    str: The stripped response body on HTTP 200. The sentinel
        "Errore nella richiesta" when the identifier does not match the
        expected pattern or the response status is not 200.

    Raises:
    Whatever `rqs.get` raises on connection failure or when the request
    exceeds the timeout.
    """
    # Strip the "CID" prefix and the zero padding to get the bare numeric id.
    pattern = r"CID0+(\d+)"
    match = re.search(pattern, cid)
    if match is None:
        # A malformed id gets the same sentinel as a failed request instead of
        # crashing on `None.group`.
        return "Errore nella richiesta"

    norm_cid = match.group(1)
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{norm_cid}/property/Title/TXT"
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = rqs.get(url, timeout=10)

    if response.status_code == 200:
        return response.text.strip()  # drop surrounding whitespace / trailing newline
    return "Errore nella richiesta"


def name_to_cid(drug_name):
    """
    Resolve a drug name to a zero-padded CID string through PubChem.

    Parameters:
    drug_name (str): Name interpolated into the PUG REST lookup URL.

    Returns:
    str: "CID" followed by the first returned id, left-padded with zeros to
        nine digits, when the response is HTTP 200 with a non-empty body.
        Otherwise a diagnostic message containing the status code and the
        drug name.

    Raises:
    Whatever `rqs.get` raises on connection failure or when the request
    exceeds the timeout.
    """
    url = (
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{drug_name}/cids/TXT"
    )

    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = rqs.get(url, timeout=10)

    # A name lookup may return several ids, one per line. Padding the whole
    # body would bake a newline into the identifier, so only the first id is
    # used. NOTE(review): assumes the first id is the preferred match -- confirm.
    cids = response.text.split()
    if response.status_code == 200 and cids:
        return f"CID{cids[0].zfill(9)}"
    return f"Skibidi status - {response.status_code}: {drug_name} got no rizz fr fr"

### construct the graph

In [8]:
# Multigraph: one edge per row of `weighted_data`, so a drug pair gets one
# parallel edge per recorded side effect. Each edge stores the side-effect
# code and its perceived fear score.
G = nx.from_pandas_edgelist(
    weighted_data,
    source="# STITCH 1",
    target="STITCH 2",
    edge_attr=["Polypharmacy Side Effect", "Perceived fear score"],
    create_using=nx.MultiGraph(),
)
# Simple graph derived from the multigraph: parallel edges between the same
# two drugs collapse into a single edge.
G_simple = nx.Graph(G)

#### simple degree centrality

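What it measures: the fraction of the other drugs in the network that a given drug interacts with (i.e., has at least one recorded side effect with when combined). `nx.degree_centrality` divides each node's degree in the simple graph by N − 1.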

In [13]:
# Degree centrality on the collapsed (simple) graph, ranked best-first.
simple_degree_centrality = nx.degree_centrality(G_simple)
sorted_simple_degree_centrality = sorted(
    simple_degree_centrality.items(),
    key=itemgetter(1),
    reverse=True,
)
# Show the ten best-connected drugs; each name costs one PubChem request.
for cid, score in sorted_simple_degree_centrality[:10]:
    print(f"{cid} {cid_to_name(cid)} {score}")

CID000004594 Omeprazole 0.9301242236024844
CID000001983 Acetaminophen 0.8975155279503105
CID000000853 DL-Thyroxine 0.8944099378881987
CID000003883 Lansoprazole 0.8571428571428571
CID000005039 Ranitidine 0.8524844720496894
CID000003958 Lorazepam 0.8447204968944099
CID000054454 Simvastatin 0.8338509316770186
CID000004679 Pantoprazole 0.8338509316770186
CID000005732 Zolpidem 0.8338509316770186
CID000002771 Citalopram 0.8322981366459626


In [13]:
N = len(G.nodes)  # Total number of nodes

# Multigraph degree: every parallel edge incident to a node counts once.
# No `weight` argument is passed: the edges only carry the attributes set at
# graph construction (there is no "weight" key), so a degree weighted on
# "weight" would fall back to 1 per edge and give the same numbers.
degree_values = dict(G.degree())

# Scale by N - 1 like the simple-graph degree centrality. Because parallel
# edges are counted, the result is the average number of recorded side effects
# per possible partner and can exceed 1.
degree_centrality_fixed = {
    node: degree / (N - 1) for node, degree in degree_values.items()
}

# Print top 10 nodes
sorted_degree_centrality = sorted(
    degree_centrality_fixed.items(), key=itemgetter(1), reverse=True
)
for node, centrality in sorted_degree_centrality[:10]:
    print(f"Node: {node}, Fixed Degree Centrality: {centrality:.4f}")

Node: CID000004594, Fixed Degree Centrality: 123.1910
Node: CID000004900, Fixed Degree Centrality: 121.4674
Node: CID000003958, Fixed Degree Centrality: 109.8447
Node: CID000000853, Fixed Degree Centrality: 108.7422
Node: CID000005090, Fixed Degree Centrality: 108.7189
Node: CID000005732, Fixed Degree Centrality: 108.5031
Node: CID000002083, Fixed Degree Centrality: 108.1770
Node: CID000003883, Fixed Degree Centrality: 107.5932
Node: CID000002771, Fixed Degree Centrality: 104.5016
Node: CID000001983, Fixed Degree Centrality: 103.9193


#### multigraph degree centrality
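In a MultiGraph, the degree of a node counts every edge connected to it, including all parallel edges. The centrality computed below divides that degree by the total number of edges in the graph, so each value is the share of all edges that touch the node.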


In [52]:
def multigraph_degree_centrality(G):
    """
    Computes, for every node of a multigraph, its degree divided by the total
    number of edges in the graph (parallel edges are counted individually).

    Parameters:
    G (nx.MultiGraph): A NetworkX multigraph.

    Returns:
    dict: Node -> degree / number of edges. Every value is 0.0 when the graph
        has no edges, and the dict is empty when the graph has no nodes.
    """
    # Step 1: Per-node degree; parallel edges are counted individually.
    degree_multigraph = dict(G.degree())

    # Step 2: Each edge contributes to the degree of both endpoints, so half
    # the degree sum is the number of edges.
    total_edges = sum(degree_multigraph.values()) / 2

    # Step 3: Without edges there is nothing to normalise by; report zero
    # instead of dividing by zero.
    if total_edges == 0:
        return {node: 0.0 for node in degree_multigraph}

    # Step 4: Normalise each degree by the edge count.
    return {node: degree / total_edges for node, degree in degree_multigraph.items()}


degree_centrality_multi = multigraph_degree_centrality(G)

sorted_degree_centrality_multi = sorted(
    degree_centrality_multi.items(), key=itemgetter(1), reverse=True
)

# Raw multigraph degree per node, also kept as a best-first ranking.
degree_per_node = dict(G.degree())
sorted_degree_per_node = sorted(
    degree_per_node.items(), key=itemgetter(1), reverse=True
)

print("Multigraph Degree Centrality:")

for node, centrality in sorted_degree_centrality_multi[:10]:
    # Look the degree up by node instead of pairing two separately sorted
    # lists by position, so the printed degree always belongs to this node.
    print(
        f"Node: {node}, Degree: {degree_per_node[node]}, Multi-edge-Degree-Centrality: {centrality:.4f}"
    )

Multigraph Degree Centrality:
Node: CID000004594, Degree: 79335, Multi-edge-Degree-Centrality: 0.0171
Node: CID000004900, Degree: 78225, Multi-edge-Degree-Centrality: 0.0168
Node: CID000003958, Degree: 70740, Multi-edge-Degree-Centrality: 0.0152
Node: CID000000853, Degree: 70030, Multi-edge-Degree-Centrality: 0.0151
Node: CID000005090, Degree: 70015, Multi-edge-Degree-Centrality: 0.0151
Node: CID000005732, Degree: 69876, Multi-edge-Degree-Centrality: 0.0150
Node: CID000002083, Degree: 69666, Multi-edge-Degree-Centrality: 0.0150
Node: CID000003883, Degree: 69290, Multi-edge-Degree-Centrality: 0.0149
Node: CID000002771, Degree: 67299, Multi-edge-Degree-Centrality: 0.0145
Node: CID000001983, Degree: 66924, Multi-edge-Degree-Centrality: 0.0144


#### edge multiplicity

In [41]:
# Number of parallel edges (recorded side effects) for every connected pair.
# G.edges() yields each parallel edge separately, so a pair is visited once
# per side effect and simply rewrites the same entry with the same count.
edge_multiplicity = {}
for u, v in G.edges():
    edge_multiplicity[(u, v)] = G.number_of_edges(u, v)

sorted_edge_multiplicity = sorted(
    edge_multiplicity.items(),
    key=itemgetter(1),
    reverse=True,
)

# Ten pairs with the most parallel edges; two PubChem requests per line.
for (cid_1, cid_2), score in sorted_edge_multiplicity[:10]:
    name_1 = cid_to_name(cid_1)
    name_2 = cid_to_name(cid_2)
    print(f"{cid_1} - {cid_2} - {name_1} - {name_2} - {score} ")

CID000003883 - CID000004594 - Lansoprazole - Omeprazole - 540 
CID000003883 - CID000002662 - Lansoprazole - Celecoxib - 524 
CID000005090 - CID000004594 - Rofecoxib - Omeprazole - 494 
CID000005090 - CID000005732 - Rofecoxib - Zolpidem - 478 
CID000003446 - CID000002662 - Gabapentin - Celecoxib - 476 
CID000002678 - CID000004900 - Cetirizine - 1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione - 468 
CID000005090 - CID000003958 - Rofecoxib - Lorazepam - 466 
CID000005090 - CID000004900 - Rofecoxib - 1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione - 465 
CID000002662 - CID000004900 - Celecoxib - 1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione - 462 
CID000004635 - CID000004594 - Morphinan-6-one, 4,5-epoxy-14-hydroxy-3-methoxy-17-methyl-, (5alpha)- - Omeprazole - 461 


#### degree per node weighted

In [54]:
# Per-node sum of "Perceived fear score" over every incident edge (parallel
# edges included); an edge lacking the attribute contributes 0.
weighted_degrees = {
    node: sum(
        fear
        for _, _, fear in G.edges(node, data="Perceived fear score", default=0)
    )
    for node in G.nodes
}

sorted_weighted_degrees = sorted(
    weighted_degrees.items(),
    key=itemgetter(1),
    reverse=True,
)

In [58]:
# Ten drugs with the largest summed fear score; every line triggers one
# PubChem request through cid_to_name.
for cid, score in sorted_weighted_degrees[:10]:
    print(f"{cid} -  {cid_to_name(cid)} - {score:.2f}")

CID000004594 -  Omeprazole - 32807.67
CID000004900 -  1,4-Pregnadiene-17alpha,21-diol-3,11,20-trione - 32716.54
CID000003958 -  Lorazepam - 29996.60
CID000005732 -  Zolpidem - 29128.42
CID000001983 -  Acetaminophen - 29060.58
CID000000853 -  DL-Thyroxine - 28999.38
CID000002083 -  Albuterol - 28841.81
CID000005090 -  Rofecoxib - 28798.69
CID000003883 -  Lansoprazole - 28628.52
CID000002771 -  Citalopram - 27862.70


#### averaged weighted degree

In [56]:
# Mean "Perceived fear score" over all edges incident to each node (parallel
# edges included); an edge lacking the attribute counts as 0.
average_weighted_degrees = {}

for node in G.nodes:
    fear_scores = [
        fear
        for _, _, fear in G.edges(node, data="Perceived fear score", default=0)
    ]
    average_weighted_degrees[node] = sum(fear_scores) / len(fear_scores)

sorted_average_weighted_degrees = sorted(
    average_weighted_degrees.items(),
    key=itemgetter(1),
    reverse=True,
)

# Ten highest averages, printed with " & " separators.
for cid, score in sorted_average_weighted_degrees[:10]:
    print(f"{cid} & {cid_to_name(cid)} & {score:.3f}")

CID000145068 & Nitric Oxide & 0.519
CID000004675 & [17-acetyloxy-10,13-dimethyl-2,16-bis(1-methylpiperidin-1-ium-1-yl)-2,3,4,5,6,7,8,9,11,12,14,15,16,17-tetradecahydro-1H-cyclopenta[a]phenanthren-3-yl] acetate & 0.505
CID000036339 & Ethyl 1-(1-phenylethyl)-1H-imidazole-5-carboxylate & 0.505
CID000036811 & Dobutamine & 0.493
CID000002232 & 1-[2-[(3-Methyl-1,2,3,4-tetrahydroquinoline-8-yl)sulfonylamino]-5-guanidinopentanoyl]-4-methylpiperidine-2-carboxylic acid & 0.483
CID005282044 &  & 0.481
CID000005486 & 2-(Butylsulfonylamino)-3-[4-(4-piperidin-4-ylbutoxy)phenyl]propanoic acid & 0.479
CID000041693 & Sufentanil & 0.479
CID000004993 & Pyrimethamine & 0.478
CID000003562 & Halothane & 0.477


#### Analysis on most used drugs

In [19]:
# Match the ranked drug list against the compounds present in the network.
# The match is cached on disk because it needs one PubChem request per drug.
if not os.path.isfile("data/ranked_drugs_matching.csv"):
    rank_drugs = pd.read_csv("data/top_300_drugs.csv")

    # Every compound id that appears on either side of an interaction.
    drugs_in_nw = list(
        set(df["# STITCH 1"].unique().tolist() + df["STITCH 2"].unique().tolist())
    )

    drugs_present_in_both = []
    for rank, drug in zip(rank_drugs["Rank"], rank_drugs["Drug Name"]):
        cid = name_to_cid(drug)
        # A failed lookup returns a diagnostic string, which never matches a
        # network id, so such drugs are left out.
        if cid in drugs_in_nw:
            drugs_present_in_both.append([rank, drug, cid])

    top_300 = pd.DataFrame(drugs_present_in_both, columns=["rank", "drug_name", "cid"])
    top_300.to_csv("data/ranked_drugs_matching.csv", index=False)
else:
    top_300 = pd.read_csv("data/ranked_drugs_matching.csv")
top_300.head()

Unnamed: 0,rank,drug_name,cid
0,2,Metformin,CID000004091
1,5,Amlodipine,CID000002162
2,6,Metoprolol,CID000004171
3,7,Albuterol,CID000002083
4,8,Losartan,CID000003961


In [64]:
def to_percentage(rank, total=645):
    """
    Map a 1-based rank onto a 0-100 scale, truncated to an integer.

    Rank 1 maps to 100 and rank `total` maps to 0, linearly in between.

    Parameters:
    rank (int): 1-based position in a best-first ranking.
    total (int): Number of ranked items. Defaults to 645, the value this
        function previously had hard-coded.

    Returns:
    int: 100 * (1 - (rank - 1) / (total - 1)), truncated toward zero; 100 when
        `total` is 1 or less, since a lone item is trivially at the top.
    """
    if total <= 1:
        # Avoid dividing by zero for a ranking with a single item.
        return 100
    return int(100 * (1 - (rank - 1) / (total - 1)))


if os.path.isfile("data/ranked_drugs_matching.csv"):
    top_300_matched = pd.read_csv("data/ranked_drugs_matching.csv")

    # Output column -> best-first list of (cid, score) pairs to rank against.
    rankings = {
        "simple_degree": sorted_simple_degree_centrality,
        "multi_degree": sorted_degree_centrality_multi,
        "weighted_degree": sorted_weighted_degrees,
        "average_weighted": sorted_average_weighted_degrees,
    }

    for column, ranking in rankings.items():
        # Build the cid -> rank lookup once per ranking instead of rebuilding
        # and scanning a list for every row. Ranks start at 1 because
        # to_percentage subtracts 1 from the rank it receives; passing a
        # 0-based list index shifted every percentage.
        rank_of = {
            node: position for position, (node, _) in enumerate(ranking, start=1)
        }
        top_300_matched[column] = [
            f"{to_percentage(rank_of[cid])}%" for cid in top_300_matched["cid"]
        ]

# NOTE(review): an existing data/top_300_measures.csv takes precedence over
# the table computed above -- delete it to refresh the cached percentages.
if os.path.isfile("data/top_300_measures.csv"):
    top_300_measures = pd.read_csv("data/top_300_measures.csv")
else:
    top_300_matched.to_csv("data/top_300_measures.csv", index=False)
    # Bind the freshly computed table too; without this the display below
    # fails with NameError on the first run, when no cache exists yet.
    top_300_measures = top_300_matched
top_300_measures[:10]

Unnamed: 0,rank,drug_name,cid,simple_degree,multi_degree,weighted_degree,average_weighted
0,2,Metformin,CID000004091,93%,93%,93%,70%
1,5,Amlodipine,CID000002162,91%,87%,88%,75%
2,6,Metoprolol,CID000004171,98%,97%,97%,68%
3,7,Albuterol,CID000002083,97%,99%,99%,61%
4,8,Losartan,CID000003961,92%,91%,91%,60%
5,9,Omeprazole,CID000004594,100%,100%,100%,59%
6,10,Gabapentin,CID000003446,93%,96%,96%,46%
7,12,Hydrochlorothiazide,CID000003639,94%,94%,94%,51%
8,16,Pantoprazole,CID000004679,99%,98%,98%,68%
9,18,Trazodone,CID000005533,67%,77%,76%,36%
