In [2]:
import pandas as pd


# Raw table whose `drug_a` / `drug_b` columns are read later to list the drugs.
dim_drugs = pd.read_csv("dr_dr_se.csv")

# Side-effect lookup table: load and normalise the headers to snake_case
# in one chained expression.
dim_side_effects = pd.read_csv("Se-DoDecagon_sidefx.csv").rename(
    columns={
        'Side Effect': 'side_effect_id',
        'Side Effect Name': 'side_effect_name',
        'Disease Class': 'disease_class',
    }
)

# Preview the first rows of the renamed table.
dim_side_effects.head()

Unnamed: 0,side_effect_id,side_effect_name,disease_class
0,C0017152,gastric inflammation,gastrointestinal system disease
1,C0027858,neuroma,benign neoplasm
2,C0041466,Typhoid,bacterial infectious disease
3,C0032807,Post thrombotic syndrome,cardiovascular system disease
4,C0033860,psoriasis,integumentary system disease


In [4]:
import pandas as pd

# Collect every distinct drug identifier that appears in either the
# `drug_a` or the `drug_b` column of `dim_drugs`.
unique_drugs = pd.unique(dim_drugs[['drug_a', 'drug_b']].values.ravel('K'))

# One-column frame of the distinct identifiers, keyed as `drug_id` so it
# can be joined against the per-drug side-effect table below.
unique_drugs_df = pd.DataFrame(unique_drugs, columns=['drug_id'])

# Per-drug side-effect records, with headers renamed to snake_case.
dr_se = pd.read_csv('ChSe-Decagon_monopharmacy.csv').rename(columns={
    '# STITCH': 'drug_id',
    'Individual Side Effect': 'side_effect_id',
    'Side Effect Name': 'side_effect'
})

# NOTE(review): `unique_drugs_df` holds only the join key and its values are
# unique, so this left join adds no columns and (barring missing `drug_id`
# values in `dr_se`) neither drops nor duplicates rows. If the intent was to
# keep only drugs that also occur in `dim_drugs`, `how='inner'` is needed;
# confirm before changing.
drugs_se = pd.merge(dr_se, unique_drugs_df, on='drug_id', how='left')
drugs_se

Unnamed: 0,drug_id,side_effect_id,side_effect
0,CID003062316,C1096328,central nervous system mass
1,CID003062316,C0162830,Photosensitivity reaction
2,CID003062316,C1611725,leukaemic infiltration brain
3,CID003062316,C0541767,platelet adhesiveness abnormal
4,CID003062316,C0242973,Ventricular dysfunction
...,...,...,...
174972,CID005282226,C1096278,anterior chamber inflammation
174973,CID005282226,C0272412,splenic abscess
174974,CID005282226,C0860013,hepatitis b antibody positive
174975,CID005282226,C0853225,international normalised ratio increased


In [5]:
import pandas as pd
import networkx as nx

# Build a drug / side-effect bipartite graph from the `drugs_se` edge list
# and project it onto the drug nodes.

# Constant marker columns (0 = drug side, 1 = side-effect side). Scalar
# assignment broadcasts the value to every row without a per-row lambda.
drugs_se['node_type'] = 0
drugs_se['side_effect_type'] = 1

# Create a bipartite graph
B = nx.Graph()
# Add nodes with the node attribute "bipartite"
B.add_nodes_from(drugs_se['drug_id'], bipartite=0)
B.add_nodes_from(drugs_se['side_effect_id'], bipartite=1)
# Add one edge per row. Zipping the two columns yields the same
# (drug_id, side_effect_id) pairs in the same order as iterating rows,
# without constructing a Series object for every row.
B.add_edges_from(zip(drugs_se['drug_id'], drugs_se['side_effect_id']))

# Drug Projection: select the nodes tagged bipartite == 0 and build the
# weighted projection of B onto them.
drug_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 0]
drug_projection = nx.bipartite.weighted_projected_graph(B, drug_nodes)

# Side Effect Projection
#side_effect_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 1]
#side_effect_projection = nx.bipartite.weighted_projected_graph(B, side_effect_nodes)

In [None]:
import matplotlib.pyplot as plt
from node2vec import Node2Vec
import networkx as nx
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler

# Configure node2vec on the drug projection graph built in the previous cell.
node2vec = Node2Vec(drug_projection, dimensions=64, walk_length=30,
                    num_walks=200, workers=4)

# Train the embedding model with the configured settings.
model = node2vec.fit(window=10, min_count=1, batch_words=4)

# One embedding vector per graph node, looked up by node key, followed by
# the node keys themselves in the graph's iteration order.
embeddings = [model.wv[n] for n in drug_projection.nodes()]
node_list = list(drug_projection.nodes())