In [1]:
import networkx as nx

In [2]:
from nodevectors import Node2Vec
import pandas as pd
import csrgraph as cg

In [3]:
# Train one Node2Vec embedding per (network type, fold) pair and save the
# vectors into the same fold directory the training edges were read from.
for network_type in ("DTI", "DDI", "PPI", "GDI"):
    print(f"Learning embeddings for {network_type}")
    for i in range(1, 6):
        print(f"Fold {i}")
        df = pd.read_csv(f"./{network_type}/fold{i}/train.csv")
        # The columns at positions 1 and 2 are used as the two endpoints of
        # each edge (presumably column 0 is an index/id -- TODO confirm).
        edges = df.values[:, [1, 2]].tolist()
        # Build the graph with networkx, then wrap it as a csrgraph object,
        # which is what gets handed to the embedding model below.
        G = cg.csrgraph(nx.from_edgelist(edges))
        # Settings passed to Node2Vec through `w2vparams`; the dict is
        # rebuilt for every fold so nothing is shared between models.
        w2v_settings = {
            "window": 5,
            "negative": 5,
            "iter": 10,
            "batch_words": 128,
        }
        g2v = Node2Vec(n_components=64, walklen=20, w2vparams=w2v_settings)
        g2v.fit(G)
        g2v.save_vectors(f"./{network_type}/fold{i}/{network_type.lower()}.emb")

Learning embeddings for DTI
Fold 1
Making walks... Done, T=3.15
Mapping Walk Names... Done, T=2.49
Training W2V... Done, T=150.66
Fold 2
Making walks... Done, T=0.80
Mapping Walk Names... Done, T=4.49
Training W2V... Done, T=150.85
Fold 3
Making walks... Done, T=0.61
Mapping Walk Names... Done, T=2.23
Training W2V... Done, T=143.56
Fold 4
Making walks... Done, T=0.66
Mapping Walk Names... Done, T=2.68
Training W2V... Done, T=114.81
Fold 5
Making walks... Done, T=0.58
Mapping Walk Names... Done, T=1.67
Training W2V... Done, T=107.23
Learning embeddings for DDI
Fold 1
Making walks... Done, T=0.20
Mapping Walk Names... Done, T=0.48
Training W2V... Done, T=30.04
Fold 2
Making walks... Done, T=0.46
Mapping Walk Names... Done, T=0.93
Training W2V... Done, T=24.23
Fold 3
Making walks... Done, T=0.15
Mapping Walk Names... Done, T=0.38
Training W2V... Done, T=20.37
Fold 4
Making walks... Done, T=0.17
Mapping Walk Names... Done, T=0.31
Training W2V... Done, T=16.26
Fold 5
Making walks... Done, T