# In [None]:
import pickle
import random

import networkx as nx
import numpy as np
from gensim.models import Word2Vec
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Link prediction with DeepWalk-style node embeddings.
#
# Pipeline: load a graph -> sample random walks -> train Word2Vec on the
# walks -> build edge/non-edge feature vectors -> fit and score a logistic
# regression classifier.

# NOTE(review): the file is assumed to hold a pickled networkx graph object;
# confirm how it was produced.
GRAPH_PATH = "leanna/markov_chain_graph/graph.pkl"
WALK_LENGTH = 10    # maximum number of nodes in one random walk
EMBEDDING_DIM = 64  # size of each node embedding vector
RANDOM_SEED = 42    # shared seed for walks, negative sampling and the split


def load_graph(path):
    """Load a pickled networkx graph from *path*.

    Raises:
        TypeError: if the unpickled object is not a networkx graph.
    """
    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as fh:
        graph = pickle.load(fh)
    if not isinstance(graph, nx.Graph):
        raise TypeError(
            f"expected a networkx graph in {path!r}, "
            f"got {type(graph).__name__}"
        )
    return graph


def random_walk(graph, start, length, rng):
    """Return a uniform random walk of at most *length* nodes from *start*.

    Each step moves to a neighbor picked uniformly via ``graph.neighbors``.
    The walk stops early when the current node has no neighbors.
    """
    walk = [start]
    while len(walk) < length:
        neighbors = list(graph.neighbors(walk[-1]))
        if not neighbors:
            break
        walk.append(rng.choice(neighbors))
    return walk


def generate_walks(graph, length, rng):
    """Return one random walk per node, with node ids converted to strings.

    Word2Vec expects sentences of string tokens, hence the ``str`` conversion.
    Every node starts its own walk, so every node appears in the corpus.
    """
    return [
        [str(n) for n in random_walk(graph, node, length, rng)]
        for node in graph.nodes()
    ]


def train_embeddings(walks, dim):
    """Train a skip-gram Word2Vec model with negative sampling on *walks*.

    gensim 4 renamed the ``size`` keyword to ``vector_size``; the new name is
    tried first and the old one is used only when the new one is rejected.
    """
    common = dict(window=5, min_count=0, sg=1, hs=0, negative=5, workers=4)
    try:
        return Word2Vec(walks, vector_size=dim, **common)
    except TypeError as err:
        # Re-raise anything that is not the "unknown keyword" failure.
        if "vector_size" not in str(err):
            raise
        return Word2Vec(walks, size=dim, **common)


def sample_non_edges(graph, count, rng):
    """Sample up to *count* distinct node pairs that are not edges of *graph*.

    Pairs are ordered for directed graphs and unordered otherwise. The number
    of attempts is bounded, so fewer than *count* pairs may be returned when
    the graph is dense or very small.
    """
    nodes = list(graph.nodes())
    if len(nodes) < 2:
        return []
    directed = graph.is_directed()
    seen = set()
    pairs = []  # a list keeps the sampled order deterministic for a given rng
    attempts_left = 100 * count
    while len(pairs) < count and attempts_left > 0:
        attempts_left -= 1
        u, v = rng.sample(nodes, 2)
        if graph.has_edge(u, v):
            continue
        key = (u, v) if directed else frozenset((u, v))
        if key in seen:
            continue
        seen.add(key)
        pairs.append((u, v))
    return pairs


def build_edge_dataset(graph, model, rng):
    """Build features and labels for link prediction.

    Every edge of *graph* is a positive sample (label 1). An equal number of
    sampled non-edges, when available, are negative samples (label 0). A
    feature vector is the concatenation of the two endpoint embeddings.

    Returns:
        (X, y): list of feature vectors and list of int labels.

    Raises:
        ValueError: if the graph has no edges or no non-edge could be sampled,
            because a classifier cannot be fitted on a single class.
    """
    positives = list(graph.edges())
    if not positives:
        raise ValueError("graph has no edges; nothing to learn from")
    negatives = sample_non_edges(graph, len(positives), rng)
    if not negatives:
        raise ValueError("could not sample any non-edge for the negative class")

    features = []
    labels = []
    for label, pairs in ((1, positives), (0, negatives)):
        for node1, node2 in pairs:
            features.append(
                np.concatenate([model.wv[str(node1)], model.wv[str(node2)]])
            )
            labels.append(label)
    return features, labels


# Guarded so the helpers above can be imported without running the pipeline.
# Names assigned here are still module-level globals when the file is run as
# a script or notebook cell.
if __name__ == "__main__":
    rng = random.Random(RANDOM_SEED)

    # load the graph from disk
    G = load_graph(GRAPH_PATH)

    # generate the random walks (one per node)
    walks = generate_walks(G, WALK_LENGTH, rng)

    # train the Word2Vec model on the random walks
    model = train_embeddings(walks, EMBEDDING_DIM)

    # generate the edge features using the node embeddings
    # NOTE(review): the embeddings are trained on walks over the full graph,
    # so edges that end up in the test split have already influenced the
    # features. For an unbiased estimate, hold out test edges before walking.
    X, y = build_edge_dataset(G, model, rng)

    # split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_SEED
    )

    # train a logistic regression model on the edge features
    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    # make predictions on the test set and evaluate the accuracy
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.4f}')
