### Complex Networks Midterm Project 

In [34]:
import networkx as nx
import numpy as np
import pandas as pd

In [35]:
dataset = np.array([(nx.gnp_random_graph(10, 0.9, seed=None, directed=False), '1') for _ in range(30)] + [(nx.gnp_random_graph(10, 0.1, seed=None, directed=False), '0') for _ in range(30)], dtype=object)

In [36]:
from functools import partial
from node2vec import Node2Vec

In [37]:
%%capture

graphs = [graph[0] for graph in dataset]

# create model instance with partial function
model_instance = partial(Node2Vec, dimensions=1, walk_length=30, num_walks=200, workers=4)

embeddings_list = []
for graph in graphs:
    model = model_instance(graph=graph).fit(window=10, min_count=1, batch_words=4)
    embeddings = [[model.wv.get_vector(str(node))] for node in graph.nodes]
    embeddings_list.append(embeddings)

# Convert the embeddings list to a NumPy array
node_embeddings = np.array(embeddings_list)

Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 223.34it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 161.37it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 162.03it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 144.06it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 401.72it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 321.85it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 331.00it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 337.14it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 422.39it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 415.83it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:00<00:00, 482.10it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:00<00:00, 392.11it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:00<00:00, 421.40it/s]
Generating w

In [38]:
# convert the node embeddings to a dataframe with shape (60, 10) and type float64 (required by the classifier)
node_embeddings_df = pd.DataFrame(np.squeeze(node_embeddings).astype('float64'))

# add the target variable to the dataframe (required by the classifier)
node_embeddings_df['target'] = [graph[1] for graph in dataset]

### DecisionTree and SVM

In [39]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, KFold

In [40]:
# select the features and the target variable and split the dataset into training and test sets
X = node_embeddings_df.iloc[:, :-1]
y = node_embeddings_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

### Decision Tree

In [41]:
model = DecisionTreeClassifier(random_state=62)
model.fit(X_train,y_train)

cross_val = KFold(n_splits=30, random_state=62, shuffle=True)
scores = cross_val_score(model, X, y, cv= cross_val, n_jobs=-1)
print(np.abs(np.mean(scores)))

0.9166666666666666


### SVM

In [42]:
model = make_pipeline(StandardScaler(), SVC(gamma='auto'))
model.fit(X_train,y_train)

cross_val = KFold(n_splits=50, random_state=42, shuffle=True)
scores = cross_val_score(model, X, y, cv= cross_val, n_jobs=-1)
print(np.abs(np.mean(scores)))

0.99
