# Machine Learning in Network Science
Group Challenge

***
by: Leonardo Basili, Paul Bédier, Lasse Schmidt

within: MS Data Sciences & Business Analytics

at: CentraleSupélec & ESSEC Business School
***

This notebook covers classical machine learning techniques, namely:
- unsupervised link prediction based on graph topology (Adamic-Adar, Preferential Attachment, Resource Allocation Index, ...)
- supervised link prediction using Logistic Regression, SVM, Random Forest & Boosting

### 1. Import Packages

In [43]:
from importlib import reload

# Import the project modules inside this cell so that it also runs on a fresh
# kernel (Restart & Run All); previously the names were only bound by a later
# cell, which made this cell fail with a NameError when executed first.
import util.analyse_Data as analyseData
import util.preprocess_Data as prepData
import util.load_Data as loadData
import util.modeling as modeling
import util.autoencoder as autoenc

# Re-execute every helper module so that edits to the util/*.py files are picked
# up without restarting the kernel. Looping (instead of ending the cell on a bare
# reload(...) expression) also keeps the module repr, which contains the absolute
# local path, out of the saved cell output.
for _module in (analyseData, prepData, loadData, modeling, autoenc):
    reload(_module)

<module 'util.autoencoder' from '/Users/macbookpro/Documents/GitHub/Network-Science_Final-Project/util/autoencoder.py'>

In [3]:
# import own scripts
import util.analyse_Data as analyseData
import util.preprocess_Data as prepData
import util.load_Data as loadData
import util.modeling as modeling
import util.autoencoder as autoenc

In [5]:
# parse & handle data
import os
import csv
import numpy as np
import pandas as pd
import networkx as nx # graph data

# modeling
import torch
from torch_geometric.nn import GAE, VGAE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier

# hyperparameter tuning
from ray import tune, air
from ray.tune import CLIReporter, JupyterNotebookReporter
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler, MedianStoppingRule, PopulationBasedTraining
from ray.tune.stopper import MaximumIterationStopper
from ray.tune.experiment.trial import Trial
from ray.tune.sklearn import TuneSearchCV
import hyperopt

# evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_auc_score
from sklearn.model_selection import PredefinedSplit, ShuffleSplit

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# set matplotlib and seaborn settings for nicer plots
%matplotlib inline

SMALL_SIZE = 6
MEDIUM_SIZE = 8
BIGGER_SIZE = 10

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=BIGGER_SIZE)    # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)   # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

### 2. Load Data for Modeling

In [56]:
# Load the graphs, the per-split edge frames and the feature / label splits in a
# single call (val_ratio = 0.2, test_ratio = 0.1). Can run for up to a minute.
loaded = loadData.load_transform(val_ratio=0.2, test_ratio=0.1, n2v_train=True)

# unpack in the order returned by load_transform
(G, G_train, G_trainval, node_info,
 train_tf, val_tf, trainval_tf, test_tf,
 X_train, y_train,
 X_val, y_val,
 X_trainval, y_trainval,
 X_test, y_test) = loaded

Number of positive edges for training: 3802
Number of positive edges for validation: 1085
Number of positive edges for test: 542
Number of edges in original graph: 5429
Number of edges in training graph: 3802
Number of non-existing edges generated: 29971
Number of negative edges for training: 3802
Number of negative edges for validation: 1085
Number of negative edges for test: 542


Computing transition probabilities:   0%|          | 0/2708 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 11.64it/s]


Computing transition probabilities:   0%|          | 0/2708 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:01<00:00,  8.74it/s]


Enriching train data...


  adj = nx.adjacency_matrix(G, nodelist = nodelist)
  adj = nx.adjacency_matrix(G, nodelist = nodelist)
  adj = nx.adjacency_matrix(G, nodelist = nodelist).toarray()


Enriching validation data...
Enriching test data...


In [37]:
# Load the dataset object consumed by the graph autoencoder, with the same
# val / test ratios as the load_transform call; can run for up to a minute.
# autoenc.load returns two values, of which only the first is used here.
autoenc_loaded = autoenc.load(val_ratio=0.2, test_ratio=0.1)
data, _ = autoenc_loaded

Number of positive edges for training: 3802
Number of positive edges for validation: 1085
Number of positive edges for test: 542
Number of edges in original graph: 5429
Number of edges in training graph: 3802
Number of non-existing edges generated: 29971
Number of negative edges for training: 3802
Number of negative edges for validation: 1085
Number of negative edges for test: 542
sum of train pos edges: 3802
sum of train neg edges: 3802
sum of val pos edges: 1085
sum of val neg edges: 1085
sum of train pos edges: 3802
sum of train neg edges: 3802
sum of val pos edges: 1085
sum of val neg edges: 1085
Enriching node features...


Computing transition probabilities:   0%|          | 0/2708 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 13.98it/s]
  A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)


Computing transition probabilities:   0%|          | 0/2708 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 11.12it/s]


Create PyTorch Geometric dataset...


In [21]:
# get node embeddings (IF YOU CHANGE SEED = 42, then you need to train new autoencoder as train-val split is different)


def _edge_similarity(message_edges, scored_edges):
    """Return a one-column frame ("sim") holding autoenc.get_similarity's output.

    message_edges -- passed as third argument to autoenc.get_similarity
                     (presumably the edges of the graph the encoder runs on).
    scored_edges  -- passed as fourth argument to autoenc.get_similarity
                     (presumably the node pairs that receive a score).
    """
    scores = autoenc.get_similarity(model, data.x, message_edges, scored_edges)
    return pd.DataFrame(scores).rename(columns={0: "sim"})


def _embedding_distance(edge_frame):
    """Euclidean distance between the source and target embedding of every row."""
    return [
        np.linalg.norm(node_emb.loc[u].values - node_emb.loc[v].values)
        for u, v in zip(edge_frame.source, edge_frame.target)
    ]


def _enrich(edge_frame, sim_frame):
    """Return edge_frame with the columns "sim" and "dist" added (or overwritten)."""
    return edge_frame.assign(sim=sim_frame.sim.values, dist=_embedding_distance)


# load best autoencoder (os.path.join instead of string concatenation keeps the path portable)
path = os.path.join(os.path.abspath(""), "models", "VGNAE_0.001_0_4982a_00000_autoencoder.pt")

model = VGAE(autoenc.Encoder(data.x.size()[1], 64, 1.5, 5, 0.2, 0.01))
model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
# A freshly constructed torch module is in training mode; switch to inference mode
# so that stochastic layers cannot perturb the features computed below
# (a no-op if the autoenc helpers already call eval() themselves).
model.eval()

# get embeddings of nodes
# NOTE(review): the embeddings are computed on data.trainval_edges while the similarity
# calls below pass the *_pos_edges attributes as graph -- confirm that trainval_edges
# contains existing edges only, and that using trainval-based embeddings for the "dist"
# feature of the train / validation rows is intended.
embedding = autoenc.get_embeddings(model, data.x, data.trainval_edges)
node_emb = pd.DataFrame(embedding).rename(columns = {val: f"x{val+1}" for val in range(embedding.shape[1])})

# enrich train
train_sim = _edge_similarity(data.train_pos_edges, data.train_edges)
train_tf = _enrich(train_tf, train_sim)

# enrich val
val_sim = _edge_similarity(data.train_pos_edges, data.val_edges)
val_tf = _enrich(val_tf, val_sim)

# enrich test
test_sim = _edge_similarity(data.trainval_pos_edges, data.test_edges)
test_tf = _enrich(test_tf, test_sim)

# split
X_train, y_train = loadData.split_frame(train_tf)
X_val, y_val     = loadData.split_frame(val_tf)
X_test, y_test   = loadData.split_frame(test_tf)

# merge to get trainval data (train rows first, validation rows last)
X_trainval = pd.concat([X_train, X_val])
y_trainval = pd.concat([y_train, y_val])

In [19]:
# Correlation with the target: one matrix for the training frame, then one for the
# validation frame. The first two columns of each frame are left out via iloc[:, 2:]
# (presumably the source / target node ids -- verify against the frame layout).
for df in (train_tf, val_tf):
    corr_input = df.iloc[:, 2:]
    analyseData.plot_corr_matrix(corr_input)

TypeError: plot() got an unexpected keyword argument 'text_kw'

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte

In [39]:
# Distribution of the autoencoder similarity feature "sim" on the validation frame,
# coloured by the label y. The trailing semicolon keeps the PairGrid repr out of
# the cell output so only the figure is shown.
sns.pairplot(val_tf[["sim", "y"]], hue = "y");

<seaborn.axisgrid.PairGrid at 0x7f879be24580>

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte

### 3. Supervised Link Prediction

In [22]:
# prepare datasets for supervised methods, removing features highly correlated with target in train set but not in val set
cols = ['source_DCT', 'target_DCT', 'BCT_diff', 'katz_idx', 'node2vec_1', 'node2vec_2', 'node2vec_3', 'node2vec_4',
        'friendLink', 'PR1', 'PR2', 'sim', 'dist']

# keep only the whitelisted columns, in each frame's own column order
# (names from `cols` that a frame does not contain are skipped)
X_trainval_sv, X_train_sv, X_val_sv, X_test_sv = (
    frame[[col for col in frame.columns if col in cols]]
    for frame in (X_trainval, X_train, X_val, X_test)
)

# preparing predefined cv split for sklearn: -1 = row is always used for training,
# 0 = row belongs to the single validation fold.
# The fold is built by position rather than by index-label membership: a label lookup
# silently marks training rows as validation rows whenever the train and validation
# frames share index labels. The assertions pin the layout this relies on
# (X_trainval = X_train rows followed by X_val rows).
n_train, n_val = len(X_train_sv), len(X_val_sv)
assert len(X_trainval_sv) == n_train + n_val, "X_trainval must consist of X_train followed by X_val"
assert X_trainval_sv.index[n_train:].equals(X_val_sv.index), "validation rows must be the tail of X_trainval"
val_fold = np.concatenate([np.full(n_train, -1), np.zeros(n_val, dtype=int)])
ps = PredefinedSplit(val_fold)
ps.get_n_splits()

1

In [23]:
# sanity check: feature matrix and label vector must have the same number of rows
print(len(X_trainval_sv), len(y_trainval), sep="\n")

9774
9774


##### a) Logistic Regression

In [24]:
# logistic regression, regularisation strength selected on the predefined validation fold
clf = LogisticRegressionCV(max_iter=10000, cv=ps)
clf.fit(X_trainval_sv, y_trainval)

# predict all three row sets with the fitted model
y_trainval_hat = clf.predict(X_trainval_sv)
y_val_hat = clf.predict(X_val_sv)
y_test_hat = clf.predict(X_test_sv)

# compute accuracies
# NOTE: "train" below is measured on the combined train + validation rows. With
# scikit-learn's documented default refit=True the final model is fitted on all rows
# passed to fit(), so the validation figure is not strictly out-of-sample.
acc_trainval = accuracy_score(y_trainval, y_trainval_hat)
acc_val = accuracy_score(y_val, y_val_hat)
acc_test = accuracy_score(y_test, y_test_hat)

# print performance
print(f"Acc train: {acc_trainval}")
print(f"Acc val: {acc_val}")
print(f"Acc test: {acc_test}")

Acc train: 0.9288929813791692
Acc val: 0.8622119815668203
Acc test: 0.8560885608856088


In [28]:
# predict the test rows with the current `clf`
y_test_hat = clf.predict(X_test_sv)

# pass the (source, target) pairs, the full test frame and the predictions to the
# project helper (presumably it persists them -- see util.modeling), then show how
# many rows fall into each predicted class
test_pairs = test_tf[['source', 'target']]
save_test = modeling.save_test_preds(test_pairs, test_tf, y_test_hat)
save_test.value_counts()

Predicted
0            596
1            488
dtype: int64

##### b) Random Forest & XGBoost

In [29]:
# search spaces
# (tune.randint excludes its upper bound: n_estimators is drawn from 2..9, max_depth from 1..2)
rdForest_params = {
    "n_estimators": tune.randint(2, 10),
    "max_depth": tune.randint(1, 3),
    "min_samples_split": tune.choice([2, 5, 10]),
    "min_samples_leaf": tune.choice([1, 2, 3, 4]),
    "max_features": tune.uniform(0.2, 1),
    "bootstrap": tune.choice([True, False]),
}

XGBoost_params = {
    "n_estimators": tune.randint(2, 10),
    "max_depth": tune.randint(1, 3),
    "min_child_weight": tune.uniform(1, 10),
    "colsample_bytree": tune.uniform(0.5, 1),
    "subsample": tune.uniform(0.2, 1),
    "eta": tune.loguniform(1e-4, 1e-1),
}

# loading models
# Both learners subsample rows / features at random; a fixed seed makes a given
# hyperparameter configuration produce the same model (and score) on every run.
RANDOM_STATE = 42
rdForest_mdl = RandomForestClassifier(random_state=RANDOM_STATE)
XGBoost_mdl = XGBClassifier(random_state=RANDOM_STATE)

In [30]:
# tuning RandomForestClassifier: hyperopt-driven search, each trial scored by accuracy
# on the single predefined validation fold `ps`
rf_search_kwargs = dict(
    search_optimization="hyperopt",
    n_trials=20,
    cv=ps,
    scoring='accuracy',
    early_stopping=False,
    verbose=1,
    return_train_score=True,
)
tune_search = TuneSearchCV(rdForest_mdl, rdForest_params, **rf_search_kwargs)
tune_search.fit(X_trainval_sv, y_trainval)

# print tuning results
print(f'The TuneSearchCV best hyperparameters : {tune_search.best_params_}')
print(f'The TuneSearchCV best score : {tune_search.best_score_}')

# predictions of the selected model
# NOTE: "train" below means the combined train + validation rows; best_score_ above
# is the score measured on the validation fold during the search.
best_model = tune_search.best_estimator_
y_trainval_hat = best_model.predict(X_trainval_sv)
y_val_hat = best_model.predict(X_val_sv)

# accuracies on both row sets
acc_trainval = accuracy_score(y_trainval, y_trainval_hat)
acc_val = accuracy_score(y_val, y_val_hat)

print(f"Acc train: {acc_trainval}")
print(f"Acc val: {acc_val}")

0,1
Current time:,2023-04-23 12:48:18
Running for:,00:00:06.90
Memory:,14.8/16.0 GiB

Trial name,status,loc,bootstrap,cv,early_stop_type,early_stopping,groups,max_depth,max_features,max_iters,metric_name,min_samples_leaf,min_samples_split,n_estimators,n_jobs,return_train_score,scoring/score,iter,total time (s),split0_test_score,average_test_score,split0_train_score
_Trainable_17b85452,TERMINATED,127.0.0.1:14253,False,PredefinedSplit_6bb0,EarlyStopping.N_88e0,False,,1,0.652533,1,average_test_score,2,5,6,1,True,make_scorer(acc_60a0,1,0.0474529,0.837788,0.837788,0.951604
_Trainable_1a2073c0,TERMINATED,127.0.0.1:14264,False,PredefinedSplit_1550,EarlyStopping.N_88e0,False,,1,0.242124,1,average_test_score,3,10,5,1,True,make_scorer(acc_1f70,1,0.028239,0.770968,0.770968,0.856128
_Trainable_79fafcee,TERMINATED,127.0.0.1:14265,True,PredefinedSplit_17c0,EarlyStopping.N_88e0,False,,2,0.453867,1,average_test_score,1,10,2,1,True,make_scorer(acc_1280,1,0.080364,0.872811,0.872811,0.966991
_Trainable_e555cf5e,TERMINATED,127.0.0.1:14253,True,PredefinedSplit_14c0,EarlyStopping.N_88e0,False,,1,0.393491,1,average_test_score,4,10,3,1,True,make_scorer(acc_1b20,1,0.019366,0.849309,0.849309,0.950552
_Trainable_7875056d,TERMINATED,127.0.0.1:14266,False,PredefinedSplit_7040,EarlyStopping.N_88e0,False,,1,0.221702,1,average_test_score,4,5,7,1,True,make_scorer(acc_7700,1,0.047282,0.820737,0.820737,0.841268
_Trainable_7c1a81bc,TERMINATED,127.0.0.1:14253,True,PredefinedSplit_5d00,EarlyStopping.N_88e0,False,,1,0.915287,1,average_test_score,4,5,4,1,True,make_scorer(acc_c2e0,1,0.0302317,0.837788,0.837788,0.951604
_Trainable_51fe831d,TERMINATED,127.0.0.1:14267,True,PredefinedSplit_4f10,EarlyStopping.N_88e0,False,,1,0.310439,1,average_test_score,4,5,3,1,True,make_scorer(acc_4e80,1,0.0222261,0.839631,0.839631,0.957522
_Trainable_dd9ad69e,TERMINATED,127.0.0.1:14253,False,PredefinedSplit_f100,EarlyStopping.N_88e0,False,,1,0.363102,1,average_test_score,1,2,7,1,True,make_scorer(acc_4d60,1,0.033664,0.837788,0.837788,0.951604
_Trainable_57149d82,TERMINATED,127.0.0.1:14268,False,PredefinedSplit_9dc0,EarlyStopping.N_88e0,False,,1,0.428941,1,average_test_score,2,10,4,1,True,make_scorer(acc_9400,1,0.026669,0.727189,0.727189,0.725276
_Trainable_378aaffe,TERMINATED,127.0.0.1:14253,False,PredefinedSplit_a520,EarlyStopping.N_88e0,False,,2,0.651058,1,average_test_score,1,2,6,1,True,make_scorer(acc_a2b0,1,0.0708702,0.948848,0.948848,0.967386


2023-04-23 12:48:14,377	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:14,425	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:14,472	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:14,529	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': Pred

The TuneSearchCV best hyperparameters : {'n_estimators': 6, 'max_depth': 2, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 0.6510575850885153, 'bootstrap': False}
The TuneSearchCV best score : 0.9488479262672812
Acc train: 0.9598935952527112
Acc val: 0.9488479262672811


In [32]:
# predict the test rows with the model selected by the current `tune_search`
y_test_hat = tune_search.predict(X_test_sv)

# pass the (source, target) pairs, the full test frame and the predictions to the
# project helper (presumably it persists them -- see util.modeling), then show how
# many rows fall into each predicted class
test_pairs = test_tf[['source', 'target']]
save_test = modeling.save_test_preds(test_pairs, test_tf, y_test_hat)
save_test.value_counts()

Predicted
0            619
1            465
dtype: int64

In [33]:
# tuning XGBoostClassifier: hyperopt-driven search, each trial scored by accuracy
# on the single predefined validation fold `ps`
xgb_search_kwargs = dict(
    search_optimization="hyperopt",
    n_trials=20,
    cv=ps,
    scoring='accuracy',
    early_stopping=False,
    verbose=1,
    return_train_score=True,
)
tune_search = TuneSearchCV(XGBoost_mdl, XGBoost_params, **xgb_search_kwargs)
tune_search.fit(X_trainval_sv, y_trainval)

# print tuning results
print(f'The TuneSearchCV best hyperparameters : {tune_search.best_params_}')
print(f'The TuneSearchCV best score : {tune_search.best_score_}')

# predictions of the selected model
# NOTE: "train" below means the combined train + validation rows; best_score_ above
# is the score measured on the validation fold during the search.
best_model = tune_search.best_estimator_
y_trainval_hat = best_model.predict(X_trainval_sv)
y_val_hat = best_model.predict(X_val_sv)

# accuracies on both row sets
acc_trainval = accuracy_score(y_trainval, y_trainval_hat)
acc_val = accuracy_score(y_val, y_val_hat)

print(f"Acc train: {acc_trainval}")
print(f"Acc val: {acc_val}")

0,1
Current time:,2023-04-23 12:48:56
Running for:,00:00:06.72
Memory:,14.8/16.0 GiB

Trial name,status,loc,colsample_bytree,cv,early_stop_type,early_stopping,eta,groups,max_depth,max_iters,metric_name,min_child_weight,n_estimators,n_jobs,return_train_score,scoring/score,subsample,iter,total time (s),split0_test_score,average_test_score,split0_train_score
_Trainable_8ae62d5b,TERMINATED,127.0.0.1:14331,0.698673,PredefinedSplit_f2b0,EarlyStopping.N_88e0,False,0.00738356,,1,1,average_test_score,4.12184,9,1,True,make_scorer(acc_6850,0.59832,1,0.0614731,0.83871,0.83871,0.951867
_Trainable_dd3a2fa3,TERMINATED,127.0.0.1:14334,0.535174,PredefinedSplit_3c40,EarlyStopping.N_88e0,False,0.00011344,,2,1,average_test_score,2.3167,7,1,True,make_scorer(acc_34c0,0.228422,1,0.050518,0.923963,0.923963,0.96949
_Trainable_86624880,TERMINATED,127.0.0.1:14335,0.706756,PredefinedSplit_a730,EarlyStopping.N_88e0,False,0.00248612,,2,1,average_test_score,8.54984,6,1,True,make_scorer(acc_a9a0,0.543918,1,0.0683062,0.85576,0.85576,0.965676
_Trainable_44730ccc,TERMINATED,127.0.0.1:14336,0.83729,PredefinedSplit_f640,EarlyStopping.N_88e0,False,0.00435993,,2,1,average_test_score,7.26557,9,1,True,make_scorer(acc_a370,0.834846,1,0.0743341,0.929493,0.929493,0.968175
_Trainable_f6379984,TERMINATED,127.0.0.1:14331,0.506702,PredefinedSplit_ccd0,EarlyStopping.N_88e0,False,0.0801221,,2,1,average_test_score,3.50407,5,1,True,make_scorer(acc_c970,0.30988,1,0.036787,0.927189,0.927189,0.966597
_Trainable_51259200,TERMINATED,127.0.0.1:14337,0.604132,PredefinedSplit_1400,EarlyStopping.N_88e0,False,0.00173655,,1,1,average_test_score,2.88933,7,1,True,make_scorer(acc_1a00,0.618203,1,0.0503099,0.839631,0.839631,0.953183
_Trainable_31b6320d,TERMINATED,127.0.0.1:14331,0.82258,PredefinedSplit_dc10,EarlyStopping.N_88e0,False,0.00278052,,1,1,average_test_score,1.97842,4,1,True,make_scorer(acc_d9a0,0.626716,1,0.05475,0.837788,0.837788,0.951604
_Trainable_c84e39fa,TERMINATED,127.0.0.1:14338,0.853117,PredefinedSplit_f730,EarlyStopping.N_88e0,False,0.0254406,,1,1,average_test_score,4.6106,3,1,True,make_scorer(acc_f7c0,0.526345,1,0.038027,0.843779,0.843779,0.954366
_Trainable_0e325feb,TERMINATED,127.0.0.1:14339,0.595901,PredefinedSplit_7520,EarlyStopping.N_88e0,False,0.000269513,,2,1,average_test_score,6.38578,2,1,True,make_scorer(acc_75b0,0.776367,1,0.0459907,0.8447,0.8447,0.854287
_Trainable_1c1d2576,TERMINATED,127.0.0.1:14331,0.541129,PredefinedSplit_3520,EarlyStopping.N_88e0,False,0.00096172,,1,1,average_test_score,5.91799,8,1,True,make_scorer(acc_3310,0.309091,1,0.0532033,0.83871,0.83871,0.953314


2023-04-23 12:48:52,593	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:52,646	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:52,735	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])), 'early_stop_type': <EarlyStopping.NO_EARLY_STOP: 7>, 'scoring/score': make_scorer(accuracy_score)}
2023-04-23 12:48:52,834	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'cv': Pred

The TuneSearchCV best hyperparameters : {'n_estimators': 8, 'max_depth': 2, 'min_child_weight': 3.7064888836950427, 'colsample_bytree': 0.8535591471612474, 'subsample': 0.645970665138671, 'eta': 0.0002273317107317788}
The TuneSearchCV best score : 0.9474654377880184
Acc train: 0.9608144055657868
Acc val: 0.947926267281106


In [34]:
# predict the test rows with the model selected by the current `tune_search`
y_test_hat = tune_search.predict(X_test_sv)

# pass the (source, target) pairs, the full test frame and the predictions to the
# project helper (presumably it persists them -- see util.modeling), then show how
# many rows fall into each predicted class
test_pairs = test_tf[['source', 'target']]
save_test = modeling.save_test_preds(test_pairs, test_tf, y_test_hat)
save_test.value_counts()

Predicted
0            622
1            462
dtype: int64

##### c) SVM

In [35]:
# prepare datasets for SVM: only the four node2vec columns are kept
# NOTE: this rebinds the *_sv frames that the earlier supervised cells created,
# so re-running those cells afterwards would use this reduced feature set.
cols = ['node2vec_1', 'node2vec_2', 'node2vec_3', 'node2vec_4']
X_trainval_sv, X_train_sv, X_val_sv, X_test_sv = (
    frame[[col for col in frame.columns if col in cols]]
    for frame in (X_trainval, X_train, X_val, X_test)
)

In [36]:
# SVM on standardised features; scaler and classifier are fitted on the training rows only
clf = make_pipeline(StandardScaler(), SVC(C=0.001))
clf.fit(X_train_sv, y_train)

# predictions for the training and the validation rows
y_train_hat = clf.predict(X_train_sv)
y_val_hat = clf.predict(X_val_sv)

# accuracy on each row set
acc_train = accuracy_score(y_train, y_train_hat)
acc_val = accuracy_score(y_val, y_val_hat)

print(f"Acc train: {acc_train}")
print(f"Acc val: {acc_val}")



Acc train: 0.7006838506049448
Acc val: 0.6953917050691244
