In [1]:
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd 
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

## Load data

In [2]:
# Load the embedding file, order the rows by the first column and then discard
# that column so only the embedding dimensions remain.
# NOTE(review): the skipped first line is presumably a "<count> <dim>" header and
# column 0 presumably the node id -- confirm against the embedding writer.
emb_df = (
    pd.read_csv('embedding/[128, 20, 100, 4, 1].txt', sep=' ', skiprows=[0], header=None)
    .sort_values(by=[0])
    .drop(columns=[0])
)

In [3]:
# Load the node table and extract one class label per node, ordered by node id.
# Column 0 is renamed to 'id' and column 1434 to 'class'; the columns in
# between are not used in this cell.
nodes = pd.read_csv('data/cora.content', sep='\t', header=None).rename(
    columns={0: 'id', 1434: 'class'}
)

# Sort the (id, class) pairs by id and keep the labels as a flat 1-D array.
y = nodes[['id', 'class']].sort_values(by=['id'])['class'].to_numpy()
y

array(['Genetic_Algorithms', 'Genetic_Algorithms',
       'Reinforcement_Learning', ..., 'Rule_Learning', 'Rule_Learning',
       'Rule_Learning'], dtype=object)

## Grid search for KNN

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Exhaustive search over the neighbourhood size and the neighbour-lookup
# algorithm. No `cv` or `scoring` argument is given, so GridSearchCV uses its
# defaults for both.
knn = KNeighborsClassifier()
parameters = {
    'n_neighbors': [4, 8, 10],
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
}
knn_grid = GridSearchCV(estimator=knn, param_grid=parameters)

# Fit on the embeddings against the class labels; the fitted search object is
# the cell's displayed value.
knn_grid.fit(emb_df, y)

GridSearchCV(estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                         'n_neighbors': [4, 8, 10]})

In [5]:
# Show the raw cross-validation results of the KNN grid search: a dict of
# arrays with one entry per parameter combination (fit/score timings, the
# parameter values tried, and more).
knn_grid.cv_results_

{'mean_fit_time': array([0.0218833 , 0.02648177, 0.02945766, 0.04238605, 0.04366493,
        0.04061556, 0.        , 0.00312357, 0.00312281]),
 'std_fit_time': array([0.00764693, 0.00575704, 0.00150132, 0.00585139, 0.00620458,
        0.00765318, 0.        , 0.00624714, 0.00624561]),
 'mean_score_time': array([0.16061869, 0.15992365, 0.16701026, 0.24914446, 0.2343214 ,
        0.23744378, 0.03124371, 0.03124356, 0.03123622]),
 'std_score_time': array([6.13374954e-03, 6.18645169e-03, 7.48640492e-03, 4.55242843e-03,
        4.36857792e-05, 6.24891854e-03, 2.40122429e-05, 2.40185863e-05,
        1.50807243e-05]),
 'param_algorithm': masked_array(data=['ball_tree', 'ball_tree', 'ball_tree', 'kd_tree',
                    'kd_tree', 'kd_tree', 'brute', 'brute', 'brute'],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'param_n_neighbors': masked_array(data=[4, 8, 10, 4, 8, 10, 4, 8, 

In [13]:
# One row per KNN parameter combination, with its mean cross-validated test
# score stored in a column named "Accuracy".
knn_df = pd.DataFrame(knn_grid.cv_results_["params"]).assign(
    Accuracy=knn_grid.cv_results_["mean_test_score"]
)
# NOTE(review): `dfi` is not used in this cell; presumably it is needed further
# down to export the styled tables -- confirm before removing.
import dataframe_image as dfi

# Shade every numeric cell on a grey scale, then display the styled table.
knn_styled = knn_df.style.background_gradient(cmap='Greys')
knn_styled

Unnamed: 0,algorithm,n_neighbors,Accuracy
0,ball_tree,4,0.822755
1,ball_tree,8,0.814998
2,ball_tree,10,0.810934
3,kd_tree,4,0.822755
4,kd_tree,8,0.814998
5,kd_tree,10,0.810934
6,brute,4,0.822755
7,brute,8,0.814998
8,brute,10,0.810934


## Grid search for SVM

In [6]:
from sklearn.svm import SVC

# Import the estimator class directly. The previous form imported the `svm`
# module and immediately rebound the same name to an SVC instance, which is
# confusing to read. `svm` is still bound to the estimator afterwards, exactly
# as before, so later cells see the same names.
svm = SVC()

# Grid over kernel type and regularisation strength C.
# Note: this rebinds `parameters`, which the KNN grid-search cell also assigns.
parameters = {'kernel': ('linear', 'rbf'), 'C': [0.5, 1, 5, 10]}
svm_grid = GridSearchCV(svm, parameters)

# Fit on the embeddings against the class labels; the fitted search object is
# the cell's displayed value.
svm_grid.fit(emb_df, y)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.5, 1, 5, 10], 'kernel': ('linear', 'rbf')})

In [7]:
# Show the raw cross-validation results of the SVM grid search: a dict of
# arrays with one entry per (C, kernel) combination (fit/score timings, the
# parameter values tried, and more).
svm_grid.cv_results_

{'mean_fit_time': array([0.10168781, 0.15656862, 0.12110467, 0.15342274, 0.18343258,
        0.139676  , 0.20662131, 0.13757663]),
 'std_fit_time': array([0.00671688, 0.00092586, 0.00958634, 0.00644781, 0.01610334,
        0.00126148, 0.01416433, 0.00531446]),
 'mean_score_time': array([0.02534623, 0.08940072, 0.02287183, 0.08984823, 0.02334213,
        0.08209691, 0.02127547, 0.08374109]),
 'std_score_time': array([0.00861888, 0.00986082, 0.00380555, 0.00715113, 0.00079857,
        0.00630853, 0.00285448, 0.00828004]),
 'param_C': masked_array(data=[0.5, 0.5, 1, 1, 5, 5, 10, 10],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf',
                    'linear', 'rbf'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 0.5, 'kerne

In [14]:
# One row per (C, kernel) combination, with its mean cross-validated test
# score stored in a column named "Accuracy".
svm_df = pd.DataFrame(svm_grid.cv_results_["params"]).assign(
    Accuracy=svm_grid.cv_results_["mean_test_score"]
)

# Shade every numeric cell on a grey scale, then display the styled table.
svm_styled = svm_df.style.background_gradient(cmap='Greys')
svm_styled

Unnamed: 0,C,kernel,Accuracy
0,0.5,linear,0.839005
1,0.5,rbf,0.828283
2,1.0,linear,0.838636
3,1.0,rbf,0.848229
4,5.0,linear,0.815001
5,5.0,rbf,0.86005
6,10.0,linear,0.810563
7,10.0,rbf,0.85451
