In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm, metrics
from sklearn.neighbors import KNeighborsClassifier, DistanceMetric
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from preprocess import preprocess, load_dataset

# Classification of cell types with RNA-seq data

## Preparation

In [32]:
#Load data and preprocess
datadir = 'data/muraro'
data = load_dataset(datadir, 'muraro')
# datadir = 'data/FACS'
# data = load_dataset(datadir, 'tabula-muris')
X, y = preprocess(data)
num_labels = len(np.unique(y))
num_features = X.shape[1]

Reading Data...  -  Aorta
Reading Data...  -  Bladder
Reading Data...  -  Brain_Myeloid
Reading Data...  -  Brain_Non-Myeloid
Reading Data...  -  Diaphragm
Reading Data...  -  Fat
Reading Data...  -  Heart
Reading Data...  -  Kidney
Reading Data...  -  Large_Intestine
Reading Data...  -  Limb_Muscle
Reading Data...  -  Liver
Reading Data...  -  Lung
Reading Data...  -  Mammary_Gland
Reading Data...  -  Marrow
Reading Data...  -  Pancreas
Reading Data...  -  Skin
Reading Data...  -  Spleen
Reading Data...  -  Thymus
Reading Data...  -  Tongue
Reading Data...  -  Trachea


In [12]:
X = X.astype(float)
X = np.nan_to_num(X)

In [3]:
# Train/Test splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle=True)

In [15]:
# RUN THESE CELLS IF USING TABULA MURIS
store_x_train = X_train # 38206
store_y_train = y_train
store_x_test = X_test # 6743
store_y_test = y_test

In [25]:
# RUN THESE CELLS IF USING TABULA MURIS
X_train = store_x_train[:7000]
y_train = store_y_train[:7000]
X_test = store_x_test[:1200]
y_test = store_y_test[:1200]

## Algorithms

In [4]:
def evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='macro'):
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    precision, recall, f1, support = metrics.precision_recall_fscore_support(y_test, prediction,
                                                                         average=average,
                                                                         zero_division='warn')
    print(f"Classification report for classifier {clf}:\n"
      f"{metrics.classification_report(y_test, prediction)}\n")
    return precision, recall, f1

**Linear SVM**

In [9]:
from sklearn.model_selection import GridSearchCV
denom = len(X_train[0]) * X_train.var()
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1,100000/denom, 10000/denom, 1000/denom, 100/denom, 10/denom, 1/denom],
              'kernel': ['linear']}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 35 candidates, totalling 175 fits
[CV] C=0.1, gamma=1, kernel=linear ...................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.975, total=   5.2s
[CV] C=0.1, gamma=1, kernel=linear ...................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.2s remaining:    0.0s


[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.994, total=   5.0s
[CV] C=0.1, gamma=1, kernel=linear ...................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   10.3s remaining:    0.0s


[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.986, total=   5.0s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.983, total=   5.2s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.967, total=   5.0s
[CV] C=0.1, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=0.1, gamma=0.029224700105547258, kernel=linear, score=0.975, total=   4.5s
[CV] C=0.1, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=0.1, gamma=0.029224700105547258, kernel=linear, score=0.994, total=   5.1s
[CV] C=0.1, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=0.1, gamma=0.029224700105547258, kernel=linear, score=0.986, total=   4.9s
[CV] C=0.1, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=0.1, gamma=0.029224700105547258, kernel=linear, score=0.983, total=   5.2s
[CV] C=0.1, gamma=0.029224700

[CV]  C=1, gamma=2.9224700105547256e-05, kernel=linear, score=0.994, total=   5.0s
[CV] C=1, gamma=2.9224700105547256e-05, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547256e-05, kernel=linear, score=0.986, total=   4.9s
[CV] C=1, gamma=2.9224700105547256e-05, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547256e-05, kernel=linear, score=0.983, total=   5.1s
[CV] C=1, gamma=2.9224700105547256e-05, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547256e-05, kernel=linear, score=0.967, total=   5.0s
[CV] C=1, gamma=2.9224700105547255e-06, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547255e-06, kernel=linear, score=0.975, total=   4.4s
[CV] C=1, gamma=2.9224700105547255e-06, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547255e-06, kernel=linear, score=0.994, total=   5.0s
[CV] C=1, gamma=2.9224700105547255e-06, kernel=linear ................
[CV]  C=1, gamma=2.9224700105547255e-06, kernel=linear, score=0.986, total= 

[CV]  C=100, gamma=0.029224700105547258, kernel=linear, score=0.975, total=   6.0s
[CV] C=100, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=100, gamma=0.029224700105547258, kernel=linear, score=0.994, total=   5.6s
[CV] C=100, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=100, gamma=0.029224700105547258, kernel=linear, score=0.986, total=   6.4s
[CV] C=100, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=100, gamma=0.029224700105547258, kernel=linear, score=0.983, total=   6.0s
[CV] C=100, gamma=0.029224700105547258, kernel=linear ................
[CV]  C=100, gamma=0.029224700105547258, kernel=linear, score=0.967, total=   5.6s
[CV] C=100, gamma=0.002922470010554726, kernel=linear ................
[CV]  C=100, gamma=0.002922470010554726, kernel=linear, score=0.975, total=   4.7s
[CV] C=100, gamma=0.002922470010554726, kernel=linear ................
[CV]  C=100, gamma=0.002922470010554726, kernel=linear, score=0.994, total= 

[CV]  C=1000, gamma=2.9224700105547256e-05, kernel=linear, score=0.967, total=   5.3s
[CV] C=1000, gamma=2.9224700105547255e-06, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547255e-06, kernel=linear, score=0.975, total=   5.0s
[CV] C=1000, gamma=2.9224700105547255e-06, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547255e-06, kernel=linear, score=0.994, total=   5.6s
[CV] C=1000, gamma=2.9224700105547255e-06, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547255e-06, kernel=linear, score=0.986, total=   5.3s
[CV] C=1000, gamma=2.9224700105547255e-06, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547255e-06, kernel=linear, score=0.983, total=   5.2s
[CV] C=1000, gamma=2.9224700105547255e-06, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547255e-06, kernel=linear, score=0.967, total=   5.4s
[CV] C=1000, gamma=2.9224700105547257e-07, kernel=linear .............
[CV]  C=1000, gamma=2.9224700105547257e-07, kernel=linear,

[Parallel(n_jobs=1)]: Done 175 out of 175 | elapsed: 15.0min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.029224700105547258,
                                   0.002922470010554726, 0.0002922470010554726,
                                   2.9224700105547256e-05,
                                   2.9224700105547255e-06,
                                   2.9224700105547257e-07],
                         'kernel': ['linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=No

In [11]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
              precision    recall  f1-score   support

      acinar       0.89      1.00      0.94        32
       alpha       1.00      1.00      1.00       118
        beta       1.00      0.96      0.98        68
       delta       0.97      1.00      0.98        31
        duct       0.97      0.92      0.94        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.94      0.94      0.94        16

    accuracy                           0.98       318
   macro avg       0.97      0.98      0.97       318
weighted avg       0.98      0.98      0.98       318



**Polynomial SVM**

In [34]:
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [2, 1,1000000/denom, 100000/denom, 10000/denom, 1000/denom, 100/denom, 10/denom, 1/denom],
              'kernel': ['poly']}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] C=0.1, gamma=2, kernel=poly .....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ...................... C=0.1, gamma=2, kernel=poly, total=   5.8s
[CV] C=0.1, gamma=2, kernel=poly .....................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.9s remaining:    0.0s


[CV] ...................... C=0.1, gamma=2, kernel=poly, total=   5.3s
[CV] C=0.1, gamma=2, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=2, kernel=poly, total=   4.8s
[CV] C=0.1, gamma=2, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=2, kernel=poly, total=   5.0s
[CV] C=0.1, gamma=2, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=2, kernel=poly, total=   4.9s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=1, kernel=poly, total=   4.8s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=1, kernel=poly, total=   5.1s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=1, kernel=poly, total=   4.7s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] .

[CV] ....... C=1, gamma=0.2922470010554726, kernel=poly, total=   5.0s
[CV] C=1, gamma=0.029224700105547258, kernel=poly ....................
[CV] ..... C=1, gamma=0.029224700105547258, kernel=poly, total=   4.6s
[CV] C=1, gamma=0.029224700105547258, kernel=poly ....................
[CV] ..... C=1, gamma=0.029224700105547258, kernel=poly, total=   5.0s
[CV] C=1, gamma=0.029224700105547258, kernel=poly ....................
[CV] ..... C=1, gamma=0.029224700105547258, kernel=poly, total=   4.8s
[CV] C=1, gamma=0.029224700105547258, kernel=poly ....................
[CV] ..... C=1, gamma=0.029224700105547258, kernel=poly, total=   5.0s
[CV] C=1, gamma=0.029224700105547258, kernel=poly ....................
[CV] ..... C=1, gamma=0.029224700105547258, kernel=poly, total=   4.9s
[CV] C=1, gamma=0.002922470010554726, kernel=poly ....................
[CV] ..... C=1, gamma=0.002922470010554726, kernel=poly, total=   4.6s
[CV] C=1, gamma=0.002922470010554726, kernel=poly ....................
[CV] .

[CV] ... C=10, gamma=0.0002922470010554726, kernel=poly, total=   4.8s
[CV] C=10, gamma=0.0002922470010554726, kernel=poly ..................
[CV] ... C=10, gamma=0.0002922470010554726, kernel=poly, total=   5.1s
[CV] C=10, gamma=0.0002922470010554726, kernel=poly ..................
[CV] ... C=10, gamma=0.0002922470010554726, kernel=poly, total=   4.9s
[CV] C=10, gamma=2.9224700105547256e-05, kernel=poly .................
[CV] .. C=10, gamma=2.9224700105547256e-05, kernel=poly, total=   4.6s
[CV] C=10, gamma=2.9224700105547256e-05, kernel=poly .................
[CV] .. C=10, gamma=2.9224700105547256e-05, kernel=poly, total=   5.0s
[CV] C=10, gamma=2.9224700105547256e-05, kernel=poly .................
[CV] .. C=10, gamma=2.9224700105547256e-05, kernel=poly, total=   4.7s
[CV] C=10, gamma=2.9224700105547256e-05, kernel=poly .................
[CV] .. C=10, gamma=2.9224700105547256e-05, kernel=poly, total=   5.0s
[CV] C=10, gamma=2.9224700105547256e-05, kernel=poly .................
[CV] .

[CV] . C=100, gamma=2.9224700105547257e-07, kernel=poly, total=   7.4s
[CV] C=100, gamma=2.9224700105547257e-07, kernel=poly ................
[CV] . C=100, gamma=2.9224700105547257e-07, kernel=poly, total=   8.0s
[CV] C=100, gamma=2.9224700105547257e-07, kernel=poly ................
[CV] . C=100, gamma=2.9224700105547257e-07, kernel=poly, total=   7.6s
[CV] C=100, gamma=2.9224700105547257e-07, kernel=poly ................
[CV] . C=100, gamma=2.9224700105547257e-07, kernel=poly, total=   7.7s
[CV] C=100, gamma=2.9224700105547257e-07, kernel=poly ................
[CV] . C=100, gamma=2.9224700105547257e-07, kernel=poly, total=   7.9s
[CV] C=1000, gamma=2, kernel=poly ....................................
[CV] ..................... C=1000, gamma=2, kernel=poly, total=   4.6s
[CV] C=1000, gamma=2, kernel=poly ....................................
[CV] ..................... C=1000, gamma=2, kernel=poly, total=   5.1s
[CV] C=1000, gamma=2, kernel=poly ....................................
[CV] .

[Parallel(n_jobs=1)]: Done 225 out of 225 | elapsed: 23.2min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [2, 1, 0.2922470010554726,
                                   0.029224700105547258, 0.002922470010554726,
                                   0.0002922470010554726,
                                   2.9224700105547256e-05,
                                   2.9224700105547255e-06,
                                   2.9224700105547257e-07],
                         'kernel': ['poly']},
             pre_dispatch='2*n_jobs', refit

In [15]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'C': 0.1, 'gamma': 1, 'kernel': 'poly'}
              precision    recall  f1-score   support

      acinar       0.86      1.00      0.93        32
       alpha       0.99      1.00      1.00       118
        beta       0.98      0.96      0.97        68
       delta       0.97      1.00      0.98        31
        duct       1.00      0.89      0.94        37
 endothelial       0.50      1.00      0.67         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.93      0.81      0.87        16

    accuracy                           0.97       318
   macro avg       0.90      0.96      0.92       318
weighted avg       0.97      0.97      0.97       318



**RBF SVM**

In [16]:
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1,1000000/denom, 100000/denom, 10000/denom, 1000/denom, 100/denom, 10/denom, 1/denom],
              'kernel': ['rbf']}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ....................... C=0.1, gamma=1, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.0min remaining:    0.0s


[CV] ....................... C=0.1, gamma=1, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total= 2.1min
[CV] C=0.1, gamma=0.2922470010554726, kernel=rbf .....................
[CV] ...... C=0.1, gamma=0.2922470010554726, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=0.2922470010554726, kernel=rbf .....................
[CV] ...... C=0.1, gamma=0.2922470010554726, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=0.2922470010554726, kernel=rbf .....................
[CV] ...... C=0.1, gamma=0.2922470010554726, kernel=rbf, total= 2.0min
[CV] C=0.1, gamma=0.2922470010554726, kernel=rbf .....................
[CV] .

[CV] ...... C=1, gamma=0.002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1, gamma=0.0002922470010554726, kernel=rbf ....................
[CV] ..... C=1, gamma=0.0002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1, gamma=0.0002922470010554726, kernel=rbf ....................
[CV] ..... C=1, gamma=0.0002922470010554726, kernel=rbf, total= 2.1min
[CV] C=1, gamma=0.0002922470010554726, kernel=rbf ....................
[CV] ..... C=1, gamma=0.0002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1, gamma=0.0002922470010554726, kernel=rbf ....................
[CV] ..... C=1, gamma=0.0002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1, gamma=0.0002922470010554726, kernel=rbf ....................
[CV] ..... C=1, gamma=0.0002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1, gamma=2.9224700105547256e-05, kernel=rbf ...................
[CV] .... C=1, gamma=2.9224700105547256e-05, kernel=rbf, total= 2.0min
[CV] C=1, gamma=2.9224700105547256e-05, kernel=rbf ...................
[CV] .

[CV] ... C=10, gamma=2.9224700105547257e-07, kernel=rbf, total=   8.7s
[CV] C=10, gamma=2.9224700105547257e-07, kernel=rbf ..................
[CV] ... C=10, gamma=2.9224700105547257e-07, kernel=rbf, total=   8.5s
[CV] C=10, gamma=2.9224700105547257e-07, kernel=rbf ..................
[CV] ... C=10, gamma=2.9224700105547257e-07, kernel=rbf, total=   8.2s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total= 2.0min
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total= 2.0min
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total= 2.0min
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total= 2.0min
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .

[CV] ... C=1000, gamma=0.002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1000, gamma=0.002922470010554726, kernel=rbf ..................
[CV] ... C=1000, gamma=0.002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1000, gamma=0.002922470010554726, kernel=rbf ..................
[CV] ... C=1000, gamma=0.002922470010554726, kernel=rbf, total= 2.1min
[CV] C=1000, gamma=0.002922470010554726, kernel=rbf ..................
[CV] ... C=1000, gamma=0.002922470010554726, kernel=rbf, total= 2.1min
[CV] C=1000, gamma=0.002922470010554726, kernel=rbf ..................
[CV] ... C=1000, gamma=0.002922470010554726, kernel=rbf, total= 2.1min
[CV] C=1000, gamma=0.0002922470010554726, kernel=rbf .................
[CV] .. C=1000, gamma=0.0002922470010554726, kernel=rbf, total= 2.1min
[CV] C=1000, gamma=0.0002922470010554726, kernel=rbf .................
[CV] .. C=1000, gamma=0.0002922470010554726, kernel=rbf, total= 2.0min
[CV] C=1000, gamma=0.0002922470010554726, kernel=rbf .................
[CV] .

[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 326.2min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.2922470010554726, 0.029224700105547258,
                                   0.002922470010554726, 0.0002922470010554726,
                                   2.9224700105547256e-05,
                                   2.9224700105547255e-06,
                                   2.9224700105547257e-07],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
      

In [17]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'C': 1000, 'gamma': 2.9224700105547257e-07, 'kernel': 'rbf'}
              precision    recall  f1-score   support

      acinar       0.89      1.00      0.94        32
       alpha       1.00      1.00      1.00       118
        beta       1.00      0.99      0.99        68
       delta       1.00      1.00      1.00        31
        duct       1.00      0.95      0.97        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       1.00      0.94      0.97        16

    accuracy                           0.99       318
   macro avg       0.99      0.98      0.98       318
weighted avg       0.99      0.99      0.99       318



**Sigmoid SVM**

In [18]:
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1,1000000/denom, 100000/denom, 10000/denom, 1000/denom, 100/denom, 10/denom, 1/denom],
              'kernel': ['sigmoid']}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV] C=0.1, gamma=1, kernel=sigmoid ..................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=0.1, gamma=1, kernel=sigmoid, total=  55.6s
[CV] C=0.1, gamma=1, kernel=sigmoid ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   55.6s remaining:    0.0s


[CV] ................... C=0.1, gamma=1, kernel=sigmoid, total=  55.5s
[CV] C=0.1, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=0.1, gamma=1, kernel=sigmoid, total=  55.3s
[CV] C=0.1, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=0.1, gamma=1, kernel=sigmoid, total=  55.3s
[CV] C=0.1, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=0.1, gamma=1, kernel=sigmoid, total=  55.5s
[CV] C=0.1, gamma=0.2922470010554726, kernel=sigmoid .................
[CV] .. C=0.1, gamma=0.2922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=0.1, gamma=0.2922470010554726, kernel=sigmoid .................
[CV] .. C=0.1, gamma=0.2922470010554726, kernel=sigmoid, total=  55.5s
[CV] C=0.1, gamma=0.2922470010554726, kernel=sigmoid .................
[CV] .. C=0.1, gamma=0.2922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=0.1, gamma=0.2922470010554726, kernel=sigmoid .................
[CV] .

[CV] .. C=1, gamma=0.002922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=1, gamma=0.0002922470010554726, kernel=sigmoid ................
[CV] . C=1, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=1, gamma=0.0002922470010554726, kernel=sigmoid ................
[CV] . C=1, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.6s
[CV] C=1, gamma=0.0002922470010554726, kernel=sigmoid ................
[CV] . C=1, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=1, gamma=0.0002922470010554726, kernel=sigmoid ................
[CV] . C=1, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.3s
[CV] C=1, gamma=0.0002922470010554726, kernel=sigmoid ................
[CV] . C=1, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.6s
[CV] C=1, gamma=2.9224700105547256e-05, kernel=sigmoid ...............
[CV]  C=1, gamma=2.9224700105547256e-05, kernel=sigmoid, total=  59.1s
[CV] C=1, gamma=2.9224700105547256e-05, kernel=sigmoid ...............
[CV]  

[CV]  C=10, gamma=2.9224700105547257e-07, kernel=sigmoid, total=   6.6s
[CV] C=10, gamma=2.9224700105547257e-07, kernel=sigmoid ..............
[CV]  C=10, gamma=2.9224700105547257e-07, kernel=sigmoid, total=   6.5s
[CV] C=10, gamma=2.9224700105547257e-07, kernel=sigmoid ..............
[CV]  C=10, gamma=2.9224700105547257e-07, kernel=sigmoid, total=   6.7s
[CV] C=100, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=100, gamma=1, kernel=sigmoid, total=  55.5s
[CV] C=100, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=100, gamma=1, kernel=sigmoid, total=  55.6s
[CV] C=100, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=100, gamma=1, kernel=sigmoid, total=  56.7s
[CV] C=100, gamma=1, kernel=sigmoid ..................................
[CV] ................... C=100, gamma=1, kernel=sigmoid, total=  55.6s
[CV] C=100, gamma=1, kernel=sigmoid ..................................
[CV

[CV]  C=1000, gamma=0.002922470010554726, kernel=sigmoid, total=  56.7s
[CV] C=1000, gamma=0.002922470010554726, kernel=sigmoid ..............
[CV]  C=1000, gamma=0.002922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=1000, gamma=0.002922470010554726, kernel=sigmoid ..............
[CV]  C=1000, gamma=0.002922470010554726, kernel=sigmoid, total=  56.5s
[CV] C=1000, gamma=0.002922470010554726, kernel=sigmoid ..............
[CV]  C=1000, gamma=0.002922470010554726, kernel=sigmoid, total=  55.5s
[CV] C=1000, gamma=0.002922470010554726, kernel=sigmoid ..............
[CV]  C=1000, gamma=0.002922470010554726, kernel=sigmoid, total=  55.4s
[CV] C=1000, gamma=0.0002922470010554726, kernel=sigmoid .............
[CV]  C=1000, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.3s
[CV] C=1000, gamma=0.0002922470010554726, kernel=sigmoid .............
[CV]  C=1000, gamma=0.0002922470010554726, kernel=sigmoid, total=  55.3s
[CV] C=1000, gamma=0.0002922470010554726, kernel=sigmoid ...........

[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 160.7min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.2922470010554726, 0.029224700105547258,
                                   0.002922470010554726, 0.0002922470010554726,
                                   2.9224700105547256e-05,
                                   2.9224700105547255e-06,
                                   2.9224700105547257e-07],
                         'kernel': ['sigmoid']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
  

In [19]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'C': 1, 'gamma': 2.9224700105547257e-07, 'kernel': 'sigmoid'}
              precision    recall  f1-score   support

      acinar       0.91      1.00      0.96        32
       alpha       1.00      1.00      1.00       118
        beta       0.93      0.97      0.95        68
       delta       0.97      1.00      0.98        31
        duct       0.97      0.92      0.94        37
 endothelial       0.00      0.00      0.00         1
 mesenchymal       1.00      0.93      0.97        15
          pp       1.00      0.81      0.90        16

    accuracy                           0.97       318
   macro avg       0.85      0.83      0.84       318
weighted avg       0.97      0.97      0.97       318



  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
# Untuned classifiers
eval_log = {}

# SVM classifier with linear kernel
print('Working... - linear')
clf = svm.SVC(kernel='linear')
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['svm-linear'] = (precision, recall, f1)

# SVM classifier with polynomial kernel
print('Working... - polynomial')
clf = svm.SVC(kernel='poly', degree=3)
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['svm-poly'] = (precision, recall, f1)

# SVM classifier with RBF kernel
print('Working... - rbf')
clf = svm.SVC(kernel='rbf')
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['svm-rbf'] = (precision, recall, f1)

# SVM classifier with sigmoid kernel
print('Working... - sigmoid')
clf = svm.SVC(kernel='sigmoid')
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['svm-sigmoid'] = (precision, recall, f1)

Working... - linear
Classification report for classifier SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

      acinar       0.89      1.00      0.94        32
       alpha       1.00      1.00      1.00       118
        beta       1.00      0.96      0.98        68
       delta       0.97      1.00      0.98        31
        duct       0.97      0.92      0.94        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.94      0.94      0.94        16

    accuracy                           0.98       318
   macro avg       0.97      0.98      0.97       318
weighted avg       0.98      0.98      0.98       318


Working... - polynomial


  _warn_prf(average, modifier, msg_start, len(result))


Classification report for classifier SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

      acinar       0.97      0.88      0.92        32
       alpha       1.00      0.97      0.99       118
        beta       0.59      0.99      0.74        68
       delta       0.97      0.97      0.97        31
        duct       1.00      0.32      0.49        37
 endothelial       0.00      0.00      0.00         1
 mesenchymal       1.00      0.47      0.64        15
          pp       1.00      0.69      0.81        16

    accuracy                           0.85       318
   macro avg       0.82      0.66      0.69       318
weighted avg       0.90      0.85      0.84       318


Working... - rbf


  _warn_prf(average, modifier, msg_start, len(result))


Classification report for classifier SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

      acinar       0.89      1.00      0.94        32
       alpha       1.00      1.00      1.00       118
        beta       0.97      0.97      0.97        68
       delta       1.00      1.00      1.00        31
        duct       0.92      0.92      0.92        37
 endothelial       0.00      0.00      0.00         1
 mesenchymal       1.00      0.93      0.97        15
          pp       1.00      0.88      0.93        16

    accuracy                           0.97       318
   macro avg       0.85      0.84      0.84       318
weighted avg       0.97      0.97      0.97       318


Working... - sigmoid
Classification report for classifier SV

  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
# kNN classifier with Euclidean distance
print('Working... - euclidean knn')
clf = KNeighborsClassifier(n_neighbors=num_labels, metric='euclidean')
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['knn-euclidean'] = (precision, recall, f1)

# kNN classifier with Manhattan distance
print('Working... - manhattan knn')
clf = KNeighborsClassifier(n_neighbors=num_labels, metric='manhattan')
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['knn-manhattan'] = (precision, recall, f1)

Working... - euclidean knn
Classification report for classifier KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=9, p=2,
                     weights='uniform'):
              precision    recall  f1-score   support

      acinar       0.89      1.00      0.94        32
       alpha       1.00      1.00      1.00       118
        beta       0.99      0.97      0.98        68
       delta       0.97      1.00      0.98        31
        duct       0.97      0.92      0.94        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       1.00      0.88      0.93        16

    accuracy                           0.98       318
   macro avg       0.98      0.97      0.97       318
weighted avg       0.98      0.98      0.98       318


Working... - manhattan knn
Classification report for classifier KNeighborsClassifier(algorithm='

  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# Multi-layer perceptron classifier
clf = MLPClassifier()
precision, recall, f1 = evaluate_classifier(clf, X_train, X_test, y_train, y_test, average='micro')
eval_log['mlp'] = (precision, recall, f1)

In [28]:
eval_log

{'svm-linear': (0.8382789317507419, 0.8382789317507419, 0.8382789317507419),
 'svm-poly': (0.3353115727002967, 0.3353115727002967, 0.3353115727002967),
 'svm-rbf': (0.5356083086053413, 0.5356083086053413, 0.5356083086053413),
 'svm-sigmoid': (0.1943620178041543, 0.1943620178041543, 0.1943620178041543),
 'knn-euclidean': (0.5771513353115727, 0.5771513353115727, 0.5771513353115727),
 'knn-manhattan': (0.5148367952522255, 0.5148367952522255, 0.5148367952522255)}

## Kernel-based kNN

In [21]:
def rbf_kernel_dist(x, y, gamma):
    return 1 - np.exp(- gamma * ((x - y) ** 2).sum())

def poly_kernel_dist(x, y, gamma, r=0., d=3):
    Kxx = (r + gamma * (x ** 2).sum()) ** d
    Kyy = (r + gamma * (y ** 2).sum()) ** d
    Kxy = (r + gamma * np.dot(x, y)) ** d
    return Kxx + Kyy - 2 * Kxy

def sigmoid_kernel_dist(x, y, gamma, r=0.):
    Kxx = np.tanh(r + gamma * (x ** 2).sum())
    Kyy = np.tanh(r + gamma * (y ** 2).sum())
    Kxy = np.tanh(r + gamma * np.dot(x, y))
    return Kxx + Kyy - 2 * Kxy

**RBF Kernel KNN**

In [22]:
param_grid = {'n_neighbors': [num_labels, 3, 5, 11, 19], 
              'metric_params': [{'gamma' : 1}, {'gamma' : 1000000/denom}, {'gamma' : 100000/denom}, {'gamma' : 10000/denom}, 
                                {'gamma' : 1000/denom}, {'gamma' : 100/denom},{'gamma' : 10/denom},{'gamma' : 1/denom}],
              'metric': [rbf_kernel_dist]}

grid = GridSearchCV(KNeighborsClassifier(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9, total=  33.1s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   33.1s remaining:    0.0s


[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9, total=  32.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9, total=  32.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9, total=  32.4s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=9, total=  33.0s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=3 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 1}, n_neighbors=3, total=  32.2s
[CV] metric=<fun

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  32.7s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  33.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  32.8s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11, total=  35.9s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.2922470010554726}, n_neighbo

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  32.8s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  32.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  32.4s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  32.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.029224700

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  32.5s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  32.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  32.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=3 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=3, total=  32.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 0.000292247

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3, total=  32.0s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  32.8s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  32.2s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  32.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.92

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  32.0s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  32.6s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  32.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  32.5s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma

[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  30.3s
[CV] metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19 
[CV]  metric=<function rbf_kernel_dist at 0x7f9b642f3158>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  30.7s


[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 107.7min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': [<function rbf_kernel_dist at 0x7f9b642f3158>],
                         'metric_params': [{'gamma': 1},
                                           {'gamma': 0.2922470010554726},
                                           {'gamma': 0.029224700105547258},
                                           {'gamma': 0.002922470010554726},
                                           {'gamma': 0.0002922470010554726},
                                           {'gamma': 2.9224700105547256e-05},
                                           {'gamma'

In [23]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'metric': <function rbf_kernel_dist at 0x7f9b642f3158>, 'metric_params': {'gamma': 2.9224700105547257e-07}, 'n_neighbors': 3}
              precision    recall  f1-score   support

      acinar       0.86      1.00      0.93        32
       alpha       1.00      1.00      1.00       118
        beta       0.98      0.94      0.96        68
       delta       0.97      1.00      0.98        31
        duct       0.97      0.92      0.94        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.93      0.88      0.90        16

    accuracy                           0.97       318
   macro avg       0.97      0.97      0.97       318
weighted avg       0.97      0.97      0.97       318



In [26]:
# Untuned rbf knn
clf = KNeighborsClassifier(n_neighbors=num_labels, metric=rbf_kernel_dist, 
                           metric_params={'gamma' : 1 / num_features})
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)
print(f"Classification report for classifier {clf}:\n"
      f"{metrics.classification_report(y_test, prediction)}\n")

Classification report for classifier KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function rbf_kernel_dist at 0x7f864733e1e0>,
                     metric_params={'gamma': 4.3395243881270614e-05},
                     n_jobs=None, n_neighbors=20, p=2, weights='uniform'):
                   precision    recall  f1-score   support

            Aorta       0.00      0.00      0.00        13
          Bladder       0.00      0.00      0.00        42
    Brain_Myeloid       0.00      0.00      0.00       118
Brain_Non-Myeloid       0.07      1.00      0.13        86
        Diaphragm       0.00      0.00      0.00        18
              Fat       0.00      0.00      0.00       119
            Heart       0.00      0.00      0.00       100
           Kidney       0.00      0.00      0.00        18
  Large_Intestine       0.00      0.00      0.00       103
      Limb_Muscle       0.00      0.00      0.00        22
            Liver       0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))


**Poly Kernel knn**

In [25]:
param_grid = {'n_neighbors': [num_labels, 3, 5, 11, 19], 
              'metric_params': [{'gamma' : 1, }, {'gamma' : 1000000/denom}, {'gamma' : 100000/denom}, {'gamma' : 10000/denom}, 
                                {'gamma' : 1000/denom}, {'gamma' : 100/denom},{'gamma' : 10/denom},{'gamma' : 1/denom}],
              'metric': [poly_kernel_dist]}

grid = GridSearchCV(KNeighborsClassifier(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  43.7s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   43.7s remaining:    0.0s


[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  44.0s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  44.8s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  43.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  44.2s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=3 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=3, total=  42.5s
[CV] me

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  43.0s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  44.1s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11, total=  42.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=11, total=  44.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  44.2s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  43.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  46.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  43.4s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3, total=  42.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3, total=  43.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  42.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  43.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamm

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  43.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  43.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  43.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  44.7s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  43.4s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  44.7s


[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 147.4min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': [<function poly_kernel_dist at 0x7f9b642f31e0>],
                         'metric_params': [{'gamma': 1},
                                           {'gamma': 0.2922470010554726},
                                           {'gamma': 0.029224700105547258},
                                           {'gamma': 0.002922470010554726},
                                           {'gamma': 0.0002922470010554726},
                                           {'gamma': 2.9224700105547256e-05},
                                           {'gamma

In [26]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'metric': <function poly_kernel_dist at 0x7f9b642f31e0>, 'metric_params': {'gamma': 1}, 'n_neighbors': 3}
              precision    recall  f1-score   support

      acinar       0.86      0.97      0.91        32
       alpha       1.00      0.99      1.00       118
        beta       0.96      0.96      0.96        68
       delta       0.97      1.00      0.98        31
        duct       1.00      0.92      0.96        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.93      0.88      0.90        16

    accuracy                           0.97       318
   macro avg       0.96      0.96      0.96       318
weighted avg       0.97      0.97      0.97       318



In [27]:
# Untuned poly knn
clf = KNeighborsClassifier(n_neighbors=num_labels, metric=poly_kernel_dist, 
                           metric_params={'gamma' : 1 / num_features})
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)
print(f"Classification report for classifier {clf}:\n"
      f"{metrics.classification_report(y_test, prediction)}\n")

Classification report for classifier KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function poly_kernel_dist at 0x7f864733e048>,
                     metric_params={'gamma': 4.3395243881270614e-05},
                     n_jobs=None, n_neighbors=20, p=2, weights='uniform'):
                   precision    recall  f1-score   support

            Aorta       1.00      0.08      0.14        13
          Bladder       0.76      0.69      0.72        42
    Brain_Myeloid       0.96      0.96      0.96       118
Brain_Non-Myeloid       0.69      0.59      0.64        86
        Diaphragm       0.38      0.33      0.35        18
              Fat       0.36      0.39      0.37       119
            Heart       0.64      0.48      0.55       100
           Kidney       1.00      0.11      0.20        18
  Large_Intestine       0.38      0.82      0.52       103
      Limb_Muscle       0.40      0.09      0.15        22
            Liver       1.00      0.62  

**Sigmoid knn Classifier**

In [27]:
param_grid = {'n_neighbors': [num_labels, 3, 5, 11, 19], 
              'metric_params': [{'gamma' : 1, }, {'gamma' : 1000000/denom}, {'gamma' : 100000/denom}, {'gamma' : 10000/denom}, 
                                {'gamma' : 1000/denom}, {'gamma' : 100/denom},{'gamma' : 10/denom},{'gamma' : 1/denom}],
              'metric': [poly_kernel_dist]}

grid = GridSearchCV(KNeighborsClassifier(), param_grid, refit = True, verbose = 2)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  43.1s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   43.1s remaining:    0.0s


[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  43.8s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  44.8s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  43.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=9, total=  44.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=3 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 1}, n_neighbors=3, total=  43.8s
[CV] me

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  44.4s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  43.0s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=5, total=  45.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n_neighbors=11, total=  42.8s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.2922470010554726}, n

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=11, total=  59.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  47.4s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  44.1s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.029224700105547258}, n_neighbors=19, total=  51.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  44.2s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  48.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  43.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0002922470010554726}, n_neighbors=9, total=  44.5s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 0.0

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3, total=  42.9s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=3, total=  43.7s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  43.4s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547256e-05}, n_neighbors=5, total=  43.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamm

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  42.8s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  43.7s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  44.6s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547255e-06}, n_neighbors=11, total=  48.3s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params

[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  47.0s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  43.2s
[CV] metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19 
[CV]  metric=<function poly_kernel_dist at 0x7f9b642f31e0>, metric_params={'gamma': 2.9224700105547257e-07}, n_neighbors=19, total=  44.2s


[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 150.7min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': [<function poly_kernel_dist at 0x7f9b642f31e0>],
                         'metric_params': [{'gamma': 1},
                                           {'gamma': 0.2922470010554726},
                                           {'gamma': 0.029224700105547258},
                                           {'gamma': 0.002922470010554726},
                                           {'gamma': 0.0002922470010554726},
                                           {'gamma': 2.9224700105547256e-05},
                                           {'gamma

In [28]:
print(grid.best_params_)

grid_predictions = grid.predict(X_test)
  
# print classification report
print(metrics.classification_report(y_test, grid_predictions))

{'metric': <function poly_kernel_dist at 0x7f9b642f31e0>, 'metric_params': {'gamma': 1}, 'n_neighbors': 3}
              precision    recall  f1-score   support

      acinar       0.86      0.97      0.91        32
       alpha       1.00      0.99      1.00       118
        beta       0.96      0.96      0.96        68
       delta       0.97      1.00      0.98        31
        duct       1.00      0.92      0.96        37
 endothelial       1.00      1.00      1.00         1
 mesenchymal       1.00      1.00      1.00        15
          pp       0.93      0.88      0.90        16

    accuracy                           0.97       318
   macro avg       0.96      0.96      0.96       318
weighted avg       0.97      0.97      0.97       318



In [29]:
# Untuned sigmoid knn
clf = KNeighborsClassifier(n_neighbors=num_labels, metric=sigmoid_kernel_dist, 
                           metric_params={'gamma' : 1 / num_features})
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)
print(f"Classification report for classifier {clf}:\n"
      f"{metrics.classification_report(y_test, prediction)}\n")

Classification report for classifier KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function sigmoid_kernel_dist at 0x7f864733e0d0>,
                     metric_params={'gamma': 4.3395243881270614e-05},
                     n_jobs=None, n_neighbors=20, p=2, weights='uniform'):
                   precision    recall  f1-score   support

            Aorta       0.00      0.00      0.00        13
          Bladder       0.00      0.00      0.00        42
    Brain_Myeloid       0.00      0.00      0.00       118
Brain_Non-Myeloid       0.07      1.00      0.13        86
        Diaphragm       0.00      0.00      0.00        18
              Fat       0.00      0.00      0.00       119
            Heart       0.00      0.00      0.00       100
           Kidney       0.00      0.00      0.00        18
  Large_Intestine       0.00      0.00      0.00       103
      Limb_Muscle       0.00      0.00      0.00        22
            Liver       0.00      0.0

  _warn_prf(average, modifier, msg_start, len(result))
