In [1]:
# Uncomment, modify and run this cell if you work with Google Colab to mount the required data

#import os
#from google.colab import drive
#drive.mount('/content/drive')
#os.chdir("/content/drive/MyDrive/DINO_VIT_Tutorial") # insert your path where the downloaded data folder is

In [2]:
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score

# Classification

Now we will use the apple dataset to make things a bit more interesting.
It is a binary classification task ("healthy" vs. "damaged"). We will evaluate how well different embeddings combined with different shallow classifiers do.

In [3]:
# Location of the precomputed embeddings and the name of the embedding to load.
# Change EMBEDDING_NAME to evaluate a different backbone
# (e.g. "dino_resnet50" -- presumably stored in the same folder; verify the file name).
EMBEDDING_DIR = "./data/cascifw_apple/embeddings"
EMBEDDING_NAME = "dino_vits16"

# X holds one embedding vector per sample, y the corresponding class label.
X = np.load(f"{EMBEDDING_DIR}/{EMBEDDING_NAME}.npy")
y = np.load(f"{EMBEDDING_DIR}/labels.npy")

# Fail early if features and labels do not line up (e.g. wrong embedding file).
assert X.shape[0] == y.shape[0], (
    f"Embeddings and labels disagree on sample count: {X.shape[0]} vs {y.shape[0]}"
)

In [4]:
# Show (feature matrix shape, label vector shape) as the cell output.
(X.shape, y.shape)

((5848, 384), (5848,))

In [5]:
# Hold out 25% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [6]:
# Candidate values for the number of neighbours explored by the grid search.
hyperparams = {
    "n_neighbors": [5, 10, 15],
}

# k-nearest-neighbours classifier with default settings as the base estimator.
model = KNeighborsClassifier()

In [7]:
# Exhaustive search over `hyperparams` with 5-fold cross-validation on the
# training split; verbose=2 logs one line per individual fit.
gs = GridSearchCV(model, hyperparams, cv=5, verbose=2)
gs.fit(X_train, y_train)

# Display the per-candidate timings and cross-validation scores.
gs.cv_results_

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ......................................n_neighbors=5; total time=   0.1s
[CV] END ......................................n_neighbors=5; total time=   0.1s
[CV] END ......................................n_neighbors=5; total time=   0.1s
[CV] END ......................................n_neighbors=5; total time=   0.1s
[CV] END ......................................n_neighbors=5; total time=   0.1s
[CV] END .....................................n_neighbors=10; total time=   0.1s
[CV] END .....................................n_neighbors=10; total time=   0.1s
[CV] END .....................................n_neighbors=10; total time=   0.1s
[CV] END .....................................n_neighbors=10; total time=   0.1s
[CV] END .....................................n_neighbors=10; total time=   0.1s
[CV] END .....................................n_neighbors=15; total time=   0.1s
[CV] END .....................................n_n

{'mean_fit_time': array([0.00182319, 0.00166297, 0.00192809]),
 'std_fit_time': array([0.00037143, 0.00069917, 0.00088118]),
 'mean_score_time': array([0.10308919, 0.10948772, 0.12154789]),
 'std_score_time': array([0.00575527, 0.01608718, 0.01035699]),
 'param_n_neighbors': masked_array(data=[5, 10, 15],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 5}, {'n_neighbors': 10}, {'n_neighbors': 15}],
 'split0_test_score': array([0.83940774, 0.86218679, 0.85763098]),
 'split1_test_score': array([0.87685291, 0.88141391, 0.89053592]),
 'split2_test_score': array([0.84264538, 0.8608894 , 0.87913341]),
 'split3_test_score': array([0.86431015, 0.8631699 , 0.87343216]),
 'split4_test_score': array([0.86202965, 0.8677309 , 0.87457241]),
 'mean_test_score': array([0.85704917, 0.86707818, 0.87506097]),
 'std_test_score': array([0.0140599 , 0.00752915, 0.01060901]),
 'rank_test_score': array([3, 2, 1], dtype=int32)}

**Now we test the best model on our test set.**

In [8]:
# Take the estimator selected by the grid search and score it on the held-out test split.
best_model = gs.best_estimator_
pred = best_model.predict(X_test)

# Per-class precision / recall / F1 plus overall accuracy.
report = classification_report(y_test, pred)
print(report)

              precision    recall  f1-score   support

           0       0.92      0.88      0.90       962
           1       0.79      0.85      0.82       500

    accuracy                           0.87      1462
   macro avg       0.86      0.87      0.86      1462
weighted avg       0.88      0.87      0.87      1462



**Mean validation accuracy (≈0.875 for the best candidate) and test accuracy (0.87) are very close, which is good: the cross-validation estimate carries over to unseen data.**

## Tasks (15 minutes)

- Try different combinations of embeddings and classifiers. You can find an overview of available models [here](https://scikit-learn.org/stable/supervised_learning.html). Non-linear classifiers should do better in general. Use reasonable hyperparameters for your classifier of choice.
- There are precalculated embeddings for `dino_resnet50`. You can make a comparison with the ResNet50 that was pretrained in a supervised way.
- How does the accuracy change if you apply dimensionality reduction before fitting the classifier? What seems to be an ideal number of dimensions (e.g. principal components) to include?

**Let's see if you can find a combination that matches or surpasses the accuracy we achieved in our paper.**