In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import torch
import os
import sys

In [2]:
# Make modules next to the notebook and in ../src importable before the
# project-local imports below are resolved.
sys.path.extend(['.', '../src'])
from models import model
from dataset import data

In [3]:
# Absolute path of the parent of the current working directory.
path4 = os.path.abspath('../')
device = 'cpu'
# Build the checkpoint path with os.path.join so the separator is correct on
# every OS. The former literal '\output\\triplet.pth' relied on the invalid
# escape sequence "\o" and on Windows-style backslashes.
filepath = os.path.join(path4, 'output', 'triplet.pth')
triplet = model('Triplet')
# map_location remaps the stored tensors onto `device` while loading.
triplet.load_state_dict(torch.load(filepath, map_location=torch.device(device)))
# data('Triplet') is unpacked as three loaders: train, validation and test.
train_loader, val_loader, test_loader = data('Triplet')

In [4]:
def embedd(loader, model, device=None):
    """Run ``model`` over every batch of ``loader`` and collect its outputs.

    Args:
        loader: Iterable of batches. Each batch must be indexable, with the
            model inputs at index 0 and the labels at index 3.
        model: Callable ``torch.nn.Module``. It is switched to evaluation mode
            as a side effect.
        device: Device the inputs are moved to before the forward pass. When
            ``None`` (the default) the device of the model's first parameter is
            used, falling back to the CPU for a model without parameters, so
            inputs and weights always live on the same device.

    Returns:
        Tuple ``(embeddings, targets)`` of NumPy arrays holding the model
        outputs and the labels, concatenated over all batches in iteration
        order.

    Note:
        ``torch.cat`` raises if ``loader`` yields no batches.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    embeddings = []
    targets = []
    model.eval()
    with torch.no_grad():
        for batch in loader:
            inputs, labels = batch[0], batch[3]
            # forward pass
            output = model(inputs.to(device))

            embeddings.append(output.cpu())
            # Labels never enter the model, so they only need to end up on the CPU.
            targets.append(labels.detach().cpu())

    return torch.cat(embeddings).numpy(), torch.cat(targets).numpy()

In [5]:
def _embedding_frame(targets, embeddings, column):
    """Pair each label with its embedding row in a frame sorted by label."""
    frame = pd.DataFrame({'label': list(targets), column: list(embeddings)},
                         columns=['label', column])
    return frame.sort_values('label').reset_index(drop=True)


# Embed every split with the triplet network.
train_embeddings, train_targets = embedd(train_loader, triplet)
val_embeddings, val_targets = embedd(val_loader, triplet)
test_embeddings, test_targets = embedd(test_loader, triplet)

# One frame per split: a 'label' column plus a column holding one embedding
# array per row.
train_dataset_emb = _embedding_frame(train_targets, train_embeddings, 'images_train')
val_dataset_emb = _embedding_frame(val_targets, val_embeddings, 'images_val')
test_dataset_emb = _embedding_frame(test_targets, test_embeddings, 'images_test')

In [6]:
# Training features: every column except the label.
X = train_dataset_emb.drop("label", axis=1)
# Unpack the per-row embedding arrays into one 2-D matrix with a row per
# sample. -1 lets NumPy infer the embedding width instead of hard-coding 512.
X_train = np.array(X["images_train"].tolist()).reshape(len(X), -1)

In [7]:
# Training labels as a plain Python list, in the same row order as the frame.
y = train_dataset_emb["label"]
y_train = y.to_numpy().tolist()

In [8]:
# Fit a 5-nearest-neighbour classifier on the training embeddings.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X=X_train, y=y_train)

KNeighborsClassifier()

In [9]:
# Validation features: unpack the per-row embedding arrays into one 2-D matrix
# with a row per sample. -1 lets NumPy infer the embedding width instead of
# hard-coding 512.
X_val = np.array(val_dataset_emb["images_val"].tolist()).reshape(len(val_dataset_emb), -1)
# Validation labels as a NumPy array, in the same row order as X_val.
y_val = val_dataset_emb["label"].to_numpy()

In [10]:
# Test features: unpack the per-row embedding arrays into one 2-D matrix with
# a row per sample. -1 lets NumPy infer the embedding width instead of
# hard-coding 512.
X_test = np.array(test_dataset_emb["images_test"].tolist()).reshape(len(test_dataset_emb), -1)
# Test labels as a NumPy array, in the same row order as X_test.
y_test = test_dataset_emb["label"].to_numpy()

In [11]:
# Classify the test and validation embeddings with the fitted k-NN model
# (test first, then validation).
y_pred_t, y_pred_v = (knn.predict(features) for features in (X_test, X_val))

In [12]:
# Share of correctly classified samples on the validation and test splits.
val_accuracy = accuracy_score(y_val, y_pred_v)
print("Accuracy val:", val_accuracy)
test_accuracy = accuracy_score(y_test, y_pred_t)
print("Accuracy test:", test_accuracy)

Accuracy val: 0.7678381256656017
Accuracy test: 0.7677784770295784
