In [38]:
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from scipy.sparse.linalg import eigsh
from tensorflow import keras
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import NearestNeighbors
from torchvision import models

# Features

In [39]:
transform = transforms.Compose([
    transforms.Resize((224, 244)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), #Is transformed to be better with VGG16 model
    ])

def getDataset(train=True, sample_size=500):
    dataset = CIFAR10(root='./data',
                  train=train, 
                  download=True,
                  transform=transform)
    
    X = []
    y = []

    for i in range(sample_size):
        X.append( dataset[i][0] )
        y.append( dataset[i][1] )

    X = torch.stack( X )

    return X, y

X_train, y_train = getDataset() #Training data
X_test, y_test = getDataset(train=False) #Test data

Files already downloaded and verified
Files already downloaded and verified


In [40]:
pretrained_model = models.vgg16(pretrained=True)
pretrained_model.classifier = pretrained_model.classifier[:4]



In [41]:
X_train = pretrained_model(X_train).detach().numpy()
X_test = pretrained_model(X_test).detach().numpy()

# PCA

In [42]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Standardize the data
X_test = scaler.transform(X_test)

In [43]:
pca = PCA(n_components=150)  # Set the number of components to keep
training_features = pca.fit_transform(X_train)  # Fit PCA on the standardized data and transform
test_features = pca.transform(X_test)

# Spectral

In [44]:
nbrs = NearestNeighbors(n_neighbors=50).fit(training_features)

# Find the nearest neighbors
distances, indices = nbrs.kneighbors(training_features)

# Create an adjacency matrix
n_samples = training_features.shape[0]
adjacency_matrix = np.zeros((n_samples, n_samples))

# Populate the adjacency matrix
for i, neighbors in enumerate(indices):
    for neighbor in neighbors:
        adjacency_matrix[i, neighbor] = 1
        adjacency_matrix[neighbor, i] = 1  # Ensure symmetry for an undirected graph

In [45]:
dim=adjacency_matrix.shape[0]
adjacency_matrix = adjacency_matrix - np.identity(dim)
D = np.zeros([dim,dim])
for i in tqdm(range(dim)):
    D[i,i] = adjacency_matrix[i].sum()
L = D- adjacency_matrix    

100%|██████████| 500/500 [00:00<00:00, 250018.12it/s]


In [46]:
eigenvalues, eigenvectors = eigsh(L, k=32, which="SM") # overvej max_iter, tolerance?

In [47]:
threshold1 = 0
eigenvectors_bin = np.where(eigenvectors > threshold1, 1, 0)

# Classifier

In [48]:
clf = MLPClassifier(hidden_layer_sizes=(100)).fit(training_features, eigenvectors_bin)
test_hashes = clf.predict(test_features)



In [49]:
def mean_average_precision(test_hashes, training_hashes, test_labels, training_labels, compare_n):
    aps = []
    training_hashes = training_hashes[:compare_n,:]
    training_labels = training_labels[:compare_n]
    for i, test_hash in enumerate(tqdm(test_hashes)):
        label = test_labels[i]
        distances = np.abs(training_hashes - test_hashes[i]).sum(axis=1)
        tp = np.where(training_labels==label, 1, 0)
        hash_df = pd.DataFrame({"distances":distances, "tp":tp}).reset_index()
        hash_df = hash_df.drop(index=i)
        hash_df = hash_df.sort_values(["distances", "index"]).reset_index(drop=True)
        hash_df = hash_df.drop(["index", "distances"], axis=1).reset_index()
        hash_df = hash_df[hash_df["tp"]==1]
        hash_df["tp"] = hash_df["tp"].cumsum()
        hash_df["index"] = hash_df["index"] +1 
        precision = np.array(hash_df["tp"]) / np.array(hash_df["index"])
        ap = precision.mean()
        aps.append(ap)
    
    return np.array(aps).mean()

In [50]:
y_test = np.array(y_test)
y_train = np.array(y_train)

In [51]:
aps = mean_average_precision(test_hashes, eigenvectors_bin, y_test, y_train, 500)

100%|██████████| 500/500 [00:00<00:00, 558.37it/s]


In [52]:
aps

0.2369291525436423