In [1]:
import numpy as np
import pickle
from sklearn.covariance import OAS
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import NearestCentroid

In [2]:
### TO COMPLETE ###
# Directory containing the pickled feature files loaded below (test.pkl, val.pkl, train.pkl).
# Keep the trailing slash: file names are appended by plain string concatenation.
dataPath = './wideresnet/'

# Useful functions

### Data loader

In [3]:
# Function used to retrieve the representations of mini-ImageNet images from a wideResNet pretrained with S2M2r.
# base_dataset contains the representations of the images used to pretrain the backbone.
# val_dataset contains the representations of the images used to select the hyperparameters of the backbone.
# novel_dataset contains the representations of the images used to generate few-shot problems. 
def load_pickle(file):
    """Load a pickled ``{class_key: features}`` mapping and flatten it into arrays.

    Parameters:
        file  --  path of the pickle file to read

    Returns a dict with two entries:
        'data'    --  every feature vector stacked along a new first axis, classes
                      following the iteration order of the pickled mapping
        'labels'  --  the class key of each stacked vector, in the same order
    """
    # NOTE: unpickling can execute arbitrary code; only open files from a trusted source.
    with open(file, 'rb') as handle:
        per_class = pickle.load(handle)
        label_chunks = []
        feature_rows = []
        for key in per_class:
            class_features = per_class[key]
            # One label per feature vector of this class.
            label_chunks.append(np.full(shape=len(class_features), fill_value=key))
            feature_rows.extend(class_features)
        return {
            'data': np.stack(feature_rows, axis=0),
            'labels': np.concatenate(label_chunks),
        }

# Load the three splits in the order novel (test), validation, base (train).
novel_dataset, val_dataset, base_dataset = [
    load_pickle(dataPath + split_file)
    for split_file in ("test.pkl", "val.pkl", "train.pkl")
]

In [4]:
# The data are reshaped in [n_class, elements_per_class, length of the representations].
def shape_dataset(dataset, elements_per_class=600):
    """Regroup a flat dataset into one slab of samples per class.

    Parameters:
        dataset  --  dict with 'data' of shape [number of samples, length] and
                     'labels' of shape [number of samples]
        elements_per_class  --  number of samples each class must contain (default 600)

    Returns:
        data  --  array of shape [n_class, elements_per_class, length]
        data_labels  --  array of shape [n_class, elements_per_class] holding the
                         (numeric) label of each class as floats

    Classes appear in the order in which their label first occurs in dataset['labels'].

    Raises:
        ValueError  --  if a class does not contain exactly elements_per_class samples
    """
    features = dataset["data"]
    labels = dataset["labels"]

    # np.unique returns sorted values; re-order them by first occurrence instead.
    _, first_seen = np.unique(labels, return_index=True)
    ordered_labels = labels[np.sort(first_seen)]

    # The empty float seeds keep the result dtype and the empty-input shape, while the
    # slabs are concatenated once at the end instead of re-copying at every class.
    data_blocks = [np.zeros((0, elements_per_class, features.shape[1]))]
    label_blocks = [np.zeros((0, elements_per_class))]
    for label in ordered_labels:
        indices = np.where(labels == label)[0]
        if indices.shape[0] != elements_per_class:
            raise ValueError(
                "class {!r} has {} samples, expected {}".format(
                    label, indices.shape[0], elements_per_class
                )
            )
        data_blocks.append(np.reshape(features[indices, :], (1, elements_per_class, -1)))
        label_blocks.append(np.ones((1, elements_per_class)) * label)

    return np.concatenate(data_blocks, axis=0), np.concatenate(label_blocks, axis=0)

# Regroup each split per class, then report the resulting shapes (novel, val, base).
novel_data, novel_labels = shape_dataset(novel_dataset)
val_data, val_labels = shape_dataset(val_dataset)
base_data, base_labels = shape_dataset(base_dataset)
for split_data in (novel_data, val_data, base_data):
    print(split_data.shape)

(20, 600, 640)
(16, 600, 640)
(64, 600, 640)


In [5]:
# Generation of a few-shot problem (run).
# Kept for any cell that still reads these names; shuffle() no longer depends on them.
elements_per_class = 600
shuffle_indices = np.arange(elements_per_class)

def shuffle(data):
    """Shuffle the samples of every class independently, in place.

    Parameters:
        data  --  array of shape [n_class, elements per class, length]

    Returns the same array object, with the second axis permuted separately
    for each class. The permutation length is taken from data.shape[1], so any
    number of samples per class is supported.
    """
    n_samples = data.shape[1]
    for i in range(data.shape[0]):
        order = np.random.permutation(n_samples)
        # Fancy indexing copies the right-hand side, so the in-place write is safe.
        data[i, :, :] = data[i, order, :]
    return data
        
# w : number of classes.
# k : number of training examples per class.
# q : number of query examples per class (test).
def generate_run(w, k, q, data):
    """Draw one few-shot problem from data without modifying it.

    Parameters:
        w  --  number of classes to draw
        k  --  number of training examples per class
        q  --  number of query examples per class
        data  --  array of shape [n_class, elements per class, length]

    Returns a new array of shape [w, k+q, length]: w classes picked at random
    without replacement and, for each of them, k+q samples picked at random
    without replacement. As with slicing, w and k+q are capped by the number of
    classes and of samples per class available.
    """
    n_classes, n_per_class = data.shape[0], data.shape[1]
    classes = np.random.permutation(n_classes)[:w]
    # Number of samples actually drawn per class, following slice semantics.
    n_take = len(range(n_per_class)[:k + q])
    # Only the rows that end up in the run are permuted and copied; the caller's
    # array keeps its order.
    picks = np.empty((classes.shape[0], n_take), dtype=np.intp)
    for row in range(classes.shape[0]):
        picks[row] = np.random.permutation(n_per_class)[:k + q]
    return data[classes[:, np.newaxis], picks, :]

### Classifiers

In [6]:
def stats(precisions):
    """Summarize per-run accuracies.

    Returns (mean, half_width) where half_width = 1.96 * std / sqrt(n), std being
    np.std with its default settings and n the number of runs. 1.96 is the usual
    normal-approximation factor for a 95% confidence interval.
    """
    n_runs = len(precisions)
    average = np.mean(precisions)
    half_width = np.std(precisions) * 1.96 / np.sqrt(n_runs)
    return average, half_width

In [7]:
# The accuracy is averaged over several few-shot problems (runs).
# w: number of classes.
# k: number of training examples per class.
# q: number of query examples per class (test).
# basis: new basis on which the features of the data samples are projected.
# weights:  np.array containing coefficients used to normalize the features of the data samples.
def perfs(w, k, q, runs, data, basis=None, weights=None, clf_name=None):
    """Average the accuracy of a classifier over `runs` random few-shot problems.

    Each problem is drawn with generate_run(w, k, q, data) and scored with
    perf_on_one_problem; basis, weights and clf_name are forwarded unchanged.
    Returns what stats() computes from the list of per-run accuracies.
    """
    precisions = [
        perf_on_one_problem(
            generate_run(w, k, q, data),
            k,
            basis=basis,
            weights=weights,
            clf_name=clf_name,
        )
        for _ in range(runs)
    ]
    return stats(precisions)

In [8]:
def classifier(name):
    """Build a fresh, unfitted classifier from its short name.

    Parameters:
        name  --  'LDA' (LinearDiscriminantAnalysis, lsqr solver, OAS covariance estimator),
                  'NCM' (NearestCentroid) or
                  'LR'  (LogisticRegression with random_state=0, max_iter=500)

    Raises:
        ValueError  --  if name is not one of the supported names (including None)
    """
    if name == 'LDA':
        oa = OAS(store_precision=False, assume_centered=False)
        return LinearDiscriminantAnalysis(solver='lsqr', covariance_estimator=oa)
    if name == 'NCM':
        return NearestCentroid()
    if name == 'LR':
        return LogisticRegression(random_state=0, max_iter=500)
    # Fail loudly instead of hitting an unbound local variable on return.
    raise ValueError(
        "Unknown classifier name {!r}; expected 'LDA', 'NCM' or 'LR'.".format(name)
    )

In [9]:
def perf_on_one_problem(dataset, k, basis=None, weights=None, clf_name=None):
    """Fit a classifier on one few-shot problem and return its query accuracy.

    Parameters:
        dataset  --  array of shape [number of classes, samples per class, length];
                     the first k samples of each class are used for training,
                     the remaining ones for testing
        k  --  number of training examples per class
        basis, weights  --  when weights is given, dataset is first passed through
                            our_preprocess(dataset, basis, weights); basis must then be given too
        clf_name  --  name handed to classifier()

    Returns the value of clf.score on the test samples.
    """
    if weights is not None:
        assert basis is not None
        dataset = our_preprocess(dataset, basis, weights)

    clf = classifier(clf_name)

    # Split every class into its support (train) and query (test) samples.
    support = dataset[:, :k, :]
    query = dataset[:, k:, :]

    # Class i is labelled with the float i, repeated once per sample of that class.
    class_ids = np.arange(dataset.shape[0], dtype=float)
    y_train = np.repeat(class_ids, support.shape[1])
    y_test = np.repeat(class_ids, query.shape[1])

    # Flatten [class, sample, length] into [class * sample, length].
    X_train = support.reshape(-1, support.shape[2])
    X_test = query.reshape(-1, query.shape[2])

    clf.fit(X_train, y_train)
    return clf.score(X_test, y_test)

## Our optimization method

In [25]:
def our_preprocess(dataset, basis, weights):
    """Project the samples onto the vectors of basis, then rescale each coordinate.

    Parameters:
        dataset  --  array whose last axis has size `length` (leading axes are kept)
        basis  --  matrix of shape [length, number of vectors]
        weights  --  array multiplied element-wise with the projected data
                     (must broadcast against it)
    """
    projected = dataset @ basis
    return projected * weights

In [19]:
# Graph Fourier Transform
def graph_fourier_transform(GSO):
    """Eigendecompose a symmetric graph shift operator (e.g. an adjacency matrix).

    Returns (eigenvalues, eigenvectors) as computed by np.linalg.eigh: eigenvalues
    in ascending order, and eigenvectors[:, i] the normalized eigenvector
    associated with eigenvalues[i].
    """
    # The operator must be exactly symmetric.
    is_symmetric = (GSO == GSO.T).all()
    assert is_symmetric
    eigenvalues, eigenvectors = np.linalg.eigh(GSO)
    return eigenvalues, eigenvectors

def improved_covariance_matrix(data):
    """Covariance of the features after removing each sample's class mean.

    Parameters:
        data  --  array of shape [number of classes, samples per class, length]

    Returns a [length, length] matrix: X^T X / (N - 1), where X stacks all the
    class-centered samples and N is the total number of samples.
    """
    # Center every class on its own mean.
    centered = np.empty_like(data)
    for idx, class_samples in enumerate(data):
        centered[idx] = class_samples - class_samples.mean(axis=0, keepdims=True)

    # Pool the centered samples of all classes into one [N, length] matrix.
    pooled = centered.reshape(-1, centered.shape[-1])
    n_total = pooled.shape[0]
    return pooled.T @ pooled / (n_total - 1)

# Results

In [12]:
# Parameters
runs = 10_000  # number of few-shot problems each accuracy is averaged over
n_shot = 5     # training examples per class (the k passed to perfs)
sigma = 0.3    # sigma**2 is added to |eigenvalue| when the preprocessing weights are built

### Without preprocessing

In [13]:
# NCM on the raw features: 5 classes, n_shot training and 15 query examples per class.
mean, conf = perfs(w=5, k=n_shot, q=15, runs=runs, data=novel_data, clf_name="NCM")
np.round(100 * mean, 2), np.round(100 * conf, 2)

(78.5, 0.15)

In [14]:
# LR on the raw features: 5 classes, n_shot training and 15 query examples per class.
mean, conf = perfs(w=5, k=n_shot, q=15, runs=runs, data=novel_data, clf_name='LR')
np.round(100 * mean, 2), np.round(100 * conf, 2)

(81.66, 0.14)

In [15]:
# LDA on the raw features: 5 classes, n_shot training and 15 query examples per class.
mean, conf = perfs(w=5, k=n_shot, q=15, runs=runs, data=novel_data, clf_name='LDA')
np.round(100 * mean, 2), np.round(100 * conf, 2)

(79.81, 0.14)

### With preprocessing

In [23]:
# Graph: covariance matrix computed on the base dataset.
A = improved_covariance_matrix(base_data)
# v holds the eigenvectors used as the new basis, w the matching eigenvalues.
w, v = graph_fourier_transform(A)
# One weight per eigenvector, 1 / sqrt(|eigenvalue| + sigma^2), with a leading
# axis of size 1 so it broadcasts over the samples.
weights = (1 / np.sqrt(np.abs(w) + sigma**2))[np.newaxis, :]

In [26]:
# Ours: NCM on the preprocessed features (projection on v, rescaling by weights).
mean, conf = perfs(
    w=5, k=n_shot, q=15, runs=runs, data=novel_data,
    basis=v, weights=weights, clf_name="NCM",
)
np.round(100 * mean, 2), np.round(100 * conf, 2)

(79.93, 0.14)

In [27]:
# LR on the preprocessed features (projection on v, rescaling by weights).
mean, conf = perfs(
    w=5, k=n_shot, q=15, runs=runs, data=novel_data,
    basis=v, weights=weights, clf_name='LR',
)
np.round(100 * mean, 2), np.round(100 * conf, 2)

(82.56, 0.13)

In [28]:
# LDA on the preprocessed features (projection on v, rescaling by weights).
mean, conf = perfs(
    w=5, k=n_shot, q=15, runs=runs, data=novel_data,
    basis=v, weights=weights, clf_name='LDA',
)
np.round(100 * mean, 2), np.round(100 * conf, 2)

(80.32, 0.14)