In [1]:
import json
import random
import time

import numpy as np
from metric_learn import MMC_Supervised
from scipy.io import loadmat
from scipy.spatial import distance
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler

In [2]:
def kNN(k,query,qLabels,qID,gallery,gLabels,gID,metric):
    """Rank-k retrieval accuracy of a nearest-neighbour search.

    For every query row the gallery rows that share BOTH the query's label
    and its camera id are discarded; the remaining gallery rows are ranked by
    distance to the query. A query counts as a hit at rank ``k`` when its
    label appears among the labels of its ``k`` closest remaining gallery rows.

    Parameters
    ----------
    k : int or sequence of int
        Rank(s) to evaluate. A scalar yields a single float; any
        list/tuple/array of ranks yields a list of floats in the same order.
    query, gallery : array-like, one feature vector per row
    qLabels, qID : array-like, label and camera id of each query row
    gLabels, gID : array-like, label and camera id of each gallery row
    metric : str or callable
        Forwarded unchanged to ``scipy.spatial.distance.cdist``.

    Returns
    -------
    float or list of float
        Fraction of queries that are hits at each requested rank.

    Raises
    ------
    ZeroDivisionError
        If ``query`` is empty.
    """
    # Coerce once so plain Python lists work with the fancy indexing below.
    query, gallery = np.asarray(query), np.asarray(gallery)
    qLabels, qID = np.asarray(qLabels), np.asarray(qID)
    gLabels, gID = np.asarray(gLabels), np.asarray(gID)
    n_query = len(query)

    # G[i] holds the gallery indices that may be matched against query i:
    # everything except rows with the same label seen by the same camera.
    G = []
    for i in range(n_query):
        excluded = (gLabels == qLabels[i]) & (gID == qID[i])
        G.append(np.flatnonzero(~excluded))
    print('G')

    # ranked_labels[i]: labels of the usable gallery rows, nearest first.
    ranked_labels = []
    for i in range(n_query):
        dist = distance.cdist(np.reshape(query[i],(1,-1)),gallery[G[i]],metric = metric)
        order = np.argsort(dist[0])
        ranked_labels.append(gLabels[G[i]][order])
    print('sorted_idx')

    def accuracy(n):
        # A query is a hit when its own label is among its n nearest labels.
        hits = 0
        for i in range(n_query):
            if qLabels[i] in ranked_labels[i][:n]:
                hits += 1
        return hits/n_query
    print('NN')

    # Scalars (Python or NumPy ints) give one number; any sequence gives a list.
    if np.ndim(k) == 0:
        return accuracy(k)
    return [accuracy(n) for n in k]

# Marker output: reaching this line means the kNN definition above executed without error.
print('working')

working


In [3]:
# Read the CUHK03 protocol file and pull each field out as a flat 1-D array.
data = loadmat('PR_data/cuhk03_new_protocol_config_labeled.mat')

camID, filelist, gallery_idx, labels, query_idx, train_idx = (
    data[_key].flatten()
    for _key in ('camId', 'filelist', 'gallery_idx', 'labels', 'query_idx', 'train_idx')
)

# Report the length of every field as a quick sanity check.
for _name, _arr in (
    ('camID', camID),
    ('filelist', filelist),
    ('gallery_idx', gallery_idx),
    ('labels', labels),
    ('query_idx', query_idx),
    ('train_idx', train_idx),
):
    print(_name + ':', _arr.shape)

camID: (14096,)
filelist: (14096,)
gallery_idx: (5328,)
labels: (14096,)
query_idx: (1400,)
train_idx: (7368,)


In [4]:
# Prefer the .mat feature file; if it is missing, fall back to the JSON dump.
try:
    features = loadmat('PR_data/features.mat')['features']
except FileNotFoundError:
    print('exception handling')
    with open('PR_data/feature_data.json','r') as fh:
        features = json.load(fh)
    # each row is a feature (data instance)
    features = np.asarray(features)
print(features.shape)

(14096, 2048)


In [5]:
# The index arrays are shifted by one before use -- presumably because the
# .mat file stores 1-based indices (TODO confirm).

# Fit the standardiser on the training rows only, then apply that same
# scaling to the training, query and gallery rows.
scalar = StandardScaler().fit(features[train_idx-1])
train = scalar.transform(features[train_idx-1])
query = scalar.transform(features[query_idx-1])
gallery = scalar.transform(features[gallery_idx-1])

# Identity labels and camera ids for each split.
tLabels, qLabels, gLabels = labels[train_idx-1], labels[query_idx-1], labels[gallery_idx-1]
tID, qID, gID = camID[train_idx-1], camID[query_idx-1], camID[gallery_idx-1]

# Number of distinct identities in the training split (767 in the recorded run).
c = len(np.unique(tLabels))
print(c)

# Training rows minus identity count (6601 in the recorded run).
print(len(train)-c)

767
6601


In [6]:
class Datapoint:
    """One sample: its feature vector, its label, and its camera id."""

    def __init__(self, features, label, cam_id):
        # Store the three fields exactly as given; no copying or validation.
        self.features, self.label, self.cam_id = features, label, cam_id

class Dataset:
    """A list of datapoints with helpers that stack one attribute into an array."""

    def __init__(self, datapoint_list=None):
        # A falsy argument (None or an empty list) results in a fresh empty list;
        # a non-empty list is stored as-is (not copied).
        self.datapoints = datapoint_list if datapoint_list else []

    def _column(self, attr):
        # Gather a single attribute from every datapoint into a NumPy array.
        return np.array([getattr(point, attr) for point in self.datapoints])

    def features_array(self):
        """Return the ``features`` of every datapoint as one array."""
        return self._column('features')

    def labels(self):
        """Return the ``label`` of every datapoint as one array."""
        return self._column('label')

    def cam_ids(self):
        """Return the ``cam_id`` of every datapoint as one array."""
        return self._column('cam_id')

# Wrap each split's (features, label, camera id) triples in a Dataset.
# The three arrays of a split are index-aligned, so zip pairs them row by row.
training_dataset = Dataset([Datapoint(f, lab, cam) for f, lab, cam in zip(train, tLabels, tID)])
query_dataset = Dataset([Datapoint(f, lab, cam) for f, lab, cam in zip(query, qLabels, qID)])
gallery_dataset = Dataset([Datapoint(f, lab, cam) for f, lab, cam in zip(gallery, gLabels, gID)])

In [7]:
def fit_model(model, training_dataset, query_dataset, gallery_dataset):
    """Fit ``model`` on the training set and project all three datasets with it.

    ``model`` must expose ``fit(X, y)`` (whose return value is used as the
    fitted model) and ``transform(X)``. Each dataset must expose
    ``features_array()``; the training dataset must also expose ``labels()``.

    Returns a 4-tuple: (fitted model, transformed training features,
    transformed query features, transformed gallery features).
    """
    fitted = model.fit(training_dataset.features_array(), training_dataset.labels())
    # Project the splits in a fixed order: training, query, gallery.
    projections = tuple(
        fitted.transform(dataset.features_array())
        for dataset in (training_dataset, query_dataset, gallery_dataset)
    )
    return (fitted,) + projections

In [8]:
# Number of principal components to keep.
M_pca = 500

# For data of this size scikit-learn's default 'auto' solver can select the
# randomized SVD, whose output varies between runs unless a seed is given.
# Fixing random_state makes the projection (and everything downstream of it)
# reproducible under Restart & Run All.
pca = PCA(n_components=M_pca, random_state=0)
pca, omega_train, omega_q_pca, omega_g_pca = fit_model(pca, training_dataset, query_dataset, gallery_dataset)
w_pca = pca.components_  # principal axes, one per row
print(omega_train.shape)
# Total fraction of variance retained by the kept components (displayed as the cell result).
np.sum(pca.explained_variance_ratio_)

(7368, 500)


0.9880119145376905

In [12]:
# Construct the supervised MMC metric learner. This only builds the estimator;
# no fitting happens on this line, so the message below marks construction only.
# NOTE(review): num_constraints presumably controls how many constraints are
# generated from the labels -- confirm against the installed metric-learn version.
L = MMC_Supervised(num_constraints=200)
print('mmc done')


mmc done


TypeError: object of type 'MMC_Supervised' has no len()

In [10]:
# Fit the MMC metric on the PCA-projected training features and report how
# long the fit took (the recorded run of this cell was interrupted by hand).
start = time.time()
L.fit(omega_train, training_dataset.labels())
print('mmc fit')
# Elapsed wall-clock seconds, rather than the raw start timestamp.
print(time.time() - start)

KeyboardInterrupt: 

In [None]:
# Apply the fitted MMC transform to the PCA-projected query and gallery
# features and report how long it took.
t = time.time()
omega_query, omega_gallery = L.transform(omega_q_pca), L.transform(omega_g_pca)
print('mmc transform')
# Elapsed wall-clock seconds, rather than the raw start timestamp.
print(time.time() - t)

In [None]:
# Rank-1, rank-5 and rank-10 accuracy in the MMC-transformed space,
# using Euclidean distance between transformed features.
acc_MMC = kNN(
    k=[1, 5, 10],
    query=omega_query,
    qLabels=qLabels,
    qID=qID,
    gallery=omega_gallery,
    gLabels=gLabels,
    gID=gID,
    metric='euclidean',
)
print('MMC with pca:', acc_MMC)