In [1]:
import spams
import pickle
from scipy.spatial import distance
from time import time
import numpy as np
import os
import imageio
import matplotlib.pyplot as plt
import scipy
import scipy.stats
from joblib import Parallel, delayed
%matplotlib inline

In [2]:
# Load the pre-extracted LFW data bundle from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only run this against a LFW_DATA.pickle from a trusted source.
with open("../data/LFW_DATA.pickle", "rb") as pickle_file:
    lfw = pickle.load(pickle_file)

In [3]:
def dictionary_learning(patch_feature, lambda1=1, dictionary_size=100, batchsize=100,
                       posD=True):
    """Learn a dictionary for one patch with ``spams.trainDL_Memory``.

    Parameters
    ----------
    patch_feature : array-like
        Training data laid out as (feature size, sample size).
    lambda1 : number
        Forwarded to SPAMS as ``lambda1``.
    dictionary_size : int
        Forwarded to SPAMS as ``K`` (number of dictionary elements).
    batchsize : int
        Forwarded to SPAMS as ``batchsize``.
    posD : bool
        Forwarded to SPAMS as ``posD``.

    Returns
    -------
    The value returned by ``spams.trainDL_Memory`` (the learned dictionary).
    """
    # The data is converted to Fortran (column-major) order before the call.
    training_data = np.asfortranarray(patch_feature)
    # mode=0 and numThreads=-1 are fixed here; everything else comes from
    # the arguments.
    return spams.trainDL_Memory(
        training_data,
        K=dictionary_size,
        mode=0,
        lambda1=lambda1,
        numThreads=-1,
        batchsize=batchsize,
        posD=posD,
    )

def sparse_feature_coding(patch_feature, dictionary, lambda1=1, pos=True):
    """Sparse-code ``patch_feature`` against ``dictionary`` with ``spams.lasso``.

    Parameters
    ----------
    patch_feature : array-like
        Data to encode, laid out as (feature size, sample size).
    dictionary : array-like
        Dictionary to encode against (e.g. the output of
        ``dictionary_learning``). This argument is what is passed to SPAMS;
        no notebook-level variable is consulted.
    lambda1 : number
        Forwarded to SPAMS as ``lambda1``.
    pos : bool
        Forwarded to SPAMS as ``pos``.

    Returns
    -------
    Dense matrix of coefficients obtained by calling ``todense()`` on the
    sparse result of ``spams.lasso``.
    """
    # Both inputs are handed to SPAMS in Fortran (column-major) order.
    X_patch = np.asfortranarray(patch_feature)
    dictionary = np.asfortranarray(dictionary)
    # NOTE(review): the SPAMS documentation describes mode=0 as the
    # L1-norm-constrained problem (||alpha||_1 <= lambda1) and mode=2 as the
    # penalized one -- confirm that mode=0 is the formulation intended here.
    alpha = spams.lasso(
        X_patch,
        D=dictionary,
        return_reg_path=False,
        lambda1=lambda1,
        numThreads=-1,
        mode=0,
        pos=pos,
    )
    return alpha.todense()

In [4]:
# Stack the database descriptors into a single 2-D array.
X = np.array(lfw["database_feature"])
# First 59 columns only, converted to Fortran (column-major) order.
X_patch = np.asfortranarray(X[:, :59])
# Cut the feature axis into 80 equally sized patches.
X_split = np.split(X, 80, axis=1)

# Report: full shape, single-patch shape, patch count, shape of one split.
for summary in (X.shape, X_patch.shape, len(X_split), X_split[0].shape):
    print(summary)

(13113, 4720)
(13113, 59)
80
(13113, 59)


In [5]:
# Hyper-parameters shared by dictionary learning and sparse coding.
BATCHSIZE = 100
DICTIONARY_SIZE = 100
POS_D_CONSTRAINT = True
POS_LARS_CONSTRAINT = True
LAMBDA_DL = 1
LAMBDA_LARS = 1

# Train one dictionary per face patch and sparse-encode the database
# features of that patch with it.
D_list = []
alpha_list = []
for patch, single_patch in enumerate(X_split, start=1):
    # The helpers expect (feature size, sample size), hence the transpose.
    X_patch = single_patch.T
    if patch % 10 == 0:
        print("patch:", patch)
    # Learn the dictionary for this patch; `D` is rebound at notebook scope
    # on every iteration.
    D = dictionary_learning(X_patch,
                            lambda1=LAMBDA_DL,
                            dictionary_size=DICTIONARY_SIZE,
                            batchsize=BATCHSIZE,
                            posD=POS_D_CONSTRAINT)
    # Encode the same patch; transpose back to (sample size, code size).
    alpha = sparse_feature_coding(X_patch, D,
                                  lambda1=LAMBDA_LARS,
                                  pos=POS_LARS_CONSTRAINT).T
    D_list.append(D)
    alpha_list.append(alpha)

# Join the per-patch codes side by side. Each code is viewed as a plain
# ndarray so the result is an ndarray, without first materialising an extra
# stacked 3-D copy of all the codes.
sparse_database_feature = np.concatenate(
    [np.asarray(patch_alpha) for patch_alpha in alpha_list], axis=1)
print("sparse database shape:", sparse_database_feature.shape)

patch: 10
patch: 20
patch: 30
patch: 40
patch: 50
patch: 60
patch: 70
patch: 80
sparse database shape: (13113, 8000)


In [6]:
# Sparse-encode the query features, patch by patch, re-using the
# dictionaries stored in D_list (no dictionary is trained in this cell).
X_query = lfw["query_feature"]
X_query_split = np.split(X_query, 80, axis=1)

alpha_list = []
patch = dictionary_index = 0
for patch, single_patch in enumerate(X_query_split, start=1):
    # Patch number k (1-based) uses the dictionary stored at index k - 1;
    # `D` is rebound at notebook scope on every iteration.
    D = D_list[patch - 1]
    dictionary_index = patch
    X_patch = np.asfortranarray(single_patch).T
    if patch % 10 == 0:
        print("patch:", patch, "dictionary_index:", dictionary_index)
    # Encode this patch and transpose the codes back to one row per sample.
    alpha = sparse_feature_coding(
        X_patch, D, lambda1=LAMBDA_LARS, pos=POS_LARS_CONSTRAINT
    ).T
    alpha_list.append(alpha)

# Join the per-patch codes side by side into one matrix.
sparse_query_feature = np.concatenate(np.array(alpha_list), axis=1)
print("sparse query shape", sparse_query_feature.shape)

patch: 10 dictionary_index: 10
patch: 20 dictionary_index: 20
patch: 30 dictionary_index: 30
patch: 40 dictionary_index: 40
patch: 50 dictionary_index: 50
patch: 60 dictionary_index: 60
patch: 70 dictionary_index: 70
patch: 80 dictionary_index: 80
sparse query shape (120, 8000)


In [7]:
# Identity label per database row: the text before the first backslash in
# each entry's [0][0] string.
identity_entries = lfw["database_identity"]
database_id_list = np.array(
    [identity_entries[i][0][0].split("\\")[0] for i in range(len(identity_entries))]
)

# Sorted array of the distinct identity labels.
database_id_uni = np.array(sorted(set(database_id_list)))
print("have {} different names in database".format(len(database_id_uni)))

# Blend every row with the mean code of its identity:
#     new_row = row * weight + identity_mean * (1 - weight)
# NOTE(review): with weight = 5 this evaluates to 5 * row - 4 * identity_mean,
# i.e. the mean gets a negative coefficient -- confirm this is intended.
weight = 5
sparse_database_feature_copy = np.copy(sparse_database_feature)
for identity in database_id_uni:
    id_mask = database_id_list == identity
    sub_feature = sparse_database_feature[id_mask]
    id_mean_feature = sub_feature.mean(axis=0)
    # Results go into the copy; sparse_database_feature itself is untouched.
    sparse_database_feature_copy[id_mask, :] = (
        sub_feature * weight + id_mean_feature * (1 - weight)
    )

have 5749 different names in database


In [12]:
# Persist the identity-weighted database codes and the query codes as .npy.
for npy_path, feature_matrix in (
    ("../data/sparse_database_feature_identityF.npy", sparse_database_feature_copy),
    ("../data/sparse_query_feature_identityF.npy", sparse_query_feature),
):
    np.save(npy_path, feature_matrix)

In [11]:
 # Sanity check: dimensions of the (un-weighted) sparse database matrix.
 sparse_database_feature.shape

(13113, 8000)

In [None]:
# Display the (un-weighted) sparse database matrix as the cell output.
sparse_database_feature