In [0]:
# Mount Google Drive at the relative path 'drive' (Colab only); the dataset
# paths configured later in this notebook start with 'drive/My Drive/'.
from google.colab import drive
drive.mount('drive')

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [0]:
# Dependencies of the notebook (scikit-learn, NumPy, pickle).
import sklearn
from sklearn.cluster import KMeans
import numpy as np
from sklearn.svm import SVC
import pickle
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
import warnings
# Silences every Python warning for the rest of the session (this includes
# NumPy RuntimeWarnings); remove this line when debugging numerical issues.
warnings.filterwarnings('ignore')

In [0]:
def VLAD(X,visualDictionary):
    """Aggregate a set of local descriptors into one VLAD vector.

    Each row of ``X`` is assigned to a cluster of ``visualDictionary``; for
    every cluster the residuals (descriptor minus cluster center) of its
    members are summed. The stacked sums are flattened, power-normalized
    (signed square root) and finally L2-normalized.

    Parameters
    ----------
    X : 2-D array of shape (m, d)
        One descriptor per row.
    visualDictionary : fitted clustering model
        Must expose ``predict(X)``, ``cluster_centers_`` and ``n_clusters``
        (e.g. a fitted ``sklearn.cluster.KMeans``).

    Returns
    -------
    numpy.ndarray of shape (k * d,)
        The normalized VLAD vector. When every residual is zero the vector
        is returned as all zeros instead of NaN.
    """
    predictedLabels = visualDictionary.predict(X)
    centers = visualDictionary.cluster_centers_
    k = visualDictionary.n_clusters

    # Unpacking also enforces that X is two-dimensional.
    _, d = X.shape

    V = np.zeros([k, d])

    # Sum of residuals for every cluster (visual word) that received at
    # least one descriptor; empty clusters keep a zero row.
    for i in range(k):
        members = predictedLabels == i
        if members.any():
            V[i] = np.sum(X[members, :] - centers[i], axis=0)

    V = V.flatten()

    # Power normalization, also called square-rooting normalization.
    V = np.sign(V) * np.sqrt(np.abs(V))

    # L2 normalization. A zero norm (all descriptors coincide with their
    # centers) would turn the whole vector into NaN, so it is left as zeros.
    norm = np.sqrt(np.dot(V, V))
    if norm > 0:
        V = V / norm

    return V

In [0]:
def get_vlad(doc,layer,scales,kmean1,kmean2,pca,type_set="train"):
    """Compute VLAD vectors for every image, using all given scales together.

    For each scale index in ``scales`` the feature file
    ``<dir_path_DataSet><doc>/<load_fold><doc>X_<type_set>_<layer>(<n>).npy``
    is loaded, reshaped to (images, descriptors, channels) and the scales are
    concatenated along the descriptor axis. Each image's descriptors are then
    L2-normalized, projected with ``pca``, L2-normalized again and encoded
    with ``VLAD`` once per dictionary.

    NOTE: a later cell of this notebook redefines ``get_vlad`` with a
    single-scale signature, which shadows this definition once executed.

    Parameters
    ----------
    doc : str
        Dataset name (also the dataset's folder name).
    layer : str
        Name of the network layer the features were extracted from.
    scales : iterable of int
        Scale indices to load; must not be empty.
    kmean1, kmean2 : fitted clustering models
        Visual dictionaries passed to ``VLAD``.
    pca : fitted transformer
        Object with a ``transform`` method applied to each image's descriptors.
    type_set : {"train", "test"}
        Which split to load.

    Returns
    -------
    (list, list)
        VLAD vectors obtained with ``kmean1`` and with ``kmean2``, one entry
        per image.

    Raises
    ------
    ValueError
        If ``type_set`` is not "train"/"test" or ``scales`` is empty.
    """
    if type_set not in ("train", "test"):
        raise ValueError("type_set must be 'train' or 'test', got %r" % (type_set,))

    per_scale = []
    for n in scales:
        print(doc, n , layer)
        # Load the features of this scale.
        x = np.load(dir_path_DataSet + doc + '/' + load_fold + doc + 'X_' + type_set + '_' + layer + '(' + str(n) + ').npy')

        # Reshape (d1,d2,d3,d4) -> (d1,d2*d3,d4); anything else is treated as
        # (d1,d2) and becomes a single descriptor per image: (d1,1,d2).
        if len(x.shape) == 4:
            x = np.reshape(x, (x.shape[0], x.shape[1] * x.shape[2], x.shape[3]))
        else:
            x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
        per_scale.append(x)

    if not per_scale:
        raise ValueError("scales must contain at least one scale index")

    # Collect first and concatenate once: this works whatever the first
    # scale index is (the accumulator used to be created only for n == 0).
    if len(per_scale) == 1:
        x_all = per_scale[0]
    else:
        x_all = np.concatenate(per_scale, axis=1)
    del per_scale

    V_64 = []
    V_256 = []
    # One VLAD vector per image and per dictionary.
    for vec in x_all:
        # L2 normalization of each descriptor
        vec = normalize(vec, norm='l2', axis=1)
        # Projection with the fitted PCA
        vec = pca.transform(vec)
        # L2 normalization of the projected descriptors
        vec = normalize(vec, norm='l2', axis=1)

        V_64.append(VLAD(vec, kmean1))
        V_256.append(VLAD(vec, kmean2))

    return V_64, V_256
        

In [0]:
# Root folder that contains one sub-folder per dataset.
dir_path_DataSet = "drive/My Drive/medical_image_recognition/datasets/"

# Sub-folder (inside a dataset folder) the feature .npy files are read from.
load_fold = "datanp_vgg/"
# Sub-folder holding the pickled k-means / PCA files.
fold_name = "k_means_PCA/"
# Sub-folder the VLAD .npy files are written to.
vlad_fold = "vlad_fold/"

# Datasets to process (other candidates: 'chest_xray', 'kvasir-dataset-v2').
docs = ["miniMIT_Etus"]
# Scale indices of the feature files to load.
scales = [0]
# Layers to process (other candidates: 'block8_9_conv', 'block8_8_conv').
layers = ["fc1", "fc2"]

# n=1,2,3 together (all scales concatenated)

In [0]:
# For every dataset/layer: load the fitted dictionaries and PCA, compute the
# VLAD vectors of the train and test splits and save them as .npy files.
for doc in docs:
    for layer in layers:
        # Shared path prefixes for this dataset/layer.
        model_prefix = dir_path_DataSet+doc+'/'+fold_name+doc+'_'+layer+'_'
        vlad_prefix = dir_path_DataSet+doc+'/'+vlad_fold+doc+'_'+layer+'_'

        # Load the fitted models; `with` closes each file handle right away.
        # NOTE: pickle.load can execute arbitrary code - only load files you
        # produced yourself.
        with open(model_prefix+'kmean1.pickle', 'rb') as fh:
            kmean1 = pickle.load(fh)
        with open(model_prefix+'kmean2.pickle', 'rb') as fh:
            kmean2 = pickle.load(fh)
        with open(model_prefix+'PCA.pickle', 'rb') as fh:
            pca = pickle.load(fh)

        # Train set
        v_train_64,v_train_256=get_vlad(doc,layer,scales,kmean1,kmean2,pca,type_set="train")
        v_train_64 = np.asarray(v_train_64)
        v_train_256= np.asarray(v_train_256)

        print(v_train_64.shape)
        np.save(vlad_prefix+'vlad_train(64).npy',v_train_64)
        np.save(vlad_prefix+'vlad_train(256).npy',v_train_256)
        # Release the train arrays before the test split is processed.
        del v_train_64,v_train_256

        # Test set
        v_test_64,v_test_256=get_vlad(doc,layer,scales,kmean1,kmean2,pca,type_set="test")

        v_test_64 = np.asarray(v_test_64)
        v_test_256= np.asarray(v_test_256)
        print(v_test_64.shape)
        # Save the VLAD files
        np.save(vlad_prefix+'vlad_test(64).npy',v_test_64)
        np.save(vlad_prefix+'vlad_test(256).npy',v_test_256)

        print("done")

miniMIT_Etus 0 block5_pool
miniMIT_Etus 1 block5_pool
miniMIT_Etus 2 block5_pool
(120, 8192)
miniMIT_Etus 0 block5_pool
miniMIT_Etus 1 block5_pool
miniMIT_Etus 2 block5_pool
(120, 8192)
done
miniMIT_Etus 0 fc1
miniMIT_Etus 1 fc1
miniMIT_Etus 2 fc1
(120, 8192)
miniMIT_Etus 0 fc1
miniMIT_Etus 1 fc1
miniMIT_Etus 2 fc1
(120, 8192)
done
miniMIT_Etus 0 fc2
miniMIT_Etus 1 fc2
miniMIT_Etus 2 fc2
(120, 8192)
miniMIT_Etus 0 fc2
miniMIT_Etus 1 fc2
miniMIT_Etus 2 fc2
(120, 8192)
done


# n=1,2,3 separately

In [0]:
def get_vlad(doc,layer,n,kmean1,kmean2,pca,type_set="train"):
    """Compute VLAD vectors for every image of a single scale.

    Loads ``<dir_path_DataSet><doc>/<load_fold><doc>X_<type_set>_<layer>(<n>).npy``,
    reshapes it to (images, descriptors, channels), then for each image
    L2-normalizes the descriptors, projects them with ``pca``, L2-normalizes
    again and encodes them with ``VLAD`` once per dictionary.

    NOTE: this redefinition shadows the earlier ``get_vlad`` of this notebook,
    whose third argument is a list of scales. After this cell has run, the
    earlier driver cell would pass its ``scales`` list as ``n``.

    Parameters
    ----------
    doc : str
        Dataset name (also the dataset's folder name).
    layer : str
        Name of the network layer the features were extracted from.
    n : int
        Scale index of the feature file to load.
    kmean1, kmean2 : fitted clustering models
        Visual dictionaries passed to ``VLAD``.
    pca : fitted transformer
        Object with a ``transform`` method applied to each image's descriptors.
    type_set : {"train", "test"}
        Which split to load.

    Returns
    -------
    (list, list)
        VLAD vectors obtained with ``kmean1`` and with ``kmean2``, one entry
        per image.

    Raises
    ------
    ValueError
        If ``type_set`` is neither "train" nor "test".
    """
    if type_set not in ("train", "test"):
        raise ValueError("type_set must be 'train' or 'test', got %r" % (type_set,))

    print(doc, n , layer)
    # Load the features of this scale.
    x = np.load(dir_path_DataSet + doc + '/' + load_fold + doc + 'X_' + type_set + '_' + layer + '(' + str(n) + ').npy')

    # Reshape (d1,d2,d3,d4) -> (d1,d2*d3,d4); anything else is treated as
    # (d1,d2) and becomes a single descriptor per image: (d1,1,d2).
    if len(x.shape) == 4:
        x = np.reshape(x, (x.shape[0], x.shape[1] * x.shape[2], x.shape[3]))
    else:
        x = np.reshape(x, (x.shape[0], 1, x.shape[1]))

    V_64 = []
    V_256 = []
    # One VLAD vector per image and per dictionary. (Re-slicing the array
    # inside the loop was dropped: a slice is only a view and the loop's
    # iterator keeps the full array alive, so it never released memory.)
    for vec in x:
        # L2 normalization of each descriptor
        vec = normalize(vec, norm='l2', axis=1)
        # Projection with the fitted PCA
        vec = pca.transform(vec)
        # L2 normalization of the projected descriptors
        vec = normalize(vec, norm='l2', axis=1)

        V_64.append(VLAD(vec, kmean1))
        V_256.append(VLAD(vec, kmean2))

    return V_64, V_256
        

In [23]:
# Per-scale run: every scale has its own k-means / PCA files ('n<scale>_'
# in the file name) and produces its own VLAD files.
scales=[1]
layers=['fc1','fc2']#,'block8_9_conv']#,'block8_8_conv']
for doc in docs:
    for layer in layers:
        for n in scales:
            # Shared path prefixes for this dataset/layer/scale.
            model_prefix = dir_path_DataSet+doc+'/'+fold_name+doc+'_'+layer+'_'+'n'+str(n)
            vlad_prefix = dir_path_DataSet+doc+'/'+vlad_fold+doc+'_'+layer+'_'+'n'+str(n)

            # Load the fitted models; `with` closes each file handle right away.
            # NOTE: pickle.load can execute arbitrary code - only load files
            # you produced yourself.
            with open(model_prefix+'_kmean1.pickle', 'rb') as fh:
                kmean1 = pickle.load(fh)
            with open(model_prefix+'_kmean2.pickle', 'rb') as fh:
                kmean2 = pickle.load(fh)
            with open(model_prefix+'_PCA.pickle', 'rb') as fh:
                pca = pickle.load(fh)

            # Train set
            v_train_64,v_train_256=get_vlad(doc,layer,n,kmean1,kmean2,pca,type_set="train")
            v_train_64 = np.asarray(v_train_64)
            v_train_256= np.asarray(v_train_256)

            print(v_train_64.shape)
            np.save(vlad_prefix+'_vlad_train(64).npy',v_train_64)
            np.save(vlad_prefix+'_vlad_train(256).npy',v_train_256)
            # Release the train arrays before the test split is processed.
            del v_train_64,v_train_256

            # Test set
            v_test_64,v_test_256=get_vlad(doc,layer,n,kmean1,kmean2,pca,type_set="test")

            v_test_64 = np.asarray(v_test_64)
            v_test_256= np.asarray(v_test_256)
            print(v_test_64.shape)
            # Save the VLAD files
            np.save(vlad_prefix+'_vlad_test(64).npy',v_test_64)
            np.save(vlad_prefix+'_vlad_test(256).npy',v_test_256)

            print("done")

miniMIT_Etus 1 fc1
(120, 8192)
miniMIT_Etus 1 fc1
(120, 8192)
done
miniMIT_Etus 1 fc2
(120, 8192)
miniMIT_Etus 1 fc2
(120, 8192)
done
