In [None]:
#Transfer Learning and Optimal Transport

In [59]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from itertools import permutations 
from time import time

In [60]:
#caffenet
caffe_webcam = loadmat("CaffeNet4096/webcam.mat")
caffe_dslr = loadmat("CaffeNet4096/dslr.mat")
caffe_amazon = loadmat("CaffeNet4096/amazon.mat")

In [61]:
caffe_list = [caffe_webcam,caffe_dslr,caffe_amazon]
caffe_iter = list(permutations(caffe_list,2))

In [62]:
#googlenet
google_webcam = loadmat("GoogleNet1024/webcam.mat")
google_dslr = loadmat("GoogleNet1024/dslr.mat")
google_amazon = loadmat("GoogleNet1024/amazon.mat")

In [63]:
google_list = [google_webcam,google_dslr,google_amazon]
google_iter = list(permutations(google_list,2))

In [64]:
#surf
surf_webcam = loadmat("surf/webcam.mat")
surf_dslr = loadmat("surf/dslr.mat")
surf_amazon = loadmat("surf/amazon.mat")

In [65]:
surf_list = [surf_webcam,surf_dslr,surf_amazon]
surf_iter = list(permutations(surf_list,2))

In [66]:
class subspace:
    def __init__(self,dataset,S,T,d):
        self.dataset = dataset
        self.S = S
        self.T = T
        self.d = d
        
    def pca(self,x, n_components):
        cov = np.cov(x , rowvar = False)
        eigen_values , eigen_vectors = np.linalg.eigh(cov)
        sorted_index = np.argsort(eigen_values)[::-1]
        sorted_eigenvalue = eigen_values[sorted_index]
        sorted_eigenvectors = eigen_vectors[:,sorted_index]
        return sorted_eigenvectors[:,0:n_components]
    def fit_predict(self):
        #normalising the data
        scale = StandardScaler()
        x_S = self.S["fts"]
        x_T = self.T["fts"]
        x_T = scale.fit_transform(x_T)
        x_S = scale.fit_transform(x_S)
        
        if self.dataset == "surf":
            y_S = self.S["labels"]
            y_T = self.T["labels"]
        else:
            y_S = self.S["labels"][0]
            y_T = self.T["labels"][0]
        
        #pca and picking the top eigen vectors
        xs = self.pca(x_S,self.d)
        xt = self.pca(x_T,self.d)
        xa = np.dot(np.dot(xs,xs.T),xt)
        sa = np.dot(x_S,xa)
        st = np.dot(x_T,xt)
        
        #knn classifier
        knn = KNeighborsClassifier(1)
        knn.fit(sa,y_S)
        labels = knn.predict(st)
        return accuracy_score(labels,y_T)
    
def knn(dataset,S,T):
    x_S = S["fts"]
    x_T = T["fts"]
    if dataset == "surf":
        y_S = S["labels"]
        y_T = T["labels"]
    else:
        y_S = S["labels"][0]
        y_T = T["labels"][0]
    scale = StandardScaler()
    x_S = scale.fit_transform(x_S)
    x_T = scale.fit_transform(x_T)
    knn = KNeighborsClassifier(1)
    knn.fit(x_S,y_S)
    labels = knn.predict(x_T)
    return accuracy_score(labels,y_T)

In [123]:
#caffe

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["subspace","without subspace"]
caffe_timedf = pd.DataFrame(index=index,columns=methods)
caffe_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    sub = subspace("caffe",caffe_iter[i][0],caffe_iter[i][1],13)
    caffe_accdf.iloc[i,0] = sub.fit_predict()*100
    t1 = time()
    caffe_timedf.iloc[i,0] = t1-t0
    t2 = time()
    caffe_accdf.iloc[i,1] = knn("caffe",caffe_iter[i][0],caffe_iter[i][1])*100
    t3 = time()
    caffe_timedf.iloc[i,1] = t3-t2

In [124]:
#google

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["subspace","without subspace"]
google_timedf = pd.DataFrame(index=index,columns=methods)
google_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    sub = subspace("google",google_iter[i][0],google_iter[i][1],13)
    google_accdf.iloc[i,0] = sub.fit_predict()*100
    t1 = time()
    google_timedf.iloc[i,0] = t1-t0
    t2 = time()
    google_accdf.iloc[i,1] = knn("google",google_iter[i][0],google_iter[i][1])*100
    t3 = time()
    google_timedf.iloc[i,1] = t3-t2

In [125]:
#surf

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["subspace","without subspace"]
surf_timedf = pd.DataFrame(index=index,columns=methods)
surf_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    sub = subspace("surf",surf_iter[i][0],surf_iter[i][1],13)
    surf_accdf.iloc[i,0] = sub.fit_predict()*100
    t1 = time()
    surf_timedf.iloc[i,0] = t1-t0
    t2 = time()
    surf_accdf.iloc[i,1] = knn("surf",surf_iter[i][0],surf_iter[i][1])*100
    t3 = time()
    surf_timedf.iloc[i,1] = t3-t2

  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)


In [120]:
t = surf_accdf

In [121]:
t["improvement"] = (t["subspace"]-t["without subspace"])/t["subspace"]*100
t.improvement = t.improvement.map(lambda x:round(x))

In [126]:
caffe_accdf

Unnamed: 0,subspace,without subspace
"(webcam, dslr)",98.7261,96.1783
"(webcam, amazon)",87.2651,69.7286
"(dslr, webcam)",98.6441,95.2542
"(dslr, amazon)",82.4635,70.7724
"(amazon, webcam)",87.7966,74.2373
"(amazon, dslr)",90.4459,81.5287


In [129]:
surf_timedf

Unnamed: 0,subspace,without subspace
"(webcam, dslr)",0.140591,0.140592
"(webcam, amazon)",0.173923,0.52877
"(dslr, webcam)",0.140594,0.140592
"(dslr, amazon)",0.201098,0.296805
"(amazon, webcam)",0.172899,0.566615
"(amazon, dslr)",0.140561,0.393137
