In [None]:
#Transfer Learning and Optimal Transport

In [14]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.preprocessing import normalize
from scipy.spatial.distance import cdist
from sklearn.preprocessing import StandardScaler
import ot
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from itertools import permutations 
from time import time

In [6]:
#caffenet
caffe_webcam = loadmat("CaffeNet4096/webcam.mat")
caffe_dslr = loadmat("CaffeNet4096/dslr.mat")
caffe_amazon = loadmat("CaffeNet4096/amazon.mat")

In [7]:
caffe_list = [caffe_webcam,caffe_dslr,caffe_amazon]
caffe_iter = list(permutations(caffe_list,2))

In [8]:
#googlenet
google_webcam = loadmat("GoogleNet1024/webcam.mat")
google_dslr = loadmat("GoogleNet1024/dslr.mat")
google_amazon = loadmat("GoogleNet1024/amazon.mat")

In [9]:
google_list = [google_webcam,google_dslr,google_amazon]
google_iter = list(permutations(google_list,2))

In [10]:
#surf
surf_webcam = loadmat("surf/webcam.mat")
surf_dslr = loadmat("surf/dslr.mat")
surf_amazon = loadmat("surf/amazon.mat")

In [11]:
surf_list = [surf_webcam,surf_dslr,surf_amazon]
surf_iter = list(permutations(surf_list,2))

In [15]:
class entropic_ot:
    def __init__(self,dataset,S,T,reg_e):
        self.dataset = dataset
        self.S = S
        self.T = T
        self.reg_e = reg_e   
            
    def fit_predict(self):
        x_S = self.S["fts"]
        x_T = self.T["fts"]
        if self.dataset == "surf":
            y_S = self.S["labels"]
            y_T = self.T["labels"]
        else:
            y_S = self.S["labels"][0]
            y_T = self.T["labels"][0]
        a = np.ones(x_S.shape[0])
        b = np.ones(x_T.shape[0])
        M = cdist(x_S,x_T)
        M = normalize(M,norm="max")
        G = ot.sinkhorn(a,b,M,self.reg_e) #entropic regularization parameter is chosen as 0.1
        sa = np.dot(G,x_T)
        #knn classifier
        knn = KNeighborsClassifier(1)
        knn.fit(sa,y_S)
        labels = knn.predict(x_T)
        return accuracy_score(labels,y_T)*100
    
def knn(dataset,S,T):
    x_S = S["fts"]
    x_T = T["fts"]
    if dataset == "surf":
        y_S = S["labels"]
        y_T = T["labels"]
    else:
        y_S = S["labels"][0]
        y_T = T["labels"][0]
    scale = StandardScaler()
    x_S = scale.fit_transform(x_S)
    x_T = scale.fit_transform(x_T)
    knn = KNeighborsClassifier(1)
    knn.fit(x_S,y_S)
    labels = knn.predict(x_T)
    return accuracy_score(labels,y_T)*100

def sinkhorn_implementation(dataset,S,T):
    x_S = S["fts"]
    x_T = T["fts"]
    if dataset == "surf":
        y_S = S["labels"]
        y_T = T["labels"]
    else:
        y_S = S["labels"][0]
        y_T = T["labels"][0]
    a = np.ones(x_S.shape[0])
    b = np.ones(x_T.shape[0])
    M = cdist(x_S,x_T)
    M = normalize(M,norm="max")
    gamma = 0.01
    K = np.exp(-M/gamma)
    u = np.ones(x_S.shape[0])
    v = np.ones(x_T.shape[0])

    for i in range(70):
        u = a/np.dot(K,v)
        v = b/np.dot(K.T,u)

    G = u.reshape(-1, 1) * K * v.reshape(1, -1)
    sa = np.dot(G,x_T)
    #knn classifier
    knn = KNeighborsClassifier(1)
    knn.fit(sa,y_S)
    labels = knn.predict(x_T)
    return accuracy_score(labels,y_T)*100

In [17]:
#caffe

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["sinkhorn library","sinkhorn implementation","without sinkhorn"]
caffe_timedf = pd.DataFrame(index=index,columns=methods)
caffe_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    ent = entropic_ot("caffe",caffe_iter[i][0],caffe_iter[i][1],0.1)
    caffe_accdf.iloc[i,0] = ent.fit_predict()
    t1 = time()
    caffe_timedf.iloc[i,0] = t1-t0
    t0 = time()
    caffe_accdf.iloc[i,2] = knn("caffe",caffe_iter[i][0],caffe_iter[i][1])
    t1 = time()
    caffe_timedf.iloc[i,2] = t1-t0
#     t0 = time()
#     skh = sinkhorn_implementation("caffe",caffe_iter[i][0],caffe_iter[i][1])
#     caffe_accdf.iloc[i,1] = skh
#     t1 = time()
#     caffe_timedf.iloc[i,1] = t1-t0

  v = np.divide(b, KtransposeU)




  v = np.divide(b, KtransposeU)




  u = 1. / np.dot(Kp, v)




  u = 1. / np.dot(Kp, v)




In [96]:
#google

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["sinkhorn library","sinkhorn implementation","without sinkhorn"]
google_timedf = pd.DataFrame(index=index,columns=methods)
google_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    ent = entropic_ot("google",google_iter[i][0],google_iter[i][1],0.1)
    google_accdf.iloc[i,0] = ent.fit_predict()
    t1 = time()
    google_timedf.iloc[i,0] = t1-t0
    t0 = time()
    google_accdf.iloc[i,2] = knn("google",google_iter[i][0],google_iter[i][1])
    t1 = time()
    google_timedf.iloc[i,2] = t1-t0
#     t0 = time()
#     skh = sinkhorn_implementation("caffe",caffe_iter[i][0],caffe_iter[i][1])
#     caffe_accdf.iloc[i,1] = skh
#     t1 = time()
#     caffe_timedf.iloc[i,1] = t1-t0

  v = np.divide(b, KtransposeU)




  v = np.divide(b, KtransposeU)




  u = 1. / np.dot(Kp, v)




  u = 1. / np.dot(Kp, v)




In [154]:
#surf

list1 = ["webcam","dslr","amazon"]
index = list(permutations(list1,2))
methods = ["sinkhorn library","sinkhorn implementation","without sinkhorn"]
surf_timedf = pd.DataFrame(index=index,columns=methods)
surf_accdf = pd.DataFrame(index=index,columns=methods)

for i in range(len(index)):
    t0 = time()
    ent = entropic_ot("surf",surf_iter[i][0],surf_iter[i][1],0.1)
    surf_accdf.iloc[i,0] = ent.fit_predict()
    t1 = time()
    surf_timedf.iloc[i,0] = t1-t0
    t0 = time()
    surf_accdf.iloc[i,2] = knn("surf",surf_iter[i][0],surf_iter[i][1])
    t1 = time()
    surf_timedf.iloc[i,2] = t1-t0
#     t0 = time()
#     skh = sinkhorn_implementation("caffe",caffe_iter[i][0],caffe_iter[i][1])
#     caffe_accdf.iloc[i,1] = skh
#     t1 = time()
#     caffe_timedf.iloc[i,1] = t1-t0

  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  v = np.divide(b, KtransposeU)
  knn.fit(sa,y_S)




  knn.fit(x_S,y_S)
  knn.fit(sa,y_S)
  knn.fit(x_S,y_S)
  v = np.divide(b, KtransposeU)
  knn.fit(sa,y_S)




  knn.fit(x_S,y_S)
  u = 1. / np.dot(Kp, v)
  knn.fit(sa,y_S)




  knn.fit(x_S,y_S)
  u = 1. / np.dot(Kp, v)
  knn.fit(sa,y_S)




  knn.fit(x_S,y_S)


In [177]:
#for experiments by varying values of gamma
i = 0
t0 = time()
skh = sinkhorn_implementation("surf",surf_iter[i][0],surf_iter[i][1])
surf_accdf.iloc[i,1] = skh
t1 = time()
surf_timedf.iloc[i,1] = t1-t0
print(skh)

  knn.fit(sa,y_S)


17.197452229299362


In [178]:
surf_accdf

Unnamed: 0,sinkhorn library,sinkhorn implementation,without sinkhorn
"(webcam, dslr)",55.414,56.051,30.5732
"(webcam, amazon)",28.1837,26.3048,17.5365
"(dslr, webcam)",54.2373,58.3051,31.1864
"(dslr, amazon)",28.81,12.2129,13.9875
"(amazon, webcam)",32.5424,33.5593,17.6271
"(amazon, dslr)",34.3949,17.1975,11.465


In [181]:
surf_timedf

Unnamed: 0,sinkhorn library,sinkhorn implementation,without sinkhorn
"(webcam, dslr)",0.20847,0.162536,0.119691
"(webcam, amazon)",0.862689,0.89953,0.535571
"(dslr, webcam)",0.165522,0.162564,0.0937493
"(dslr, amazon)",0.453831,0.393946,0.271275
"(amazon, webcam)",0.688167,0.585432,0.506642
"(amazon, dslr)",0.43205,0.446838,0.322095
