In [4]:
# Auto-reload imported modules before each cell runs, so edits to the project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import sys
# Add ./src/ (relative to the working directory) to the module search path;
# presumably this is where the project modules imported in the next cell live.
sys.path.append("./src/")
# Restrict CUDA to the device with index 1. This is set here, before the cell
# that imports torch, so that it is in effect when CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import numpy as np
import pandas as pd
import ot
import torch
import pickle
import gc

from torch import nn
from torch import optim

import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from collections import defaultdict

from DANN import DANN
from DAN import DAN
from CORAL import coral
from optimalTransport1D import optimalTransport1D
from nnModel import FC_embedding, EmbeddingModel

from utils import *
from io_utils import *

## Preprocessing

In [None]:
# Fit the msda_* transform on every domain/split stacked together, then
# transform each domain/split separately and save features + labels to disk.
# (msda_fit / msda_forward come from the project helpers; presumably a
# marginalized stacked denoising autoencoder -- confirm in utils.)
dim = 5000

# Make sure the output directory exists before any np.save call.
os.makedirs("./data/preprocessing", exist_ok=True)

datalist = []
for filename in ["books", "dvd", "elec", "kitchen"]:
    for train in [True, False]:
        x, y = load_amazon_dataset(filename, train)
        datalist.append(x)

x = np.vstack(datalist)
if dim < 5000:
    # Keep only the first `dim` input features.
    x = x[:, :dim]

_, Wlist = msda_fit(x.T, nb_layers=5)

for filename in ["books", "dvd", "elec", "kitchen"]:
    for train in [True, False]:
        x, y = load_amazon_dataset(filename, train)
        if dim < 5000:
            # Apply the same truncation that was used when fitting Wlist;
            # otherwise the input width no longer matches the fitted weights.
            x = x[:, :dim]
        # Keep the last `dim` columns of the forward output.
        x_msda = msda_forward(x.T, Wlist)[:,-dim:]

        split = "train" if train else "test"
        np.save("./data/preprocessing/{}_msda_{}_{}".format(filename, dim, split), x_msda)
        np.save("./data/preprocessing/{}_label_{}".format(filename, split), y)

## Constants

In [3]:
# Notebook-wide configuration read by the experiment cells below.
dim = 5000                  # feature dimension passed to load_amazon_msda
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook can still be executed on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
adversarial = True          # set to False to learn a standard NN
hidden_layer_size = 50
# The adaptation weight is derived from `adversarial`: re-run this cell (or
# re-derive the value) whenever the flag is changed.
lambda_adapt = 0.1 if adversarial else 0.
learning_rate = 0.0001
maxiter = 200

## Neural Networks

In [None]:
# DANN
# For every ordered (source, target) pair of distinct domains: fit a DANN on
# the source training data with the target training features as adaptation
# data, then print the accuracy on the target test split.
domains = ["books", "dvd", "elec", "kitchen"]

for source in domains:
    xs, ys = load_amazon_msda(source, True, dim)

    # The leading 10% of the source rows are split off and handed to fit()
    # as xv / yv; the remaining rows are used as the source training data.
    v_size = int(0.1 * xs.shape[0])
    xv, xs = xs[:v_size], xs[v_size:]
    yv, ys = ys[:v_size], ys[v_size:]

    for target in domains:
        if target == source:
            continue

        xt, yt = load_amazon_msda(target, True, dim)
        xtest, ytest = load_amazon_msda(target, False, dim)

        # Hyperparameters all come from the constants cell.
        dann_params = dict(
            lambda_adapt=lambda_adapt,
            hidden_layer_size=hidden_layer_size,
            learning_rate=learning_rate,
            maxiter=maxiter,
            epsilon_init=None,
            seed=12342,
            adversarial_representation=adversarial,
            verbose=False,
        )
        clf = DANN(**dann_params)
        clf.fit(xs, ys, xt, xv, yv)

        predictions = clf.predict(xtest)
        res = np.mean(predictions == ytest)
        print(source, target, res)

In [None]:
# Naive NN
# Baseline: the same network class trained with the adversarial switch off.
# One model is cached per source domain under ./model/{source}_nn_{dim}.
#
# NOTE: this cell changes the notebook-wide `adversarial` flag, which later
# cells also read. Re-run the constants cell before training DANN again.
adversarial = False
# Re-derive the dependent constant exactly as the constants cell does;
# otherwise lambda_adapt keeps the value computed while adversarial was True.
lambda_adapt = 0.1 if adversarial else 0.

for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)

    # Leading 10% of the source rows is passed to fit() as xv / yv.
    v_size = int(0.1 * xs.shape[0])
    xv = xs[:v_size]
    yv = ys[:v_size]
    xs = xs[v_size:]
    ys = ys[v_size:]

    for target in ["books", "dvd", "elec", "kitchen"]:
        if target != source:
            xt, yt = load_amazon_msda(target, True, dim)
            xtest, ytest = load_amazon_msda(target, False, dim)

            model_path = "./model/{}_nn_{}".format(source, dim)
            try:
                clf = load_model(model_path)
            except Exception:
                # No usable cached model: train one and cache it. `Exception`
                # (not a bare except) so Ctrl-C still interrupts the cell.
                clf = DANN(lambda_adapt=lambda_adapt, hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                           maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=adversarial,
                           verbose=False)
                clf.fit(xs, ys, xt, xv, yv)
                save_model(clf, model_path)

            res = np.mean(clf.predict(xtest)==ytest)
            print(source, target, res)

In [None]:
# CORAL NN model
# Evaluate the cached naive NN of each source domain on target test features
# that were first transformed with coral(xt, xs).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)

    # Leading 10% of the source rows is passed to fit() as xv / yv.
    v_size = int(0.1 * xs.shape[0])
    xv = xs[:v_size]
    yv = ys[:v_size]
    xs = xs[v_size:]
    ys = ys[v_size:]

    for target in ["books", "dvd", "elec", "kitchen"]:
        if target != source:
            # Target *test* split: features to transform and labels to score.
            xt, yt = load_amazon_msda(target, False, dim)

            model_path = "./model/{}_nn_{}".format(source, dim)
            try:
                clf = load_model(model_path)
            except Exception:
                # No usable cached model: train the naive NN here. The
                # non-adversarial configuration is spelled out explicitly so
                # the model saved under the `_nn_` name does not depend on
                # whether another cell has changed the global `adversarial`.
                # NOTE(review): xt (target test features) is what fit()
                # receives as adaptation data -- confirm it is ignored when
                # lambda_adapt is 0.
                clf = DANN(lambda_adapt=0., hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                           maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=False,
                           verbose=False)
                clf.fit(xs, ys, xt, xv, yv)
                save_model(clf, model_path)

            target_tsf = coral(xt, xs)
            res = np.mean(clf.predict(target_tsf)==yt)
            print(source, target, res)

In [None]:
# 1D OT
# Evaluate the cached naive NN of each source domain on target test features
# transformed by optimalTransport1D.fit_transform(xt, xs).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)

    # Leading 10% of the source rows is passed to fit() as xv / yv.
    v_size = int(0.1 * xs.shape[0])
    xv = xs[:v_size]
    yv = ys[:v_size]
    xs = xs[v_size:]
    ys = ys[v_size:]

    for target in ["books", "dvd", "elec", "kitchen"]:
        if target != source:
            # Target *test* split: features to transform and labels to score.
            xt, yt = load_amazon_msda(target, False, dim)

            model_path = "./model/{}_nn_{}".format(source, dim)
            try:
                clf = load_model(model_path)
            except Exception:
                # No usable cached model: train the naive NN here. The
                # non-adversarial configuration is spelled out explicitly so
                # the model saved under the `_nn_` name does not depend on
                # whether another cell has changed the global `adversarial`.
                # NOTE(review): xt (target test features) is what fit()
                # receives as adaptation data -- confirm it is ignored when
                # lambda_adapt is 0.
                clf = DANN(lambda_adapt=0., hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                           maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=False,
                           verbose=False)
                clf.fit(xs, ys, xt, xv, yv)
                save_model(clf, model_path)

            opt = optimalTransport1D()
            # fit_transform returns the transformed features and a second
            # value (named w_distance here) that this cell does not use.
            target_tsf, w_distance = opt.fit_transform(xt, xs, njobs=50)
            res = np.mean(clf.predict(target_tsf)==yt)
            print(source, target, res)

In [None]:
# Classical OT
# Evaluate the cached naive NN of each source domain on target test features
# transported with ot.da.EMDTransport fitted from xt (as Xs) to xs (as Xt).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)

    # Leading 10% of the source rows is passed to fit() as xv / yv.
    v_size = int(0.1 * xs.shape[0])
    xv = xs[:v_size]
    yv = ys[:v_size]
    xs = xs[v_size:]
    ys = ys[v_size:]

    for target in ["books", "dvd", "elec", "kitchen"]:
        if target != source:
            # Target *test* split: features to transform and labels to score.
            xt, yt = load_amazon_msda(target, False, dim)

            model_path = "./model/{}_nn_{}".format(source, dim)
            try:
                clf = load_model(model_path)
            except Exception:
                # No usable cached model: train the naive NN here. The
                # non-adversarial configuration is spelled out explicitly so
                # the model saved under the `_nn_` name does not depend on
                # whether another cell has changed the global `adversarial`.
                # NOTE(review): xt (target test features) is what fit()
                # receives as adaptation data -- confirm it is ignored when
                # lambda_adapt is 0.
                clf = DANN(lambda_adapt=0., hidden_layer_size=hidden_layer_size, learning_rate=learning_rate,
                           maxiter=maxiter, epsilon_init=None, seed=12342, adversarial_representation=False,
                           verbose=False)
                clf.fit(xs, ys, xt, xv, yv)
                save_model(clf, model_path)

            # The transport is fitted in the target -> source direction, so
            # the source-trained classifier sees the mapped target features.
            ot_emd = ot.da.EMDTransport(max_iter=200000)
            ot_emd.fit(Xs=xt, Xt=xs)
            target_tsf = ot_emd.transform(xt)
            res = np.mean(clf.predict(target_tsf)==yt)
            print(source, target, res)

In [None]:
# DAN
class DanseNet(nn.Module):
    """Two-layer network: Linear + Sigmoid hidden block, then a Linear output.

    The hidden activation of the most recent forward pass is kept on the
    instance as ``hidden_rep``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the hidden block (input_dim -> hidden_dim) and the output
        block (hidden_dim -> output_dim)."""
        super(DanseNet, self).__init__()
        # Layers are created in the same order as they are applied.
        hidden_linear = nn.Linear(input_dim, hidden_dim)
        self.input_layer = nn.Sequential(hidden_linear, nn.Sigmoid())
        output_linear = nn.Linear(hidden_dim, output_dim)
        self.output_layer = nn.Sequential(output_linear)

    def forward(self, inputs):
        """Return the output block applied to the hidden representation of
        ``inputs``; the hidden representation is stored in ``self.hidden_rep``."""
        hidden = self.input_layer(inputs)
        self.hidden_rep = hidden
        return self.output_layer(hidden)

# Repeat the full DAN experiment 10 times. Each run trains one DAN per
# ordered (source, target) pair of distinct domains and records its accuracy
# on the target test split.
for i in range(10):
    res_list = []
    for source in ["books", "dvd", "elec", "kitchen"]:
        for target in ["books", "dvd", "elec", "kitchen"]:
            if target != source:
                xs, ys = load_amazon_msda(source, True, dim)
                xt, yt = load_amazon_msda(target, True, dim)
                xtest, ytest = load_amazon_msda(target, False, dim)

                # Leading 10% of the source rows is passed to fit() as xv / yv.
                v_size = int(0.1 * xs.shape[0])
                xv = xs[:v_size]
                yv = ys[:v_size]
                xs = xs[v_size:]
                ys = ys[v_size:]

                # reduce_dataset returns an index into xt; only the selected
                # target rows are used as adaptation data.
                indext = reduce_dataset(xs, xt)
                xt_adpt = xt[indext]

                # Do NOT call this variable `nn`: that would rebind the global
                # `nn` (torch.nn) which DanseNet.__init__ looks up at call
                # time, so the next DanseNet(...) would raise AttributeError.
                net = DanseNet(input_dim=dim, hidden_dim=50, output_dim=1)
                clf = DAN(net, device=device)
                clf.fit(xs, ys, xt_adpt, xv, yv, epoch=100, batch_size=128, lr=0.0001, beta=0.1, verbose=False)

                res = np.mean(clf.predict(xtest)==ytest)
                res_list.append(res)
                print(source, target, res)

    # Summarise the run; res_list was previously collected but never reported.
    print("run", i, "mean accuracy", np.mean(res_list))


## SVM

In [None]:
# naive
# Linear SVM trained on each source domain (cached under
# ./model/{source}_svm_{dim}) and scored on the untransformed test split of
# every domain, including the source domain itself.
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)
    model_path = "./model/{}_svm_{}".format(source, dim)
    try:
        clf = load_model(model_path)
    except Exception:
        # No usable cached model: train one and cache it. `Exception` (not a
        # bare except) so Ctrl-C still interrupts the cell.
        clf = LinearSVC(C=0.1, tol=0.001, max_iter=10000)
        clf.fit(xs, ys)
        save_model(clf, model_path)

    for target in ["books", "dvd", "elec", "kitchen"]:
        xt, yt = load_amazon_msda(target, False, dim)
        res = np.mean(clf.predict(xt)==yt)
        print(source, target, res)

In [None]:
# coral
# Linear SVM trained on each source domain (cached under
# ./model/{source}_svm_{dim}) and scored on each domain's test features after
# they have been transformed with coral(xt, xs).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)
    model_path = "./model/{}_svm_{}".format(source, dim)
    try:
        clf = load_model(model_path)
    except Exception:
        # No usable cached model: train one and cache it. `Exception` (not a
        # bare except) so Ctrl-C still interrupts the cell.
        clf = LinearSVC(C=0.1, tol=0.001, max_iter=10000)
        clf.fit(xs, ys)
        save_model(clf, model_path)

    for target in ["books", "dvd", "elec", "kitchen"]:
        xt, yt = load_amazon_msda(target, False, dim)
        target_tsf = coral(xt, xs)
        res = np.mean(clf.predict(target_tsf)==yt)
        print(source, target, res)

In [None]:
# 1D OT
# Linear SVM trained on each source domain (cached under
# ./model/{source}_svm_{dim}) and scored on each domain's test features after
# optimalTransport1D.fit_transform(xt, xs).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)
    model_path = "./model/{}_svm_{}".format(source, dim)
    try:
        clf = load_model(model_path)
    except Exception:
        # No usable cached model: train one and cache it. `Exception` (not a
        # bare except) so Ctrl-C still interrupts the cell.
        clf = LinearSVC(C=0.1, tol=0.001, max_iter=10000)
        clf.fit(xs, ys)
        save_model(clf, model_path)

    for target in ["books", "dvd", "elec", "kitchen"]:
        xt, yt = load_amazon_msda(target, False, dim)
        opt = optimalTransport1D()
        # fit_transform returns the transformed features and a second value
        # (named w_distance here) that this cell does not use.
        target_tsf, w_distance = opt.fit_transform(xt, xs, njobs=50)
        res = np.mean(clf.predict(target_tsf)==yt)
        print(source, target, res)

In [None]:
# OT
# Linear SVM trained on each source domain (cached under
# ./model/{source}_svm_{dim}) and scored on each domain's test features after
# transport with ot.da.EMDTransport fitted from xt (as Xs) to xs (as Xt).
for source in ["books", "dvd", "elec", "kitchen"]:
    xs, ys = load_amazon_msda(source, True, dim)
    model_path = "./model/{}_svm_{}".format(source, dim)
    try:
        clf = load_model(model_path)
    except Exception:
        # No usable cached model: train one and cache it. `Exception` (not a
        # bare except) so Ctrl-C still interrupts the cell.
        clf = LinearSVC(C=0.1, tol=0.001, max_iter=10000)
        clf.fit(xs, ys)
        save_model(clf, model_path)

    for target in ["books", "dvd", "elec", "kitchen"]:
        xt, yt = load_amazon_msda(target, False, dim)
        # The transport is fitted in the target -> source direction, so the
        # source-trained classifier sees the mapped target features.
        ot_emd = ot.da.EMDTransport(max_iter=200000)
        ot_emd.fit(Xs=xt, Xt=xs)
        target_tsf = ot_emd.transform(xt)
        res = np.mean(clf.predict(target_tsf)==yt)
        print(source, target, res)