In [1]:
import sys
sys.path.append("../src/")
sys.path.append("../model/")
sys.path.append("../src/UDA")

In [2]:
from argparse import Namespace

import numpy as np
import torch

from sklearn.model_selection import train_test_split

from io_utils import load_dataset, model_log
from metric import performance_logloss, performance_acc
from train_utils import reduce_dataset

from UDA.trainer import *

In [None]:
# Experiment identity: `task`, `data_type` and `dim` are passed to load_dataset,
# while `model_type` and `version` only appear in the model_log calls below.
task, data_type, model_type = "amazon", "msda", "toalign"

# Size settings. NOTE(review): `epoch` and `batch_size` are not referenced by
# the cells visible in this notebook -- confirm whether they are still needed.
dim, epoch, batch_size = 400, 25, 128

# Tag written alongside every logged result.
version = "opt"

In [24]:
# Trainer configuration, built as a single nested Namespace literal.
# Keyword order mirrors the order in which the attributes were originally
# assigned, so the resulting object is attribute-for-attribute identical.
cfg = Namespace(
    MODEL=Namespace(
        BASENET="NumNN",
        DNET='Discriminator',
        D_HIDDEN_SIZE=32,
        D_OUTDIM=1,
        EMBED_INPUT=[704, 134, 702, 12, 220, 3, 219],
        EMBED_DIM=[5, 3, 5, 2, 4, 1, 4],
        # NOTE(review): same value as `dim` in the parameter cell -- presumably
        # the two must stay in sync; confirm against the model definition.
        NUM_DIM=400,
    ),
    DATASET=Namespace(
        NUM_CLASSES=2,
    ),
    TRAIN=Namespace(
        OUTPUT_CKPT="./",
        TEST_SIZE=0.25,
        TTL_ITE=800,
        BATCH_SIZE=128,
        PRINT_FREQ=10,
        TEST_FREQ=10,
        CATE_INDEX=0,
    ),
    METHOD=Namespace(
        HDA=Namespace(LR_MULT=0.001),
        ENT=True,
        W_ALG=0.001,
    ),
    OPTIM=Namespace(
        MOMENTUM=0.9,
        WEIGHT_DECAY=5e-5,
    ),
    WORKERS=5,
)

In [None]:
# Learning-rate search.
# For each candidate learning rate, train one model per ordered
# (source domain, target domain) pair and record its accuracy on a held-out
# slice of the *source training* data. `mean_perfs[i]` is the mean accuracy
# for the i-th candidate, in the order the candidates are listed.
mean_perfs = []
for lr in [0.001, 0.002, 0.005]:
    cfg.TRAIN.LR = lr
    perfs = []
    for model_domain in ["books", "dvd", "elec", "kitchen"]:
        for data_domain in ["books", "dvd", "elec", "kitchen"]:
            if data_domain != model_domain:
                # Same seeds for every candidate so runs differ only by LR.
                torch.manual_seed(0)
                np.random.seed(0)

                # Load dataset
                source_train, source_train_label, source_test, source_test_label = load_dataset("../data/", 
                                                                                        task, model_domain, data_type, dim)
                target_train, target_train_label, target_test, target_test_label = load_dataset("../data/", 
                                                                                        task, data_domain, data_type, dim)

                # Split train valid data
                source_train, source_valid, source_train_label, source_valid_label = train_test_split(
                    source_train, source_train_label, test_size=0.25, shuffle=True, random_state=0)

                # Index arrays used to subset the source / target training sets.
                # NOTE(review): selection rule lives in train_utils.reduce_dataset.
                source_index, target_index = reduce_dataset(source_train, target_train)

                # Training data is handed to the trainer through cfg.
                cfg.TRAIN.SOURCE_INPUT = source_train[source_index]
                cfg.TRAIN.SOURCE_OUTPUT = source_train_label[source_index]
                cfg.TRAIN.TARGET_INPUT = target_train[target_index]

                model = ToAlign(cfg)
                model.train()

                # Source prediction on the held-out validation split.
                # Scoring on source_test here would let the test set drive
                # hyper-parameter selection and leave the split above unused.
                pred = model.predict(source_valid)

                perf = performance_acc(pred, source_valid_label)
                print("Source Prediction accuracy", model_domain, data_domain, perf, flush=True)

                perfs.append(perf)
    mean_perfs.append(np.array(perfs).mean())

In [None]:
# (source domain, target domain) pairs that are trained with the lower
# learning rate in the evaluation loop.
half_lr = [
    ("books", "elec"),
    ("books", "kitchen"),
    ("dvd", "elec"),
    ("dvd", "kitchen"),
    ("kitchen", "books"),
    ("kitchen", "dvd"),
    ("kitchen", "elec"),
]

In [None]:
# Final evaluation.
# For 10 seeds and every ordered (source domain, target domain) pair, train a
# ToAlign model and log logloss and accuracy on both the source and the target
# test sets.
#
# The learning rate is chosen per pair: 0.001 for pairs listed in `half_lr`,
# 0.002 otherwise. It must be assigned on every iteration -- setting 0.002 once
# before the loops and only ever lowering it would leave every pair after the
# first `half_lr` hit (and all later seeds) stuck at 0.001.
cfg.TRAIN.LR = 0.002
for seed in range(10):
    for model_domain in ["books", "dvd", "elec", "kitchen"]:
        for data_domain in ["books", "dvd", "elec", "kitchen"]:
            if data_domain != model_domain:
                if (model_domain, data_domain) in half_lr:
                    cfg.TRAIN.LR = 0.001
                else:
                    cfg.TRAIN.LR = 0.002

                torch.manual_seed(seed)
                np.random.seed(seed)

                # Load dataset
                source_train, source_train_label, source_test, source_test_label = load_dataset("../data/", 
                                                                                        task, model_domain, data_type, dim)
                target_train, target_train_label, target_test, target_test_label = load_dataset("../data/", 
                                                                                        task, data_domain, data_type, dim)

                # Index arrays used to subset the source / target training sets.
                # NOTE(review): selection rule lives in train_utils.reduce_dataset.
                source_index, target_index = reduce_dataset(source_train, target_train)

                # Training data is handed to the trainer through cfg.
                cfg.TRAIN.SOURCE_INPUT = source_train[source_index]
                cfg.TRAIN.SOURCE_OUTPUT = source_train_label[source_index]
                cfg.TRAIN.TARGET_INPUT = target_train[target_index]

                model = ToAlign(cfg)
                model.train()

                # Source prediction first, then target prediction; for each,
                # logloss is logged before accuracy (same order as the log
                # files were originally written).
                for split, split_input, split_label in (
                    ("source", source_test, source_test_label),
                    ("target", target_test, target_test_label),
                ):
                    pred = model.predict(split_input)

                    # (log sub-directory, name used in the printed line, metric function)
                    for log_dir, metric_name, metric_fn in (
                        ("logloss", "logloss", performance_logloss),
                        ("acc", "accuracy", performance_acc),
                    ):
                        perf = metric_fn(pred, split_label)
                        model_log("../logs/{}/".format(log_dir), task, model_domain, "nn", dim, version, 
                                 "{};{}_{}: {}".format(model_type, split, data_domain, perf))
                        print("{} Prediction {}".format(split.capitalize(), metric_name),
                              model_domain, data_domain, perf, flush=True)