In [None]:
import os
import sys
sys.path.append("../src/")
sys.path.append("../model/")

In [None]:
import numpy as np
import torch

from io_utils import load_dataset, model_log
from metric import performance_logloss, performance_pr_auc

from labelshift_correction import build_pivot_dataset
from train_utils import sample_validation_data, extend_dataset

from latent_multiDA import latent_multiDA_model, embed_nn

### Setting

In [None]:
# --- Experiment identity -----------------------------------------------------
task = "kaggle"
model_type = "nn"

# Training mode used by the adaptation cell:
#   "sup"   -> labelled target samples are passed to fit() via xtt/ytt
#   "unsup" -> fit() is called with xtt=None, ytt=None
train_mode = "sup"
version = "exp_latent_multiDA_{}".format(train_mode)

# --- Data selection ----------------------------------------------------------
source_domain = "source"
target_domain = "target"
source_version = "uni"
data_type = "uni"
# Target periods iterated by the adaptation cell (passed to load_dataset).
period = list(range(3))
# Passed as number_examples when sampling labelled target rows.
n_label = 200

# --- Model inputs ------------------------------------------------------------
# num_dim, embedding_input and embedding_dim are handed to embed_nn();
# cate_index is handed to latent_multiDA_model().
# NOTE(review): presumably 8 categorical columns (one entry per column in the
# two embedding lists) and 43 numeric ones -- confirm against embed_nn.
num_dim = 43
cate_index = 8
embedding_input = [3, 131, 4, 483, 103, 5, 106, 4]
embedding_dim = [1, 3, 1, 4, 3, 1, 3, 1]

# --- Run control -------------------------------------------------------------
# When False, the adaptation cell writes its results through model_log().
test_flag = False
# Not referenced by the cells visible in this notebook.
njobs = 20

### Adaptation

In [None]:
# Fail fast on an unsupported configuration instead of discovering it only
# after the data has been loaded and a model has been constructed.
if train_mode not in ("sup", "unsup"):
    raise ValueError("Unexpected parameters: train_mode={!r}".format(train_mode))

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for seed in range(10):
    for p in period:
        # Re-seed torch and numpy at the start of every (seed, period) run.
        torch.manual_seed(seed)
        np.random.seed(seed)

        print("Period:", p, seed, flush=True)

        # Load source data (always period 0) and target data for period p.
        source_train, source_train_label, source_test, source_test_label = load_dataset(
            "../data/", task, source_domain, data_type, 0)
        target_train, target_train_label, target_test, target_test_label = load_dataset(
            "../data/", task, target_domain, data_type, p)

        # Overwrite label column 0 with the row position (time order).
        source_train_label[:, 0] = np.arange(source_train_label.shape[0])
        target_train_label[:, 0] = np.arange(target_train_label.shape[0])

        # Sample the labelled target examples used in "sup" mode.
        target_train_index, sample_label = sample_validation_data(
            task, target_train_label, ratio=1.0, number_examples=n_label)
        target_sample = target_train[target_train_index]
        target_sample_label = target_train_label[target_train_index]

        # Count of zero labels divided by the sum of label column 1.
        # NOTE(review): this is the negative/positive ratio only if the
        # labels are binary 0/1 -- confirm.
        source_factor = (source_train_label[:, 1] == 0).sum() / source_train_label[:, 1].sum()
        target_factor = (target_train_label[:, 1] == 0).sum() / target_train_label[:, 1].sum()

        # Adjust the source training set using the two factors.
        # The returned index is not used afterwards.
        source_train, source_train_label, _pivot_index = build_pivot_dataset(
            source_train, source_train_label, target_factor, source_factor)

        # Index arrays for the source/target rows that are fed to fit().
        source_index, target_index = extend_dataset(source_train, target_train)
        xs, ys = source_train[source_index], source_train_label[source_index, 1]
        xt = target_train[target_index]

        source_perfs = []
        target_perfs_logloss = []
        target_perfs_prauc = []
        lmbda = 0.01
        for lr in [0.001, 0.003, 0.005, 0.007, 0.01]:

            # Fresh embedding network and model for every learning rate.
            embed = embed_nn(embedding_input, embedding_dim, num_dim)
            latent_model = latent_multiDA_model(embed, cate_index, device)

            if train_mode == "unsup":
                latent_model.fit(xs, ys, xt,
                                 xtt=None, ytt=None,
                                 epoch=25, batch_size=1024, lr=lr, tol=0, verbose=False)
            else:
                # train_mode == "sup" (validated before the loops).
                latent_model.fit(xs, ys, xt,
                                 xtt=target_sample, ytt=target_sample_label[:, 1], lmbda=lmbda,
                                 epoch=25, batch_size=1024, lr=lr, tol=0, verbose=False)

            # Source prediction: PR-AUC on the source test split is the
            # base of the model-selection score.
            pred = latent_model.predict(source_test)

            perf = performance_pr_auc(pred, source_test_label[:, 1])
            print("Source Prediction pr_auc:", perf, flush=True)
            source_perfs.append(perf)

            if train_mode == "sup":
                # Add the lmbda-weighted PR-AUC on the labelled target
                # sample to the selection score.
                pred = latent_model.predict(target_sample)
                perf = performance_pr_auc(pred, target_sample_label[:, 1])
                source_perfs[-1] = source_perfs[-1] + perf * lmbda

            # target prediction
            pred = latent_model.predict(target_test)

            perf = performance_logloss(pred, target_test_label[:, 1])
            target_perfs_logloss.append(perf)
            print("Target Prediction logloss:", perf, flush=True)

            perf = performance_pr_auc(pred, target_test_label[:, 1])
            target_perfs_prauc.append(perf)
            print("Target Prediction pr_auc:", perf, flush=True)

        print("Source performances:", source_perfs)
        print("Target performances Logloss:", target_perfs_logloss)
        print("Target performances pr_auc:", target_perfs_prauc)

        # Select the learning rate with the highest selection score and
        # report the target metrics obtained at that index.
        opt_ind = np.argmax(source_perfs)
        print("Target performances :", target_perfs_prauc[opt_ind])
        if not test_flag:
            model_log("../logs/logloss/", task, source_domain, model_type, p, source_version,
                      "{}: {}".format(version, target_perfs_logloss[opt_ind]))
            model_log("../logs/pr_auc/", task, source_domain, model_type, p, source_version,
                      "{}: {}".format(version, target_perfs_prauc[opt_ind]))