In [None]:
import os
import sys
# Add the sibling source/model directories to the import search path, in this order.
# NOTE(review): presumably where io_utils, metric and train_utils live — confirm.
sys.path.extend(["../src/", "../model/"])

In [None]:
import numpy as np
import torch


from io_utils import load_dataset, load_model, model_log
from metric import performance_logloss, performance_pr_auc

from train_utils import sample_validation_data

from sklearn.model_selection import train_test_split

In [None]:
# Fine-tuning experiment: for every (seed, period) pair, reload the saved
# source-domain model, freeze its input layer, fit it on a small labelled
# sample of the target-domain training data, then log and print two scores
# computed on the target-domain test split.

# ---- experiment identity ----
task = "kaggle"
model_type = "nn"

# NOTE(review): num_dim and cate_index are not referenced anywhere below.
num_dim = 43
period = [0, 1, 2]
cate_index = 8

# ---- fine-tuning hyper-parameters ----
epoch = 25
batch_size = 512
n_label = 200  # passed to sample_validation_data as number_examples

# Tag written in front of every logged score.
version = "exp_finetune"

# ---- data / checkpoint selectors ----
source_version = "uni"
data_type = "uni"
source_domain = "source"
target_domain = "target"

# Keyword arguments forwarded unchanged to model.fit on every run.
fit_options = dict(
    epoch=epoch,
    batch_size=batch_size,
    lr=0.001,
    beta=0,
    early_stop=False,
    verbose=False,
)

# (log directory, console banner, scoring function), evaluated in this order.
evaluations = (
    ("../logs/logloss/", "Target Prediction logloss", performance_logloss),
    ("../logs/pr_auc/", "Target Prediction pr_auc", performance_pr_auc),
)

for seed in range(10):
    for p in period:
        # Re-seed both RNGs at the start of every run.
        torch.manual_seed(seed)
        np.random.seed(seed)

        print("Period:", p, seed, flush=True)

        # Load the source data (always period 0) and the target data for period p.
        # NOTE(review): none of the four source_* arrays is used further down.
        source_train, source_train_label, source_test, source_test_label = load_dataset(
            "../data/", task, source_domain, data_type, 0
        )
        target_train, target_train_label, target_test, target_test_label = load_dataset(
            "../data/", task, target_domain, data_type, p
        )

        # Reload the saved source-domain model so each run starts from the same checkpoint.
        model = load_model("../model/", task, source_domain, model_type, 0, source_version)

        # Pick the labelled target examples used for fine-tuning.
        # NOTE(review): sample_label is not used afterwards.
        target_train_index, sample_label = sample_validation_data(
            task, target_train_label, ratio=1.0, number_examples=n_label
        )
        target_sample = target_train[target_train_index]
        target_sample_label = target_train_label[target_train_index]

        # Hold out 25% of the sample for validation; the split itself uses a
        # fixed random_state of 0, so it does not vary with `seed`.
        train, valid, train_label, valid_label = train_test_split(
            target_sample,
            target_sample_label,
            test_size=0.25,
            shuffle=True,
            random_state=0,
        )

        # Freeze the input layer so fine-tuning leaves its parameters untouched.
        for param in model.model.input_layer.parameters():
            param.requires_grad = False

        # Only column 1 of the label arrays is used as the training target.
        # NOTE(review): `train` is also passed as the third positional argument;
        # its role depends on model.fit's signature — confirm.
        model.fit(
            train, train_label[:, 1],
            train,
            valid, valid_label[:, 1],
            **fit_options
        )

        pred = model.predict(target_test)

        # Score the predictions against column 1 of the test labels, then
        # append each score to its log and echo it to the console.
        for log_dir, banner, score_fn in evaluations:
            perf = score_fn(pred, target_test_label[:, 1])
            entry = "{}: {}".format(version, perf)
            model_log(log_dir, task, source_domain, model_type, p, source_version, entry)
            print(banner, perf, flush=True)