In [1]:
import os, sys
import numpy as np
import torch
import json
from scipy.special import softmax

sys.path.append("..")
from singleVis.SingleVisualizationModel import VisModel
from singleVis.data import ActiveLearningDataProvider
from singleVis.projector import TimeVisProjector,tfDVIProjector
from singleVis.trajectory_manager import Recommender

In [2]:
# Load the spreadsheet of previously recorded feedback-evaluation results.
import pandas as pd

eval_path = "/home/xianglin/projects/DVI_data/active_learning/random/resnet18/feedback.xlsx"
col = np.array(["task", "dataset", "method", "rate", "tolerance", "iter", "eval"])
# One dtype per column, listed in the same order as `col`; the first sheet
# column is used as the row index.
df = pd.read_excel(
    eval_path,
    index_col=0,
    dtype=dict(
        task=str,
        dataset=str,
        method=str,
        rate=int,
        tolerance=float,
        iter=int,
        eval=float,
    ),
)

In [5]:
# Show the rows recorded for one specific experiment configuration.
df.loc[
    (df["task"] == "RA_M")
    & (df["dataset"] == "cifar10")
    & (df["method"] == "TimeVis")
    & (df["rate"] == 30)
    & (df["iter"] == 3)
]

In [2]:
# Experiment selection: these values pick the content directory and the
# per-method section of config.json that the following cells load.
DATASET = "MNIST"
RATE = "10"  # kept as a string; in this notebook it is only interpolated into CONTENT_PATH
VIS_METHOD = "TimeVis"  # the setup cell handles "DVI" and "TimeVis" and raises NotImplementedError otherwise

In [3]:
# Directory of the selected experiment (dataset / labelling rate).
CONTENT_PATH = "/home/xianglin/projects/DVI_data/active_learning/random/resnet18/{}/{}".format(
    DATASET, RATE
)
# Put the experiment directory on the import path (presumably so that the
# `Model` package stored there can be imported later on).
sys.path.append(CONTENT_PATH)

with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
    # Only the section of the chosen visualization method is kept.
    config = json.load(f)[VIS_METHOD]

In [4]:
# General settings taken from the per-method config section.
CLASSES, GPU_ID = config["CLASSES"], config["GPU"]
EPOCH_START, EPOCH_END, EPOCH_PERIOD = (
    config[key] for key in ("EPOCH_START", "EPOCH_END", "EPOCH_PERIOD")
)
# Number of checkpoints on each embedding trajectory, both end points included.
# NOTE: the misspelled name is kept because later cells refer to it.
TOTOAL_EPOCH = 1 + (EPOCH_END - EPOCH_START) // EPOCH_PERIOD

# Subject-model training settings: network name and number of training samples.
TRAINING_PARAMETER = config["TRAINING"]
NET, LEN = (TRAINING_PARAMETER[key] for key in ("NET", "train_num"))

# Visualization-model settings: layer sizes and the name of the saved model.
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
ENCODER_DIMS, DECODER_DIMS, VIS_MODEL_NAME = (
    VISUALIZATION_PARAMETER[key]
    for key in ("ENCODER_DIMS", "DECODER_DIMS", "VIS_MODEL_NAME")
)

# Run on the configured GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

# `Model` is importable because CONTENT_PATH was appended to sys.path earlier.
import Model.model as subject_model
# Look the network factory up by name instead of eval()-ing a string built from
# config.json: for a plain attribute name the result is the same, but text in
# the config file can no longer be executed as code.
net = getattr(subject_model, NET)()
# Data access object for this experiment, starting at EPOCH_START.
data_provider = ActiveLearningDataProvider(CONTENT_PATH, net, EPOCH_START,device=DEVICE, classes=CLASSES, iteration_name="Epoch")

# Build the projector that produces the 2-D embeddings for the selected method.
if VIS_METHOD == "TimeVis":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    projector = TimeVisProjector(
        vis_model=model,
        content_path=CONTENT_PATH,
        vis_model_name=VIS_MODEL_NAME,
        device=DEVICE,
    )
elif VIS_METHOD == "DVI":
    flag = "_temporal_id_withoutB"
    projector = tfDVIProjector(CONTENT_PATH, flag=flag)
else:
    # Only the two methods above are wired up in this notebook.
    raise NotImplementedError

In [None]:
# Per-sample uncertainty at the final epoch: one minus the largest softmax
# probability of the subject model's prediction.
samples = data_provider.train_representation_all(EPOCH_END)
pred = data_provider.get_pred(EPOCH_END, samples)
confidence = softmax(pred, axis=1).max(axis=1)
uncertainty = 1.0 - confidence

In [None]:
# One slot per checkpoint; TOTOAL_EPOCH counts EPOCH_END as well.
# 512 is the hard-coded representation width (assumed to match what
# train_representation_all returns for this network; TODO confirm).
samples = np.zeros((TOTOAL_EPOCH, LEN, 512))
# The stop value is EPOCH_END + 1 so that the final checkpoint is loaded too;
# stopping at EPOCH_END left the last slot filled with zeros.
for i in range(EPOCH_START, EPOCH_END + 1, EPOCH_PERIOD):
    e = (i - EPOCH_START) // EPOCH_PERIOD  # slot index of checkpoint i
    samples[e] = data_provider.train_representation_all(i)

In [None]:
# 2-D position of every sample at every checkpoint; TOTOAL_EPOCH counts EPOCH_END as well.
embeddings_2d = np.zeros((TOTOAL_EPOCH, LEN, 2))
# The stop value is EPOCH_END + 1 so that the final checkpoint is projected too;
# stopping at EPOCH_END left the last trajectory point at (0, 0) for every sample.
# samples[e] is expected to hold the representations of checkpoint i for every slot.
for i in range(EPOCH_START, EPOCH_END + 1, EPOCH_PERIOD):
    e = (i - EPOCH_START) // EPOCH_PERIOD  # slot index of checkpoint i
    embeddings_2d[e] = projector.batch_project(i, samples[e])
# Reorder (checkpoint, sample, xy) -> (sample, checkpoint, xy): one trajectory per sample.
embeddings_2d = np.transpose(embeddings_2d, [1, 0, 2])

In [None]:
# Persist the trajectories under CONTENT_PATH/Model, one file per visualization method.
path = os.path.join(
    CONTENT_PATH,
    "Model",
    "{}_trajectory_embeddings.npy".format(VIS_METHOD),
)
np.save(path, embeddings_2d)

In [None]:
# Reload the trajectories written by the saving cell, so the projection
# does not have to be recomputed.
path = os.path.join(
    CONTENT_PATH,
    "Model",
    "{}_trajectory_embeddings.npy".format(VIS_METHOD),
)
embeddings_2d = np.load(path)

In [None]:
# Sanity check: display the shapes of the three arrays built so far.
tuple(arr.shape for arr in (samples, uncertainty, embeddings_2d))

In [5]:
# Labels of the training samples, queried at the final epoch.
labels = data_provider.train_labels_all(EPOCH_END)
# Split the LEN training samples into the ones already labeled at EPOCH_END
# and the rest (presumably returned as index arrays; ulb_idxs is used below
# to index numpy arrays).
lb_idxs = data_provider.get_labeled_idx(EPOCH_END)
ulb_idxs = data_provider.get_unlabeled_idx(LEN, lb_idxs)

In [None]:
# Restrict uncertainty and trajectories to the unlabeled pool; row k of both
# results corresponds to sample ulb_idxs[k].
ulb_uncertainty = uncertainty[ulb_idxs]
ulb_trajectory = embeddings_2d[ulb_idxs]

In [None]:
# Build the sample recommender on the unlabeled pool and store it on disk.
import time
import pickle

t_start = time.time()
tm = Recommender(ulb_uncertainty, ulb_trajectory, cls_num=30, period=15, metric="a")
tm.clustered()
t_end = time.time()  # t_end - t_start: wall-clock time of construction plus clustering

with open(
    os.path.join(CONTENT_PATH, '{}_sample_recommender.pkl'.format(VIS_METHOD)),
    'wb',
) as f:
    pickle.dump(tm, f, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
import pickle

# Restore the recommender stored by the fitting cell.
# NOTE(review): unpickling executes code embedded in the file, so only load a
# recommender file that was produced by a trusted run of this notebook.
with open(
    os.path.join(CONTENT_PATH, '{}_sample_recommender.pkl'.format(VIS_METHOD)),
    'rb',
) as f:
    tm = pickle.load(f)

In [7]:
def add_noise(rate, acc_idxs, rej_idxs):
    """Simulate noisy feedback by swapping a fraction of accepted and rejected indices.

    A share ``rate`` of ``acc_idxs`` is moved to the rejected set and a share
    ``rate`` of ``rej_idxs`` is moved to the accepted set.

    Args:
        rate: fraction of each set to flip. ``0`` returns the inputs untouched;
            values above 1 flip the whole set.
        acc_idxs: 1-D numpy integer array of accepted sample indices.
        rej_idxs: 1-D numpy integer array of rejected sample indices.

    Returns:
        ``(new_acc, new_rej)``: the accepted and rejected index arrays after the
        swap. The kept part of each array is sorted and de-duplicated (it comes
        from ``np.setdiff1d``) and is followed by the flipped indices.
    """
    if rate == 0:
        return acc_idxs, rej_idxs

    def pick_flipped(idxs):
        # Number of entries to flip, never more than what is available.
        count = min(int(len(idxs) * rate), len(idxs))
        if count == 0:
            # Nothing to draw; an empty slice keeps the dtype of the input.
            return idxs[:0]
        # replace=False: every flipped index is distinct. Drawing with
        # replacement produced duplicates, which lowered the effective noise
        # rate and put repeated indices into the returned sets.
        return idxs[np.random.choice(len(idxs), size=count, replace=False)]

    acc_noise = pick_flipped(acc_idxs)
    rej_noise = pick_flipped(rej_idxs)

    new_acc = np.concatenate((np.setdiff1d(acc_idxs, acc_noise), rej_noise), axis=0)
    new_rej = np.concatenate((np.setdiff1d(rej_idxs, rej_noise), acc_noise), axis=0)
    return new_acc, new_rej


def init_sampling(tm, method, round, budget, ulb_wrong):
    """Measure how often the recommender's initial suggestions hit wrong predictions.

    Every trial asks ``tm.sample_batch_init`` for ``budget`` suggestions without
    any prior feedback and records the share of them found in ``ulb_wrong``.

    Args:
        tm: recommender exposing ``sample_batch_init(accepted, rejected, budget)``.
        method: name of the visualization method, used only in the log line.
        round: number of independent trials to average over (must be positive).
        budget: number of suggestions requested per trial.
        ulb_wrong: indices of unlabeled samples counted as hits.

    Returns:
        The hit rate averaged over all trials.

    Relies on the notebook-level ``ulb_idxs`` to translate the recommender's
    pool positions into sample indices.
    """
    print("Feedback sampling initialization ({}):".format(method))
    hit_rates = []
    for _ in range(round):
        # Each trial starts from scratch: nothing accepted, nothing rejected.
        accepted = np.zeros(0, dtype=np.int32)
        rejected = np.zeros(0, dtype=np.int32)

        picked, _ = tm.sample_batch_init(accepted, rejected, budget)
        hits = np.intersect1d(ulb_idxs[picked], ulb_wrong)
        hit_rates.append(len(hits) / budget)

    mean_rate = sum(hit_rates) / len(hit_rates)
    print("Init success Rate:\t{:.4f}".format(mean_rate))
    return mean_rate


def feedback_sampling(tm, method, round, budget, ulb_wrong, noise_rate=0):
    """Simulate several rounds of recommendation with (optionally noisy) user feedback.

    Round 0 uses ``tm.sample_batch_init``; every later round passes all feedback
    collected so far to ``tm.sample_batch``. A suggestion counts as a hit when it
    is contained in ``ulb_wrong``. Before being fed back, the hit / miss labels of
    each round are perturbed with ``add_noise(noise_rate, ...)``.

    Args:
        tm: recommender exposing ``sample_batch_init`` and ``sample_batch``.
        method: name of the visualization method, used only in the log line.
        round: number of feedback rounds; must be at least 1.
        budget: number of suggestions requested per round.
        ulb_wrong: indices of unlabeled samples counted as hits.
        noise_rate: fraction of feedback labels flipped per round (0 = no noise).

    Returns:
        Array of length ``round`` whose entry ``i`` is the mean hit rate over
        rounds ``0..i``.

    Relies on the notebook-level ``ulb_idxs`` and ``add_noise``.
    """
    print("Feedback sampling ({}) with noise rate {}:".format(method, noise_rate))
    rate = np.zeros(round)
    correct = np.array([]).astype(np.int32)
    wrong = np.array([]).astype(np.int32)

    # Sample index -> position inside ulb_idxs, built once. setdefault keeps the
    # first occurrence, matching what list.index returned; the dict replaces a
    # linear scan per lookup.
    position_of = {}
    for position, sample_idx in enumerate(ulb_idxs.tolist()):
        position_of.setdefault(sample_idx, position)

    def to_pool_positions(sample_idxs):
        # The recommender works on positions in the unlabeled pool, not on sample indices.
        return np.array([position_of[i] for i in sample_idxs.tolist()]).astype(np.int32)

    # Round 0: no feedback available yet.
    suggest_idxs, _ = tm.sample_batch_init(to_pool_positions(correct), to_pool_positions(wrong), budget)
    suggest_idxs = ulb_idxs[suggest_idxs]
    correct = np.intersect1d(suggest_idxs, ulb_wrong)
    wrong = np.setdiff1d(suggest_idxs, correct)
    rate[0] = len(correct) / budget
    # inject noise
    correct, wrong = add_noise(noise_rate, correct, wrong)

    # Stays None when round == 1, i.e. when sample_batch is never called; without
    # this default the final print raised UnboundLocalError in that case.
    coef_ = None
    for r in range(1, round):
        suggest_idxs, _, coef_ = tm.sample_batch(to_pool_positions(correct), to_pool_positions(wrong), budget, True)
        suggest_idxs = ulb_idxs[suggest_idxs]

        # suggest_idxs is taken from ulb_idxs, so intersecting with ulb_idxs
        # first would not change the result.
        c = np.intersect1d(suggest_idxs, ulb_wrong)
        w = np.setdiff1d(suggest_idxs, c)
        rate[r] = len(c) / budget

        # inject noise
        c, w = add_noise(noise_rate, c, w)
        correct = np.concatenate((correct, c), axis=0)
        wrong = np.concatenate((wrong, w), axis=0)
    print("Success Rate:\t{:.4f}".format(sum(rate)/len(rate)))
    # Running mean of the per-round hit rate.
    ac_rate = np.array([rate[:i].mean() for i in range(1, len(rate)+1)])
    print("Feature Importance: {}".format(coef_))
    return ac_rate

In [8]:
# meta info: labeled / unlabeled split at the final epoch
lb_idxs = data_provider.get_labeled_idx(EPOCH_END)
ulb_idxs = data_provider.get_unlabeled_idx(LEN, lb_idxs)

data = data_provider.train_representation_all(EPOCH_END)
labels = data_provider.train_labels_all(EPOCH_END)
# Predicted class per training sample at the final epoch.
pred = data_provider.get_pred(EPOCH_END, data).argmax(1)
# flatnonzero always yields a 1-D index array; argwhere(...).squeeze() collapsed
# to a 0-d array when exactly one prediction was wrong. For 1-D pred / labels
# with any other number of mismatches the values are unchanged.
wrong_pred_idx = np.flatnonzero(pred != labels)
# Wrongly predicted samples that are still unlabeled: the targets of the sampling experiments.
ulb_wrong = np.intersect1d(wrong_pred_idx, ulb_idxs)

In [9]:
# Hyperparameters of the sampling experiments below.
BUDGET = 50  # number of samples drawn / suggested per round
TOLERANCE = 0.1  # not referenced by any cell visible in this notebook
ROUND = 10  # feedback rounds per experiment
INIT_ROUND = 10000  # number of repetitions used to average the initial (no-feedback) success rate

In [10]:
# Baseline for the initial round: draw BUDGET unlabeled samples uniformly at
# random, INIT_ROUND times, and average the share that is wrongly predicted.
print("Random sampling init")
pool = np.array(ulb_idxs)
random_rate = []
for _ in range(INIT_ROUND):
    s_idxs = np.random.choice(pool, size=BUDGET, replace=False)
    random_rate.append(np.intersect1d(s_idxs, ulb_wrong).size / BUDGET)
print("Success Rate:\t{:.4f}".format(sum(random_rate) / len(random_rate)))

In [11]:
# Initial-round success rate of the recommender, averaged over INIT_ROUND
# repetitions without any feedback; the returned mean is shown as the cell output.
init_sampling(tm=tm, method=VIS_METHOD, round=INIT_ROUND, budget=BUDGET, ulb_wrong=ulb_wrong)

In [15]:
# Baseline for the feedback experiment: ROUND rounds of uniform random draws,
# where samples drawn in one round are removed from the pool for later rounds.
print("Random sampling feedback:")
pool = np.array(ulb_idxs)
random_rate = []
for _ in range(ROUND):
    s_idxs = np.random.choice(pool, size=BUDGET, replace=False)
    random_rate.append(np.intersect1d(s_idxs, ulb_wrong).size / BUDGET)
    # Do not draw the same sample again in a later round.
    pool = np.setdiff1d(pool, s_idxs)
print("Success Rate:\t{:.4f}".format(sum(random_rate) / len(random_rate)))

In [12]:
# Feedback-driven sampling with exact (noise-free) feedback; the cell output
# is the running mean success rate per round.
feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, ulb_wrong=ulb_wrong, noise_rate=0.0)

In [13]:
# Noise tolerance: same experiment, but 5% of the feedback labels are flipped
# in every round before they are passed back to the recommender.
feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, ulb_wrong=ulb_wrong, noise_rate=.05)