In [1]:
import scipy.stats as stats
from scipy.special import expit
from random import seed
from random import randrange
import pandas as pd
import scipy
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
import argparse 
import torch.nn as nn 
import random
from torch.utils.data import Dataset, TensorDataset, DataLoader
from skmultilearn.dataset import load_dataset
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import hamming_loss, roc_auc_score
from sklearn.linear_model import LogisticRegression
import time 
from tqdm.notebook import tqdm


nrep = 5
dataset = 'real_focus'
hidd = 2
rand_test = 0
C = 0.05



class Net(torch.nn.Module):
    def __init__(self, input_size, num_classes, hidden = 128):
        super(Net, self).__init__()
        self.norm0 = torch.nn.BatchNorm1d(input_size)
        self.linear1 = torch.nn.Linear(input_size, hidden)
        self.linear2 = torch.nn.Linear(hidden, num_classes)
        self.drop_layer = nn.Dropout(p=0.2)

    def emb(self, x):
        emb = self.linear1(x)
        return emb

    def forward(self, x):

        hidd1 = self.linear1(x)
        out = self.linear2(hidd1)
        out = self.drop_layer(out)
        return out



class SelNet(torch.nn.Module):
    def __init__(self, input_size, num_classes, hidden = 128):
        super(SelNet, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, hidden)
        self.linear2 = torch.nn.Linear(hidden, num_classes)

    def emb(self, x):
        emb = self.linear1(x)
        return emb

    def forward(self, x):
        hidd1 = F.relu(self.linear1(x))
        out = self.linear2(hidd1)
        return out

def find_instance(w):
    # input worker id 
    # return the instances worker labeled
    return np.where(human[w][0].any(axis=1))[0]

def find_worker(ins):
    # input instance id 
    # return which workers labeled it
    ws = []
    for w in range(18):
        if ins in  np.where(human[w][0].any(axis=1))[0]:
            ws.append(w)
    return np.array(ws)



def find_unlabeled(human):
    inss = []
    for ins in range(700):
        ws = []
        for w in range(18):
            if ins in  np.where(human[w][0].any(axis=1))[0]:
                ws.append(w)
        if len(ws) == 0:
            inss.append(ins)
    return np.array(inss)



@ torch.no_grad()
def test_multi(model, X_test, y_test):
    set_seed(999)
    model.eval()
    model.cpu()
    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    total_reward = labels.reshape(-1).sum()
    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))
    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())
    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc


@ torch.no_grad()
def test_multi_human(model, X_test, y_test, human, selector):
    set_seed(999)
    model.cpu()
    human.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)

    pred = out[range(y_test.shape[0]),treat]
    htreat = np.argmax(human(X_test.float()).detach().cpu().numpy(),1)
    htreat = torch.from_numpy(htreat)
    if_alg = selector(X_test.float())
    if_alg = torch.sigmoid(if_alg)[range(y_test.shape[0]),[0]*y_test.shape[0]]
    treat = torch.where(if_alg > 0.5, treat, htreat)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    total_reward = labels.reshape(-1).sum()

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    return total_reward, ham_loss, auc

@ torch.no_grad()
def test_real_human(model, X_test, y_test, human, selector, C=0):
    set_seed(999)
    model.eval()
    model.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)
    alg_treat = treat

    pred = out[range(y_test.shape[0]),treat]
    pred = out[range(y_test.shape[0]),treat]

    treatment = []
    for ins in indices_test:
        #ws = np.array([9, 3, 11])
        ws = np.array([0, 1, 2,3,4])
        # randomly sample a worker
        w = np.random.choice(ws)
        full = human[w][ins]
        treatment.append(randargmax(full))

    htreat = np.array(treatment)

    htreat = torch.from_numpy(htreat)
    if_alg = selector(X_test.float())
    if_alg = torch.sigmoid(if_alg)[range(y_test.shape[0]),[0]*y_test.shape[0]]
    print('fraction:')
    print((if_alg>0.5).float().mean())
    treat = torch.where(if_alg > 0.5, treat, htreat)

    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    labels = labels.float()
    this_reward = labels.reshape(-1)
    this_reward = torch.where(if_alg > 0.5, labels, labels-C)
    total_reward = this_reward.reshape(-1).sum()

    print('Pure Alg Reward:')
    labels = y_test[range(y_test.shape[0]),alg_treat.numpy()]
    alg_reward = labels.reshape(-1).sum()
    print(alg_reward)

    print('Pure Human Reward:')
    labels = y_test[range(y_test.shape[0]),htreat.numpy()]
    labels = labels.float()
    labels = labels - C
    human_reward = labels.reshape(-1).sum()
    print(human_reward)

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc, human_reward

@ torch.no_grad()
def test_real_human_person(model, X_test, y_test, human, selector, C = 0):
    set_seed(999)
    model.eval()
    model.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)

    pred = out[range(y_test.shape[0]),treat]
    pred = out[range(y_test.shape[0]),treat]


    if_alg = selector(X_test.float())
    if_alg = F.softmax(if_alg, 1)

    print(if_alg)
    _, selection = torch.max(if_alg, 1)

    ma = {0:0,1:1,2:2,3:3,4:4}
    final_treat = []

    treatment = []
    treat = treat.numpy()
    human_cost = 0 
    count1 = 0
    count2 = 0
    count3 = 0
    count4 = 0
    count5 = 0
    for ind, ins in enumerate(indices_test):
        i = selection[ind]
        if i == 0:
            fulllabel = human[0][ins]
            treatment.append(randargmax(fulllabel))
            human_cost += C
            count1 += 1
        if i == 1:
            fulllabel = human[1][ins]
            treatment.append(randargmax(fulllabel))
            human_cost += C
            count2 += 1
        if i == 2:
            fulllabel = human[2][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count3 += 1
        if i == 3:
            fulllabel = human[3][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count4 += 1
        if i == 4:
            fulllabel = human[4][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count5 += 1
        if i == 5:
            treatment.append(treat[ind])                  
    print(count1, count2, count3, count4, count5)
    treat = np.array(treatment)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat]
    total_reward = labels.reshape(-1).sum() - human_cost

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc



def train_ips(trainloader, input_dim, output_dim, lamb = 0):
    model = Net(input_dim, output_dim, hidden = hidd)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    #optimizer = torch.optim.SGD(model.parameters(), lr = 1e-1, momentum=0.9, weight_decay = 1e-4)
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in tqdm(range(num_epochs)):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * out / logp
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model

def train_ips_2s(trainloader, input_dim, output_dim, model, lamb = 0, C = 0):
    model = model
    #model = Net(input_dim, output_dim, hidden = hidd)
    selector = SelNet(input_dim, 1, hidden = 16)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    #optimizer = torch.optim.SGD(selector.parameters(), lr = 1e-2, momentum=0.9, weight_decay = 1e-4)
    optimizer = torch.optim.Adam(selector.parameters(), lr = 1e-3, weight_decay = 1e-4)
    num_epochs = 2000
    min_loss = 1e3
    for epoch in tqdm(range(num_epochs)):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            with torch.no_grad():
                out = model(inputx.float())
                out = F.softmax(out, 1)
                out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = torch.sigmoid(sel_prob).reshape(-1)
            #print('prob')
            #print(sel_prob)
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob * out / logp) - (1-sel_prob) * (reward - lamb - C)
            '''
            print('sel_prob')
            print(sel_prob)
            print('alg_reward')
            print(reward *  out / logp)
            print('human reward')
            print(reward)
            '''
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model, selector


def train_ips_hai(trainloader, model, selector, input_dim, output_dim, lamb = 0, C = 0):
    model = Net(input_dim, output_dim, hidden = hidd)
    #selector = SelNet(input_dim, 1, hidden = 16)
    #model = model
    selector = selector
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(list(model.parameters())+list(selector.parameters()), lr = 1e-3, weight_decay = 1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-4, momentum = 0.9, weight_decay = 1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-3, momentum=0.9, weight_decay = 1e-4)
    #model_optimizer = torch.optim.SGD(list(model.parameters()), lr = 0.01, momentum=0.9, weight_decay = 1e-4)
    #sel_optimizer = torch.optim.Adam(list(selector.parameters()), lr = 0.01, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in tqdm(range(num_epochs)):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = torch.sigmoid(sel_prob).reshape(-1)
            #print('prob')
            #print(sel_prob)
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob * out / logp) - (1-sel_prob) * (reward - lamb - C)
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model, selector


def train_ips_hai_person(trainloader, model, input_dim, output_dim, lamb = 0, C = 0):
    #model = Net(input_dim, output_dim, hidden = hidd)
    model = model
    selector = SelNet(input_dim, 6, hidden = 16)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(list(model.parameters())+list(selector.parameters()), lr = 1e-3,weight_decay=1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-1, momentum=0.9, weight_decay = 1e-4)
    #temp_optimizer = torch.optim.SGD([temperature], lr=0.001, momentum=0.9, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in tqdm(range(num_epochs)):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob, prop, wids) in enumerate(trainloader):            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = F.softmax(sel_prob, 1)
            
            logp = logging_prob
            #logp = logging_prob[range(out.size(0)),treatment]
            #logp = torch.sum(prop, 1) * 1 / 5
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob[:,5] * out / logp) - (reward - lamb - C) * 5 * sel_prob[:,wids] 
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))   
    return model, selector

def randargmax(b,**kw):
  """ a random tie-breaking argmax"""
  return np.argmax(np.random.random(b.shape) * (b==b.max()), **kw)

def set_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

opt_result = []
ips_result = []
ips_loss = []
ips_time = []
ips_auc = []
ips2s_result = []
ips2s_loss = []
ips2s_time = []
ips2s_auc = []
ipsj_result = []
ipsj_loss = []
ipsj_time = []
ipsj_auc = []
ipsjp_result = []
ipsjp_loss = []
ipsjp_time = []
ipsjp_auc = []
human_result = []

lamb = 0.
#C = 0
dataset0 = dataset

for r in range(nrep):

    set_seed(r)

    if dataset0 == 'real_focus':
        data = pd.read_csv('/home/ruijiang/E/utaustin/project/cost_efficient_labeling/Quick-and-Dirty/data/real/data.csv')
        annotation = pd.read_csv('/home/ruijiang/E/utaustin/project/cost_efficient_labeling/Quick-and-Dirty/data/real/focus.csv')
        data = data.iloc[:,4:]
        X = data.values
        y = annotation['gt'].values
        a = np.zeros((X.shape[0],2))
        a[range(a.shape[0]),y] = 1
        y = a
        human = dict()
        for w in range(5):
            temp = np.array(annotation[str(w)])
            anno = np.zeros((X.shape[0],2))
            anno[range(anno.shape[0]),temp] = 1
            human[w] = anno
        X, X_test, y, y_test, indices_train, indices_test = train_test_split(X, y, range(X.shape[0]), test_size=0.3, random_state =  r)
        print(X.shape)
        print(X_test.shape)
        y_all = y
        X_test = torch.from_numpy(X_test)
        ws = range(5)
    # human generate the logging policy
    np.random.seed(r + 81)
    treatment = []
    wids = []
    for ins in indices_train:
        # randomly sample a worker
        w = np.random.choice(ws)
        wids.append(w)
        full = human[w][ins]
        treatment.append(randargmax(full))
    treatment = np.array(treatment)
    wids = np.array(wids)
    #m = {3:0,9:1,11:2}
    m = {0:0,1:1,2:2,3:3,4:4}
    wids_map = np.array([m[i] for i in wids])
    # record instances recorded by )
    wids_oh = np.zeros((wids.size, wids_map.max()+1))
    wids_oh[np.arange(wids.size),wids_map] = 1
    y = y_all[range(y_all.shape[0]), treatment]

    print(set(treatment))
    y = torch.from_numpy(y)

    # for ips baseline:
    # learn logging policy 
    Xw = np.append(X,wids_oh,axis=1)
    #log_est = LogisticRegression(random_state=0).fit(Xw, treatment)
    #log_est0 = LogisticRegression(random_state=0).fit(X, treatment)
    log_est = RandomForestClassifier(random_state=0).fit(Xw, treatment)
    #log_est = MLPClassifier(random_state=0, max_iter=10000).fit(Xw, treatment)
    #log_est = MLPClassifier(random_state=0,max_iter = 10000).fit(X, treatment)
    log_est0 = RandomForestClassifier(random_state=0).fit(X, treatment)
    #log_est0 = MLPClassifier(random_state=0, max_iter=10000).fit(X, treatment)


    logging_prob = log_est0.predict_proba(X)
    logging_prob = torch.from_numpy(logging_prob)

    X = torch.from_numpy(X)
    treatment = torch.from_numpy(treatment)

    print('Current Rep: ', r)
    print('Dataset : ', dataset)
    print('Number of samples: ', X.shape[0])
    #model = generate_logging_policy(X, y, frac = 0.05)

    dataset = TensorDataset(X, treatment, y, logging_prob)
    trainloader = DataLoader(dataset, batch_size = 8, shuffle = True)
    
    # Naive IPS
    start = time.time()
    ips = train_ips(trainloader, X.shape[1], y_all.shape[1], lamb = lamb)
    end = time.time()
    rev, _, _ = test_multi(ips, X_test, y_test)
    print('IPS = %.2f' % rev)
    ips_result.append(rev)
    ips_fix = ips
    
    start = time.time()
    ips, selector = train_ips_2s(trainloader, X.shape[1], y_all.shape[1], ips, lamb = lamb, C = C)
    end = time.time()
    rev, _, _, hreward = test_real_human(ips, X_test, y_test, human, selector, C = C)
    print('TS Rev = %.2f' % rev)
    ips2s_result.append(rev)
    human_result.append(hreward)
    
    start = time.time()
    ips, selector = train_ips_hai(trainloader, ips, selector, X.shape[1], y_all.shape[1], lamb = lamb, C = C)
    end = time.time()
    rev, _, _, _ = test_real_human(ips, X_test, y_test, human, selector, C = C)
    print('JC Rev = %.2f' % rev)
    ipsj_result.append(rev)
    
    X = X.numpy()
    Xw = np.append(X,wids_oh,axis=1)
    logging_prob = log_est.predict_proba(Xw)
    logging_prob = torch.from_numpy(logging_prob)
    X = torch.from_numpy(X)
    w1 = np.zeros((wids.size, wids_map.max()+1))
    w1[:,0] = 1
    w1 = torch.from_numpy(w1)
    w2 = np.zeros((wids.size, wids_map.max()+1))
    w2[:,1] = 1
    w2 = torch.from_numpy(w2)
    w3 = np.zeros((wids.size, wids_map.max()+1))
    w3[:,2] = 1
    w3 = torch.from_numpy(w3)
    w4 = np.zeros((wids.size, wids_map.max()+1))
    w4[:,3] = 1
    w4 = torch.from_numpy(w4)
    w5 = np.zeros((wids.size, wids_map.max()+1))
    w5[:,4] = 1
    w5 = torch.from_numpy(w5)
    prop = torch.from_numpy(np.stack([log_est.predict_proba(np.append(X,w1,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w2,axis=1))[np.arange(X.shape[0]),treatment],
        log_est.predict_proba(np.append(X,w3,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w4,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w5,axis=1))[np.arange(X.shape[0]),treatment]],axis=1))
    logging_prob = prop[range(prop.shape[0]),wids_map]
    wids_map = torch.from_numpy(wids_map)
    dataset = TensorDataset(X, treatment, y, logging_prob, prop, wids_map)
    trainloader = DataLoader(dataset, batch_size = 8, shuffle = True)
    
    start = time.time()
    ips, selector = train_ips_hai_person(trainloader, ips, X.shape[1], y_all.shape[1], lamb = lamb, C = C)
    end = time.time()
    rev, _, _ = test_real_human_person(ips, X_test, y_test, human, selector, C = C)
    print('JCP Rev = %.2f' % rev)
    ipsjp_result.append(rev)


(700, 292)
(300, 292)
{0, 1}
Current Rep:  0
Dataset :  real_focus
Number of samples:  700


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.461158
Epoch[1000/5000], loss: -0.941453
Epoch[2000/5000], loss: -0.938224
Epoch[3000/5000], loss: -0.938767
Epoch[4000/5000], loss: -0.943260
test loss:  tensor(1.2155, dtype=torch.float64)
hamming loss: 0.23666666666666666
total reward:  tensor(229., dtype=torch.float64)
auc:  0.7339846252402306
IPS = 229.00


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch[0/2000], loss: -0.886355
Epoch[1000/2000], loss: -0.946903
fraction:
tensor(0.9967)
Pure Alg Reward:
tensor(229., dtype=torch.float64)
Pure Human Reward:
tensor(232.0000)
test loss:  tensor(1.2155, dtype=torch.float64)
hamming loss: 0.23666666666666666
total reward:  tensor(227.9500)
auc:  0.7339846252402306
TS Rev = 227.95


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.551704
Epoch[1000/5000], loss: -0.921517
Epoch[2000/5000], loss: -0.915742
Epoch[3000/5000], loss: -0.917560
Epoch[4000/5000], loss: -0.920954
fraction:
tensor(0.7733)
Pure Alg Reward:
tensor(223., dtype=torch.float64)
Pure Human Reward:
tensor(232.0000)
test loss:  tensor(1.5551, dtype=torch.float64)
hamming loss: 0.25666666666666665
total reward:  tensor(242.6000)
auc:  0.5117931395958302
JC Rev = 242.60


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.782421
Epoch[1000/5000], loss: -1.103883
Epoch[2000/5000], loss: -1.108832
Epoch[3000/5000], loss: -1.151796
Epoch[4000/5000], loss: -1.090226
tensor([[5.6720e-05, 1.3421e-04, 4.5004e-03, 7.1101e-05, 3.9290e-05, 9.9520e-01],
        [5.7208e-06, 1.5031e-06, 4.9327e-03, 7.4361e-06, 3.7388e-06, 9.9505e-01],
        [2.1834e-03, 6.7432e-02, 7.4024e-01, 2.6630e-03, 1.5790e-03, 1.8590e-01],
        ...,
        [5.8799e-04, 2.4273e-02, 5.4531e-02, 7.3930e-04, 4.0500e-04, 9.1946e-01],
        [3.0561e-06, 2.8328e-04, 6.4126e-07, 3.8062e-06, 2.0579e-06, 9.9971e-01],
        [2.5589e-06, 1.1448e-03, 9.9885e-01, 3.2642e-06, 1.8055e-06, 7.1602e-08]])
0 121 101 0 0
test loss:  tensor(1.4211, dtype=torch.float64)
hamming loss: 0.25666666666666665
total reward:  tensor(257.9000, dtype=torch.float64)
auc:  0.5033923475627512
JCP Rev = 257.90
(700, 292)
(300, 292)
{0, 1}
Current Rep:  1
Dataset :  <torch.utils.data.dataset.TensorDataset object at 0x7f3562184e48>
Number of samp

  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.591901
Epoch[1000/5000], loss: -0.939943
Epoch[2000/5000], loss: -0.952745
Epoch[3000/5000], loss: -0.938183
Epoch[4000/5000], loss: -0.942530
test loss:  tensor(1.3080, dtype=torch.float64)
hamming loss: 0.2633333333333333
total reward:  tensor(221., dtype=torch.float64)
auc:  0.6984304932735426
IPS = 221.00


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch[0/2000], loss: -0.891462
Epoch[1000/2000], loss: -0.939814
fraction:
tensor(0.9967)
Pure Alg Reward:
tensor(221., dtype=torch.float64)
Pure Human Reward:
tensor(238.0000)
test loss:  tensor(1.3080, dtype=torch.float64)
hamming loss: 0.2633333333333333
total reward:  tensor(219.9500)
auc:  0.6984304932735426
TS Rev = 219.95


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.562037
Epoch[1000/5000], loss: -0.921865
Epoch[2000/5000], loss: -0.916219
Epoch[3000/5000], loss: -0.912456
Epoch[4000/5000], loss: -0.916698
fraction:
tensor(0.7800)
Pure Alg Reward:
tensor(223., dtype=torch.float64)
Pure Human Reward:
tensor(238.0000)
test loss:  tensor(1.5594, dtype=torch.float64)
hamming loss: 0.25666666666666665
total reward:  tensor(240.7000)
auc:  0.572287577892959
JC Rev = 240.70


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.793020
Epoch[1000/5000], loss: -1.086794
Epoch[2000/5000], loss: -1.095784
Epoch[3000/5000], loss: -1.106419
Epoch[4000/5000], loss: -1.090041
tensor([[9.2826e-03, 7.9566e-01, 1.6314e-01, 2.5167e-03, 1.7991e-03, 2.7602e-02],
        [6.6391e-05, 9.9988e-01, 3.5852e-05, 8.1558e-06, 5.8976e-06, 1.2895e-06],
        [1.2744e-02, 7.3701e-01, 2.7218e-02, 3.1335e-03, 2.2348e-03, 2.1766e-01],
        ...,
        [2.3925e-08, 9.9974e-01, 2.6441e-04, 1.1675e-07, 7.5272e-08, 9.5672e-09],
        [1.7847e-06, 9.9565e-01, 1.7833e-03, 4.5969e-05, 3.1409e-05, 2.4829e-03],
        [1.4065e-04, 9.9840e-01, 6.4111e-05, 4.7672e-05, 3.2687e-05, 1.3191e-03]])
40 164 23 0 0
test loss:  tensor(1.3792, dtype=torch.float64)
hamming loss: 0.25666666666666665
total reward:  tensor(261.6500, dtype=torch.float64)
auc:  0.5741803040009318
JCP Rev = 261.65
(700, 292)
(300, 292)
{0, 1}
Current Rep:  2
Dataset :  <torch.utils.data.dataset.TensorDataset object at 0x7f356220e0f0>
Number of samp

  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.541917
Epoch[1000/5000], loss: -0.908897
Epoch[2000/5000], loss: -0.922262
Epoch[3000/5000], loss: -0.920890
Epoch[4000/5000], loss: -0.916292
test loss:  tensor(1.0704, dtype=torch.float64)
hamming loss: 0.225
total reward:  tensor(233., dtype=torch.float64)
auc:  0.793252156265855
IPS = 233.00


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch[0/2000], loss: -0.876266
Epoch[1000/2000], loss: -0.932317
fraction:
tensor(0.9733)
Pure Alg Reward:
tensor(233., dtype=torch.float64)
Pure Human Reward:
tensor(233.0000)
test loss:  tensor(1.0704, dtype=torch.float64)
hamming loss: 0.225
total reward:  tensor(228.6000)
auc:  0.793252156265855
TS Rev = 228.60


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.564255
Epoch[1000/5000], loss: -0.909232
Epoch[2000/5000], loss: -0.905639
Epoch[3000/5000], loss: -0.909308
Epoch[4000/5000], loss: -0.904738
fraction:
tensor(0.7000)
Pure Alg Reward:
tensor(219., dtype=torch.float64)
Pure Human Reward:
tensor(233.0000)
test loss:  tensor(1.6308, dtype=torch.float64)
hamming loss: 0.27
total reward:  tensor(243.5000)
auc:  0.5705789503354192
JC Rev = 243.50


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.790168
Epoch[1000/5000], loss: -1.037080
Epoch[2000/5000], loss: -1.057438
Epoch[3000/5000], loss: -1.044019
Epoch[4000/5000], loss: -1.032149
tensor([[5.4488e-06, 4.1485e-05, 6.5536e-06, 7.1009e-06, 3.7495e-06, 9.9994e-01],
        [4.3576e-06, 1.9291e-05, 5.2198e-06, 5.6466e-06, 3.0138e-06, 9.9996e-01],
        [7.6220e-06, 9.9993e-01, 9.1155e-06, 9.7657e-06, 5.5076e-06, 4.1986e-05],
        ...,
        [7.8750e-06, 3.8288e-05, 9.3710e-06, 1.0108e-05, 5.5243e-06, 9.9993e-01],
        [1.6043e-05, 9.9986e-01, 1.8938e-05, 2.0176e-05, 1.1872e-05, 6.9132e-05],
        [4.5677e-05, 2.0713e-03, 5.4717e-05, 5.8877e-05, 3.1723e-05, 9.9774e-01]])
0 198 0 0 0
test loss:  tensor(1.5187, dtype=torch.float64)
hamming loss: 0.27
total reward:  tensor(265.1000, dtype=torch.float64)
auc:  0.5885055527369074
JCP Rev = 265.10
(700, 292)
(300, 292)
{0, 1}
Current Rep:  3
Dataset :  <torch.utils.data.dataset.TensorDataset object at 0x7f35ec0b75f8>
Number of samples:  700


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.425340
Epoch[1000/5000], loss: -0.912237
Epoch[2000/5000], loss: -0.918009
Epoch[3000/5000], loss: -0.924076
Epoch[4000/5000], loss: -0.920213
test loss:  tensor(0.9093, dtype=torch.float64)
hamming loss: 0.20833333333333334
total reward:  tensor(238., dtype=torch.float64)
auc:  0.8037333333333333
IPS = 238.00


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch[0/2000], loss: -0.863911
Epoch[1000/2000], loss: -0.932407
fraction:
tensor(0.9767)
Pure Alg Reward:
tensor(238., dtype=torch.float64)
Pure Human Reward:
tensor(244.0000)
test loss:  tensor(0.9093, dtype=torch.float64)
hamming loss: 0.20833333333333334
total reward:  tensor(236.6500)
auc:  0.8037333333333333
TS Rev = 236.65


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.557725
Epoch[1000/5000], loss: -0.909039
Epoch[2000/5000], loss: -0.899954
Epoch[3000/5000], loss: -0.900003
Epoch[4000/5000], loss: -0.901699
fraction:
tensor(0.7367)
Pure Alg Reward:
tensor(225., dtype=torch.float64)
Pure Human Reward:
tensor(244.0000)
test loss:  tensor(1.5172, dtype=torch.float64)
hamming loss: 0.25
total reward:  tensor(248.0500)
auc:  0.5569481481481482
JC Rev = 248.05


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.763268
Epoch[1000/5000], loss: -1.096096
Epoch[2000/5000], loss: -1.092821
Epoch[3000/5000], loss: -1.094880
Epoch[4000/5000], loss: -1.115732
tensor([[8.2638e-08, 9.9986e-01, 1.3732e-04, 8.7380e-07, 5.3205e-07, 4.1287e-06],
        [1.2547e-06, 5.1805e-06, 2.6289e-07, 8.4712e-07, 5.4736e-07, 9.9999e-01],
        [4.5870e-03, 8.4162e-01, 1.1847e-01, 1.9096e-03, 1.3036e-03, 3.2105e-02],
        ...,
        [2.7102e-05, 4.0929e-01, 4.0099e-06, 6.2372e-05, 4.1341e-05, 5.9057e-01],
        [8.0811e-05, 5.1534e-03, 1.3998e-03, 1.3566e-04, 9.0400e-05, 9.9314e-01],
        [4.3291e-03, 4.1838e-01, 2.0535e-02, 1.8477e-03, 1.2356e-03, 5.5367e-01]])
34 76 120 0 0
test loss:  tensor(1.3127, dtype=torch.float64)
hamming loss: 0.25
total reward:  tensor(254.5000, dtype=torch.float64)
auc:  0.5658222222222222
JCP Rev = 254.50
(700, 292)
(300, 292)
{0, 1}
Current Rep:  4
Dataset :  <torch.utils.data.dataset.TensorDataset object at 0x7f35622bc748>
Number of samples:  700


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.612258
Epoch[1000/5000], loss: -0.935395
Epoch[2000/5000], loss: -0.936476
Epoch[3000/5000], loss: -0.943802
Epoch[4000/5000], loss: -0.941429
test loss:  tensor(1.1858, dtype=torch.float64)
hamming loss: 0.24
total reward:  tensor(228., dtype=torch.float64)
auc:  0.7541043356997972
IPS = 228.00


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch[0/2000], loss: -0.889658
Epoch[1000/2000], loss: -0.938252
fraction:
tensor(1.)
Pure Alg Reward:
tensor(228., dtype=torch.float64)
Pure Human Reward:
tensor(228.0000)
test loss:  tensor(1.1858, dtype=torch.float64)
hamming loss: 0.24
total reward:  tensor(228.)
auc:  0.7541043356997972
TS Rev = 228.00


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.555091
Epoch[1000/5000], loss: -0.916905
Epoch[2000/5000], loss: -0.914880
Epoch[3000/5000], loss: -0.911075
Epoch[4000/5000], loss: -0.912373
fraction:
tensor(0.6867)
Pure Alg Reward:
tensor(232., dtype=torch.float64)
Pure Human Reward:
tensor(228.0000)
test loss:  tensor(1.3529, dtype=torch.float64)
hamming loss: 0.22666666666666666
total reward:  tensor(245.3000)
auc:  0.635744168356998
JC Rev = 245.30


  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch[0/5000], loss: -0.781773
Epoch[1000/5000], loss: -1.085147
Epoch[2000/5000], loss: -1.073618
Epoch[3000/5000], loss: -1.121552
Epoch[4000/5000], loss: -1.122499
tensor([[8.4785e-01, 1.7144e-04, 1.4979e-01, 2.0486e-03, 1.2819e-04, 1.1517e-05],
        [1.7344e-05, 1.6820e-05, 2.3296e-05, 1.3533e-03, 1.2049e-05, 9.9858e-01],
        [7.3045e-03, 7.8554e-05, 2.2043e-03, 2.5205e-05, 5.6427e-05, 9.9033e-01],
        ...,
        [1.1639e-03, 3.0730e-05, 6.6636e-06, 1.5343e-04, 2.3240e-05, 9.9862e-01],
        [2.1276e-02, 1.9457e-05, 1.7283e-05, 9.7867e-01, 1.5086e-05, 9.5719e-07],
        [1.1683e-05, 6.6883e-06, 2.1315e-02, 2.7043e-07, 4.6426e-06, 9.7866e-01]])
38 0 83 78 0
test loss:  tensor(1.2592, dtype=torch.float64)
hamming loss: 0.22666666666666666
total reward:  tensor(247.0500, dtype=torch.float64)
auc:  0.5950652890466532
JCP Rev = 247.05


In [2]:
print('HUMAN:')
print(np.mean(human_result))
print(np.std(human_result))
print('IPS:')
print(np.mean(ips_result))
print(np.std(ips_result)/np.sqrt(len(ips_result)))
print('IPS-2S:')
print(np.mean(ips2s_result))
print(np.std(ips2s_result)/np.sqrt(len(ips2s_result)))
print('IPS-J:')
print(np.mean(ipsj_result))
print(np.std(ipsj_result)/np.sqrt(len(ipsj_result)))
print('IPS-J-P:')
print(np.mean(ipsjp_result))
print(np.std(ipsjp_result)/np.sqrt(len(ipsjp_result)))

HUMAN:
234.99997
5.5136194
IPS:
229.8
2.5203174403237383
IPS-2S:
228.23001
2.3640893178329865
IPS-J:
244.02998
1.1169239259736272
IPS-J-P:
257.23999999999995
2.778539184535648
