In [1]:
import scipy.stats as stats
from scipy.special import expit
from random import seed
from random import randrange
import pandas as pd
import scipy
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
import argparse 
import torch.nn as nn 
import random
from torch.utils.data import Dataset, TensorDataset, DataLoader
from skmultilearn.dataset import load_dataset
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import hamming_loss, roc_auc_score
from sklearn.linear_model import LogisticRegression
import time 


nrep = 5
dataset = 'real_focus'
hidd = 2
rand_test = 0
C = 0.05



class Net(torch.nn.Module):
    def __init__(self, input_size, num_classes, hidden = 128):
        super(Net, self).__init__()
        self.norm0 = torch.nn.BatchNorm1d(input_size)
        self.linear1 = torch.nn.Linear(input_size, hidden)
        self.linear2 = torch.nn.Linear(hidden, num_classes)
        self.drop_layer = nn.Dropout(p=0.2)

    def emb(self, x):
        emb = self.linear1(x)
        return emb

    def forward(self, x):

        hidd1 = self.linear1(x)
        out = self.linear2(hidd1)
        out = self.drop_layer(out)
        return out



class SelNet(torch.nn.Module):
    def __init__(self, input_size, num_classes, hidden = 128):
        super(SelNet, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, hidden)
        self.linear2 = torch.nn.Linear(hidden, num_classes)

    def emb(self, x):
        emb = self.linear1(x)
        return emb

    def forward(self, x):
        hidd1 = F.relu(self.linear1(x))
        out = self.linear2(hidd1)
        return out

def find_instance(w):
    # input worker id 
    # return the instances worker labeled
    return np.where(human[w][0].any(axis=1))[0]

def find_worker(ins):
    # input instance id 
    # return which workers labeled it
    ws = []
    for w in range(18):
        if ins in  np.where(human[w][0].any(axis=1))[0]:
            ws.append(w)
    return np.array(ws)



def find_unlabeled(human):
    inss = []
    for ins in range(700):
        ws = []
        for w in range(18):
            if ins in  np.where(human[w][0].any(axis=1))[0]:
                ws.append(w)
        if len(ws) == 0:
            inss.append(ins)
    return np.array(inss)



@ torch.no_grad()
def test_multi(model, X_test, y_test):
    set_seed(999)
    model.eval()
    model.cpu()
    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    total_reward = labels.reshape(-1).sum()
    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))
    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())
    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc


@ torch.no_grad()
def test_multi_human(model, X_test, y_test, human, selector):
    set_seed(999)
    model.cpu()
    human.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)

    pred = out[range(y_test.shape[0]),treat]
    htreat = np.argmax(human(X_test.float()).detach().cpu().numpy(),1)
    htreat = torch.from_numpy(htreat)
    if_alg = selector(X_test.float())
    if_alg = torch.sigmoid(if_alg)[range(y_test.shape[0]),[0]*y_test.shape[0]]
    treat = torch.where(if_alg > 0.5, treat, htreat)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    total_reward = labels.reshape(-1).sum()

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    return total_reward, ham_loss, auc

@ torch.no_grad()
def test_real_human(model, X_test, y_test, human, selector, C=0):
    set_seed(999)
    model.eval()
    model.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)
    alg_treat = treat

    pred = out[range(y_test.shape[0]),treat]
    pred = out[range(y_test.shape[0]),treat]

    treatment = []
    for ins in indices_test:
        #ws = np.array([9, 3, 11])
        ws = np.array([0, 1, 2,3,4])
        # randomly sample a worker
        w = np.random.choice(ws)
        full = human[w][ins]
        treatment.append(randargmax(full))

    htreat = np.array(treatment)

    htreat = torch.from_numpy(htreat)
    if_alg = selector(X_test.float())
    if_alg = torch.sigmoid(if_alg)[range(y_test.shape[0]),[0]*y_test.shape[0]]
    print('fraction:')
    print((if_alg>0.5).float().mean())
    treat = torch.where(if_alg > 0.5, treat, htreat)

    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat.numpy()]
    labels = labels.float()
    this_reward = labels.reshape(-1)
    this_reward = torch.where(if_alg > 0.5, labels, labels-C)
    total_reward = this_reward.reshape(-1).sum()

    print('Pure Alg Reward:')
    labels = y_test[range(y_test.shape[0]),alg_treat.numpy()]
    alg_reward = labels.reshape(-1).sum()
    print(alg_reward)

    print('Pure Human Reward:')
    labels = y_test[range(y_test.shape[0]),htreat.numpy()]
    labels = labels.float()
    labels = labels - C
    human_reward = labels.reshape(-1).sum()
    print(human_reward)

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc, human_reward

@ torch.no_grad()
def test_real_human_person(model, X_test, y_test, human, selector, C = 0):
    set_seed(999)
    model.eval()
    model.cpu()
    selector.cpu()

    out1 = model(X_test.float())
    out = torch.sigmoid(out1)
    y_test = torch.from_numpy(y_test)
    _, treat = torch.max(out, 1)

    pred = out[range(y_test.shape[0]),treat]
    pred = out[range(y_test.shape[0]),treat]


    if_alg = selector(X_test.float())
    if_alg = F.softmax(if_alg, 1)

    print(if_alg)
    _, selection = torch.max(if_alg, 1)

    ma = {0:0,1:1,2:2,3:3,4:4}
    final_treat = []

    treatment = []
    treat = treat.numpy()
    human_cost = 0 
    count1 = 0
    count2 = 0
    count3 = 0
    count4 = 0
    count5 = 0
    for ind, ins in enumerate(indices_test):
        i = selection[ind]
        if i == 0:
            fulllabel = human[0][ins]
            treatment.append(randargmax(fulllabel))
            human_cost += C
            count1 += 1
        if i == 1:
            fulllabel = human[1][ins]
            treatment.append(randargmax(fulllabel))
            human_cost += C
            count2 += 1
        if i == 2:
            fulllabel = human[2][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count3 += 1
        if i == 3:
            fulllabel = human[3][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count4 += 1
        if i == 4:
            fulllabel = human[4][ins]
            treatment.append(randargmax(fulllabel))  
            human_cost += C                 
            count5 += 1
        if i == 5:
            treatment.append(treat[ind])                  
    print(count1, count2, count3, count4, count5)
    treat = np.array(treatment)
    #y = sample_y(X_test, hx_test, np.array(opt), dataset)
    labels = y_test[range(y_test.shape[0]),treat]
    total_reward = labels.reshape(-1).sum() - human_cost

    pred = torch.where(out<0.5, torch.zeros_like(out), torch.ones_like(out))

    ham_loss = hamming_loss(y_test.numpy(), pred.detach().numpy())
    auc = roc_auc_score(y_test.numpy(), out.detach().numpy())

    print('test loss: ', torch.nn.MultiLabelSoftMarginLoss()(out1, y_test))
    print('hamming loss:', hamming_loss(y_test.numpy(), pred.detach().numpy()))
    print('total reward: ', total_reward)
    print('auc: ', auc)
    model.train()
    return total_reward, ham_loss, auc



def train_ips(trainloader, input_dim, output_dim, lamb = 0):
    model = Net(input_dim, output_dim, hidden = hidd)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    #optimizer = torch.optim.SGD(model.parameters(), lr = 1e-1, momentum=0.9, weight_decay = 1e-4)
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * out / logp
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model

def train_ips_2s(trainloader, input_dim, output_dim, model, lamb = 0, C = 0):
    model = model
    #model = Net(input_dim, output_dim, hidden = hidd)
    selector = SelNet(input_dim, 1, hidden = 16)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    #optimizer = torch.optim.SGD(selector.parameters(), lr = 1e-2, momentum=0.9, weight_decay = 1e-4)
    optimizer = torch.optim.Adam(selector.parameters(), lr = 1e-3, weight_decay = 1e-4)
    num_epochs = 2000
    min_loss = 1e3
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            with torch.no_grad():
                out = model(inputx.float())
                out = F.softmax(out, 1)
                out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = torch.sigmoid(sel_prob).reshape(-1)
            #print('prob')
            #print(sel_prob)
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob * out / logp) - (1-sel_prob) * (reward - lamb - C)
            '''
            print('sel_prob')
            print(sel_prob)
            print('alg_reward')
            print(reward *  out / logp)
            print('human reward')
            print(reward)
            '''
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model, selector


def train_ips_hai(trainloader, model, selector, input_dim, output_dim, lamb = 0, C = 0):
    model = Net(input_dim, output_dim, hidden = hidd)
    #selector = SelNet(input_dim, 1, hidden = 16)
    #model = model
    selector = selector
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(list(model.parameters())+list(selector.parameters()), lr = 1e-3, weight_decay = 1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-4, momentum = 0.9, weight_decay = 1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-3, momentum=0.9, weight_decay = 1e-4)
    #model_optimizer = torch.optim.SGD(list(model.parameters()), lr = 0.01, momentum=0.9, weight_decay = 1e-4)
    #sel_optimizer = torch.optim.Adam(list(selector.parameters()), lr = 0.01, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob) in enumerate(trainloader):
            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = torch.sigmoid(sel_prob).reshape(-1)
            #print('prob')
            #print(sel_prob)
            logp = logging_prob[range(out.size(0)),treatment]
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob * out / logp) - (1-sel_prob) * (reward - lamb - C)
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))
    return model, selector


def train_ips_hai_person(trainloader, model, input_dim, output_dim, lamb = 0, C = 0):
    #model = Net(input_dim, output_dim, hidden = hidd)
    model = model
    selector = SelNet(input_dim, 6, hidden = 16)
    criterion = torch.nn.BCEWithLogitsLoss()
    cmcriterion = torch.nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(list(model.parameters())+list(selector.parameters()), lr = 1e-3,weight_decay=1e-4)
    #optimizer = torch.optim.SGD(list(model.parameters())+list(selector.parameters()), lr = 1e-1, momentum=0.9, weight_decay = 1e-4)
    #temp_optimizer = torch.optim.SGD([temperature], lr=0.001, momentum=0.9, weight_decay = 1e-4)
    num_epochs = 5000
    min_loss = 1e3
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, (inputx, treatment, targety, logging_prob, prop, wids) in enumerate(trainloader):            # forward
            out = model(inputx.float())
            out = F.softmax(out, 1)
            out = out[range(out.size(0)),treatment]
            # prob that selcts alg
            sel_prob = selector(inputx.float())
            sel_prob = F.softmax(sel_prob, 1)
            
            logp = logging_prob
            #logp = logging_prob[range(out.size(0)),treatment]
            #logp = torch.sum(prop, 1) * 1 / 5
            reward = targety.reshape(-1)
            loss = - (reward - lamb) * (sel_prob[:,5] * out / logp) - (reward - lamb - C) * 5 * sel_prob[:,wids] 
            loss = loss.mean()
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if epoch % 1000 == 0:
            print('Epoch[{}/{}], loss: {:.6f}'.format(epoch, num_epochs, running_loss / (batch_idx+1)))   
    return model, selector

def randargmax(b,**kw):
  """ a random tie-breaking argmax"""
  return np.argmax(np.random.random(b.shape) * (b==b.max()), **kw)

def set_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

opt_result = []
ips_result = []
ips_loss = []
ips_time = []
ips_auc = []
ips2s_result = []
ips2s_loss = []
ips2s_time = []
ips2s_auc = []
ipsj_result = []
ipsj_loss = []
ipsj_time = []
ipsj_auc = []
ipsjp_result = []
ipsjp_loss = []
ipsjp_time = []
ipsjp_auc = []
human_result = []

lamb = 0.
#C = 0
dataset0 = dataset

for r in range(nrep):

    set_seed(r)

    if dataset0 == 'human':
        '''
        selAnn:      crowds annotations, [numWorker,1] cell, one cell for one worker's annotation, [numInst, numLab] matrix. range in {1(positive annotation), -1(negative annotation), 0(no annotation)}  
        selGT:       groundtruth labels,  [numInst, numLab] matrix. range in {1(positive), -1(negative)}  
        selfvFeat:   feature representation of data, [numFeat,numInst] matrix
        trainPairs:  label pairs constructed for datasets 
        trainPairsW: label pairs constructed for each worker
        foldWorker:   folds of worker sequence permutation, [numFold, numWorker]
        '''
        from scipy.io import loadmat
        data = loadmat('./data/data_sample/dataset1/selDatanp.mat')
        X = data['selfvFeat'].T
        y = data['selGT']
        # need to find instances labeled by all labelers
        # workers labeled most instances: 9, 3, 11, 15, 12
        # choose 3 workers 9, 3, 11

        human = data['selAnn']
        #instances = np.array(list(set(find_instance(9))&set(find_instance(3))&set(find_instance(11))))
        instances = np.array(list(set(find_instance(0))&set(find_instance(7))&set(find_instance(13))))
        for w in range(18):
            human[w][0] = human[w][0][instances]
        X = X[instances]
        y = y[instances]
        y = y / 2 + 0.5 
        print(y)
        X, X_test, y, y_test, indices_train, indices_test = train_test_split(X, y, range(len(instances)), test_size=0.1, random_state =  r)
        print(X.shape)
        print(X_test.shape)
        y_all = y
        X_test = torch.from_numpy(X_test)
    if dataset0 == 'real_focus':
        data = pd.read_csv('/home/ruijiang/E/utaustin/project/cost_efficient_labeling/Quick-and-Dirty/data/real/data.csv')
        annotation = pd.read_csv('/home/ruijiang/E/utaustin/project/cost_efficient_labeling/Quick-and-Dirty/data/real/focus.csv')
        data = data.iloc[:,4:]
        X = data.values
        y = annotation['gt'].values
        a = np.zeros((X.shape[0],2))
        a[range(a.shape[0]),y] = 1
        y = a
        human = dict()
        for w in range(5):
            temp = np.array(annotation[str(w)])
            anno = np.zeros((X.shape[0],2))
            anno[range(anno.shape[0]),temp] = 1
            human[w] = anno
        X, X_test, y, y_test, indices_train, indices_test = train_test_split(X, y, range(X.shape[0]), test_size=0.3, random_state =  r)
        print(X.shape)
        print(X_test.shape)
        y_all = y
        X_test = torch.from_numpy(X_test)
        ws = range(5)
    # human generate the logging policy
    np.random.seed(r + 81)
    treatment = []
    wids = []
    for ins in indices_train:
        # randomly sample a worker
        w = np.random.choice(ws)
        wids.append(w)
        full = human[w][ins]
        treatment.append(randargmax(full))
    treatment = np.array(treatment)
    wids = np.array(wids)
    #m = {3:0,9:1,11:2}
    m = {0:0,1:1,2:2,3:3,4:4}
    wids_map = np.array([m[i] for i in wids])
    # record instances recorded by )
    wids_oh = np.zeros((wids.size, wids_map.max()+1))
    wids_oh[np.arange(wids.size),wids_map] = 1
    y = y_all[range(y_all.shape[0]), treatment]

    print(set(treatment))
    y = torch.from_numpy(y)

    # for ips baseline:
    # learn logging policy 
    Xw = np.append(X,wids_oh,axis=1)
    #log_est = LogisticRegression(random_state=0).fit(Xw, treatment)
    #log_est0 = LogisticRegression(random_state=0).fit(X, treatment)
    log_est = RandomForestClassifier(random_state=0).fit(Xw, treatment)
    #log_est = MLPClassifier(random_state=0, max_iter=10000).fit(Xw, treatment)
    #log_est = MLPClassifier(random_state=0,max_iter = 10000).fit(X, treatment)
    log_est0 = RandomForestClassifier(random_state=0).fit(X, treatment)
    #log_est0 = MLPClassifier(random_state=0, max_iter=10000).fit(X, treatment)


    logging_prob = log_est0.predict_proba(X)
    logging_prob = torch.from_numpy(logging_prob)

    X = torch.from_numpy(X)
    treatment = torch.from_numpy(treatment)

    print('Current Rep: ', r)
    print('Dataset : ', dataset)
    print('Number of samples: ', X.shape[0])
    #model = generate_logging_policy(X, y, frac = 0.05)

    dataset = TensorDataset(X, treatment, y, logging_prob)
    trainloader = DataLoader(dataset, batch_size = 8, shuffle = True)
    
    # Naive IPS
    start = time.time()
    ips = train_ips(trainloader, X.shape[1], y_all.shape[1], lamb = lamb)
    end = time.time()
    rev, _, _ = test_multi(ips, X_test, y_test)
    print('IPS = %.2f' % rev)
    ips_result.append(rev)
    ips_fix = ips
    
    start = time.time()
    ips, selector = train_ips_2s(trainloader, X.shape[1], y_all.shape[1], ips, lamb = lamb, C = C)
    end = time.time()
    rev, _, _, hreward = test_real_human(ips, X_test, y_test, human, selector, C = C)
    print('TS Rev = %.2f' % rev)
    ips2s_result.append(rev)
    human_result.append(hreward)
    
    start = time.time()
    ips, selector = train_ips_hai(trainloader, ips, selector, X.shape[1], y_all.shape[1], lamb = lamb, C = C)
    end = time.time()
    rev, _, _, _ = test_real_human(ips, X_test, y_test, human, selector, C = C)
    print('JC Rev = %.2f' % rev)
    ipsj_result.append(rev)
    
    X = X.numpy()
    Xw = np.append(X,wids_oh,axis=1)
    logging_prob = log_est.predict_proba(Xw)
    logging_prob = torch.from_numpy(logging_prob)
    X = torch.from_numpy(X)
    w1 = np.zeros((wids.size, wids_map.max()+1))
    w1[:,0] = 1
    w1 = torch.from_numpy(w1)
    w2 = np.zeros((wids.size, wids_map.max()+1))
    w2[:,1] = 1
    w2 = torch.from_numpy(w2)
    w3 = np.zeros((wids.size, wids_map.max()+1))
    w3[:,2] = 1
    w3 = torch.from_numpy(w3)
    w4 = np.zeros((wids.size, wids_map.max()+1))
    w4[:,3] = 1
    w4 = torch.from_numpy(w4)
    w5 = np.zeros((wids.size, wids_map.max()+1))
    w5[:,4] = 1
    w5 = torch.from_numpy(w5)
    prop = torch.from_numpy(np.stack([log_est.predict_proba(np.append(X,w1,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w2,axis=1))[np.arange(X.shape[0]),treatment],
        log_est.predict_proba(np.append(X,w3,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w4,axis=1))[np.arange(X.shape[0]),treatment],log_est.predict_proba(np.append(X,w5,axis=1))[np.arange(X.shape[0]),treatment]],axis=1))
    logging_prob = prop[range(prop.shape[0]),wids_map]
    wids_map = torch.from_numpy(wids_map)
    dataset = TensorDataset(X, treatment, y, logging_prob, prop, wids_map)
    trainloader = DataLoader(dataset, batch_size = 8, shuffle = True)
    
    start = time.time()
    ips, selector = train_ips_hai_person(trainloader, ips, X.shape[1], y_all.shape[1], lamb = lamb, C = C)
    end = time.time()
    rev, _, _ = test_real_human_person(ips, X_test, y_test, human, selector, C = C)
    print('JCP Rev = %.2f' % rev)
    ipsjp_result.append(rev)


(700, 292)
(300, 292)
{0, 1}
Current Rep:  0
Dataset :  real_focus
Number of samples:  700
Epoch[0/5000], loss: -0.456829
Epoch[1000/5000], loss: -0.949159
Epoch[2000/5000], loss: -0.954586
Epoch[3000/5000], loss: -0.940715
Epoch[4000/5000], loss: -0.945776
test loss:  tensor(1.4636, dtype=torch.float64)
hamming loss: 0.2683333333333333
total reward:  tensor(219., dtype=torch.float64)
auc:  0.7661172907809679
IPS = 219.00
Epoch[0/2000], loss: -0.881051
Epoch[1000/2000], loss: -0.952524
fraction:
tensor(1.)
Pure Alg Reward:
tensor(219., dtype=torch.float64)
Pure Human Reward:
tensor(231.9999)
test loss:  tensor(1.4636, dtype=torch.float64)
hamming loss: 0.2683333333333333
total reward:  tensor(219.)
auc:  0.7661172907809679
TS Rev = 219.00
Epoch[0/5000], loss: -0.546349
Epoch[1000/5000], loss: -0.912815
Epoch[2000/5000], loss: -0.910126
Epoch[3000/5000], loss: -0.911151
Epoch[4000/5000], loss: -0.911385
fraction:
tensor(0.6633)
Pure Alg Reward:
tensor(223., dtype=torch.float64)
Pure Hum

Current Rep:  4
Dataset :  <torch.utils.data.dataset.TensorDataset object at 0x7ff6756c9c50>
Number of samples:  700
Epoch[0/5000], loss: -0.613075
Epoch[1000/5000], loss: -0.930609
Epoch[2000/5000], loss: -0.951893
Epoch[3000/5000], loss: -0.942655
Epoch[4000/5000], loss: -0.945098
test loss:  tensor(1.4736, dtype=torch.float64)
hamming loss: 0.24666666666666667
total reward:  tensor(226., dtype=torch.float64)
auc:  0.7187183062880325
IPS = 226.00
Epoch[0/2000], loss: -0.880306
Epoch[1000/2000], loss: -0.953990
fraction:
tensor(0.9900)
Pure Alg Reward:
tensor(226., dtype=torch.float64)
Pure Human Reward:
tensor(227.9999)
test loss:  tensor(1.4736, dtype=torch.float64)
hamming loss: 0.24666666666666667
total reward:  tensor(224.8500)
auc:  0.7187183062880325
TS Rev = 224.85
Epoch[0/5000], loss: -0.561402
Epoch[1000/5000], loss: -0.915542
Epoch[2000/5000], loss: -0.909702
Epoch[3000/5000], loss: -0.913887
Epoch[4000/5000], loss: -0.921799
fraction:
tensor(0.7133)
Pure Alg Reward:
tensor

In [2]:
print('HUMAN:')
print(np.mean(human_result))
print(np.std(human_result))
print('IPS:')
print(np.mean(ips_result))
print(np.std(ips_result)/np.sqrt(len(ips_result)))
print('IPS-2S:')
print(np.mean(ips2s_result))
print(np.std(ips2s_result)/np.sqrt(len(ips2s_result)))
print('IPS-J:')
print(np.mean(ipsj_result))
print(np.std(ipsj_result)/np.sqrt(len(ipsj_result)))
print('IPS-J-P:')
print(np.mean(ipsjp_result))
print(np.std(ipsjp_result)/np.sqrt(len(ipsjp_result)))

HUMAN:
234.99995
5.5136194
IPS:
229.4
3.131772660970141
IPS-2S:
228.95
3.0514590400263524
IPS-J:
242.86996
2.7978486667321816
IPS-J-P:
259.01
2.368476303449118
