In [23]:
import numpy as np
import pickle

from proglearn.voters import TreeClassificationVoter
from proglearn.deciders import SimpleAverage

from sklearn.metrics import accuracy_score

from utils import load_embedded_data

In [2]:
X_train, y_train, X_test, y_test = load_embedded_data(split_train=True, split_test=True)

X_train shape: (10, 5000, 1000)
y_train shape: (10, 5000, 1)
X_test shape: (10, 1000, 1000)
y_test shape: (10, 1000, 1)


In [9]:
lf = pickle.load(open("output/lf_task_10.p", "rb"))

In [3]:
# For each task
# Check the performance of... 

# UF (LF with transformer ids = t)
# LF (all transformers)
# The best out of either of them.
# The best out of UF, LF, some random ones with UF and LF in the mix.

In [25]:
def sample_transformer_ids():
    transformer_ids = []
    for t in range(10):
        if np.random.binomial(1, 0.5):
            transformer_ids.append(t)
    return transformer_ids

In [36]:
def get_te(task_id, num_candidates = 10, verbose = False):
    
    train_x = X_train[task_id][0:4500]
    train_y = y_train[task_id][0:4500]
    
    val_x = X_train[task_id][4500:]
    val_y = y_train[task_id][4500:]
    
    test_y = y_test[task_id]
    test_x = X_test[task_id]
    
    # Method 1: UF
    if verbose: print("Running Method 1: UF...")
    uf_pred = lf.predict(test_x, task_id, transformer_ids = [task_id])
    uf_acc = accuracy_score(uf_pred, test_y)
    
    # Method 2: LF
    if verbose: print("Running Method 2: LF...")
    lf_pred = lf.predict(test_x, task_id)
    lf_acc = accuracy_score(lf_pred, test_y)
    
    # Method 3: Pick the best on the training set between LF and UF.
    if verbose: print("Running Method 3: One-vs-All (train)...")
    lf_train_acc = accuracy_score(lf.predict(train_x, task_id), train_y)
    uf_train_acc = accuracy_score(lf.predict(train_x, task_id, transformer_ids = [task_id]), train_y)
    if uf_train_acc > lf_train_acc:
        ova_train_acc = uf_acc
        ova_train_transformers = [task_id]
    else:
        ova_train_acc = lf_acc
        ova_train_transformers = np.arange(10)
    
    
    # Method 4: Pick the best on validation set between UF and LF.
    if verbose: print("Running Method 4: One-vs-All (val)...")
    lf_val_acc = accuracy_score(lf.predict(val_x, task_id), val_y)
    uf_val_acc = accuracy_score(lf.predict(val_x, task_id, transformer_ids = [task_id]), val_y)
    if uf_val_acc > lf_val_acc:
        ova_val_acc = uf_acc
        ova_val_transformers = [task_id]
    else:
        ova_val_acc = lf_acc
        ova_val_transformers = np.arange(10)
    
    
    # Method 5: Sample the best transformers based on the training set.
    if verbose: print("Running Method 5: Sample (train)...")
    best_acc = ova_train_acc
    best_transformer_ids = ova_train_transformers
    for c in range(num_candidates):
        transformer_ids = sample_transformer_ids()
        acc = accuracy_score(lf.predict(train_x, task_id, transformer_ids = transformer_ids), train_y)
        if acc > best_acc:
            best_acc = acc
            best_transformer_ids = transformer_ids
    sample_train_acc = accuracy_score(lf.predict(test_x, task_id, transformer_ids = best_transformer_ids), test_y)
    sample_train_transformed_ids = best_transformer_ids
            
    
    # Methpd 6: Sample the best transformers based on the validation set.
    if verbose: print("Running Method 6: Sample (val)...")
    best_acc = ova_val_acc
    best_transformer_ids = ova_val_transformers
    for c in range(num_candidates):
        transformer_ids = sample_transformer_ids()
        acc = accuracy_score(lf.predict(val_x, task_id, transformer_ids = transformer_ids), val_y)
        if acc > best_acc:
            best_acc = acc
            best_transformer_ids = transformer_ids
    sample_val_acc = accuracy_score(lf.predict(test_x, task_id, transformer_ids = best_transformer_ids), test_y)
    sample_val_transformed_ids = best_transformer_ids
    
    results = {
        'lf_te' : (1 - uf_acc) / (1 - lf_acc),
        'ova_train_te' : (1 - uf_acc) / (1 - ova_train_acc),
        'ova_val_te' : (1 - uf_acc) / (1 - ova_val_acc),
        'sample_train_te' : (1 - uf_acc) / (1 - sample_train_acc),
        'sample_val_te' : (1 - uf_acc) / (1 - sample_val_acc)
    }
    
    pickle.dump(results, open("output/robust_result_%d.p" % task_id, "wb"))
    
    print("Task %d" % task_id)
    print("--------------------")
    print(results)

In [38]:
get_te(0, verbose=True)

Running Method 1: UF...
Running Method 2: LF...
Running Method 3: One-vs-All (train)...
Running Method 4: One-vs-All (val)...
Running Method 5: Sample (train)...
Running Method 6: Sample (val)...


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()