In [1]:
import numpy as np
import pickle
import copy

from utils import load_imdb, load_yelp, load_toxic_comment, load_amazon

from sklearn.model_selection import train_test_split

from proglearn.forest import LifelongForest, UncertaintyForest

In [2]:
# Experimental parameters.

# Forest size handed to LifelongForest; the UncertaintyForest baseline is built
# with num_tasks * n_estimators trees in the target-task sweep.
n_estimators = 10

# Fractions of the full target-task training set used in the sweep
# (each one is passed to train_test_split as test_size).
subsample_fracs = [
    6e-5,
    6e-4,
    6e-3,
    6e-2,
]
# Shorter sweep for quick test runs:
# subsample_fracs = [6e-5, 6e-4]

# Forwarded to every load_* helper as its `verbose` argument.
verbose = True

In [3]:
# Source tasks. Each entry records a display name, a short file-name stem, the
# loader function, the subsample fraction passed to that loader, and the task id
# used when the task is added to the LifelongForest.
source_tasks = [
    dict(
        name='Yelp Review Sentiment Analysis',
        filename='yelp',
        load=load_yelp,
        subsample_frac=0.02,
        task_id=0,
    ),
    dict(
        name='IMDB Review Sentiment Analysis',
        filename='imdb',
        load=load_imdb,
        subsample_frac=0.2,
        task_id=1,
    ),
    dict(
        name='Amazon Review Sentiment Analysis',
        filename='amazon',
        load=load_amazon,
        subsample_frac=0.01,
        task_id=2,
    ),
]

# Target task. It has no 'subsample_frac' here: the sweep cell sets that key
# on every iteration.
target_task = dict(
    name='Toxic Comment Identification',
    filename='toxic_comment',
    load=load_toxic_comment,
    task_id=3,
)

In [4]:
# Load data.
# Every source task dict is extended in place with its train/test arrays under
# the keys 'X_train', 'y_train', 'X_test' and 'y_test'.

banner = "------------------------------------------------------"
for task in source_tasks:
    print(banner)
    print("LOADING TASK:", task['name'])
    print(banner)
    loader = task['load']
    X_tr, y_tr, X_te, y_te = loader(verbose=verbose, subsample_frac=task['subsample_frac'])
    task['X_train'] = X_tr
    task['y_train'] = y_tr
    task['X_test'] = X_te
    task['y_test'] = y_te
print(banner)

------------------------------------------------------
LOADING TASK: Yelp Review Sentiment Analysis
------------------------------------------------------
'X_train' and 'X_test' are each an n-by-d array of BERT embedded reviews of a business.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 1120
Input dimension d = 512
Number of testing examples = 38000
------------------------------------------------------
LOADING TASK: IMDB Review Sentiment Analysis
------------------------------------------------------
'X_train' and 'X_test' are each an n-by-d array of BERT embedded movie reviews.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 900
Input dimension d = 512
Number of testing examples = 5000
------------------------------------------------------
LOADING TASK: Amazon Review Sentiment Analysis
----------------------------------

## Source Task Training

In [5]:
# Train one LifelongForest on all source tasks, in list order, and persist it so
# the target-task sweep can start from the same source-trained model each time.
lf = LifelongForest(n_estimators=n_estimators)

for task in source_tasks:
    print("TRAINING TASK:", task['name'])
    # NOTE(review): this calls the loader again although the data-loading cell
    # already stored the arrays in task['X_train'] / task['y_train']; if the
    # loader subsamples randomly the two draws may differ — confirm and
    # consider reusing the stored arrays.
    X_train, y_train, _, _ = task['load'](verbose = verbose, subsample_frac = task['subsample_frac'])
    lf.add_task(X_train, y_train, task_id=task['task_id'])

# Use a context manager so the file is flushed and closed before the sweep cell
# reads it back.
with open("output/lf_source_trained_%d.p" % n_estimators, "wb") as f:
    pickle.dump(lf, f)

TRAINING TASK: Yelp Review Sentiment Analysis
'X_train' and 'X_test' are each an n-by-d array of BERT embedded reviews of a business.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 1120
Input dimension d = 512
Number of testing examples = 38000
TRAINING TASK: IMDB Review Sentiment Analysis
'X_train' and 'X_test' are each an n-by-d array of BERT embedded movie reviews.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 900
Input dimension d = 512
Number of testing examples = 5000
TRAINING TASK: Amazon Review Sentiment Analysis
'X_train' and 'X_test' are each an 
                n-by-d array of BERT embedded product reviews.
'y_train' and 'y_test' are each list of binary sentiments, 
                where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 300
Input dimension d = 512
Number of testing examples = 300

In [6]:
# Load the target-task (toxic comment) train and test sets.
# subsample_frac=None presumably disables subsampling in the loader — TODO confirm.
X_train_full, y_train_full, X_test, y_test = load_toxic_comment(
    verbose=verbose,
    subsample_frac=None,
)

'X_train' and 'X_test' are each an n-by-d array of BERT embedded reviews of a business.
'y_train' and 'y_test' are each list of multilabel binary sentiments, 
                where the columns indicate 'toxic', 'severe_toxic', 'obscene', 'threat', 
                'insult', 'identity_hate', and 'not_toxic', in that order.
Number of training examples = 159571
Input dimension d = 512
Number of testing examples = 63978


In [8]:
# Target task training and testing.
#
# For every fraction in subsample_fracs:
#   1. draw a random subsample of the full target training set,
#   2. fit an UncertaintyForest on that subsample alone (baseline),
#   3. reload the source-trained LifelongForest and add the target task to it,
#   4. record mean absolute train/test errors for both models and their ratio.
# The list of per-fraction result dicts is pickled at the end.
num_tasks = len(source_tasks) + 1
task = target_task
results = []

# Loop-invariant: location of the source-trained forest written by the
# source-task training cell.
source_model_path = "output/lf_source_trained_%d.p" % n_estimators

for subsample_frac in subsample_fracs:

    task['subsample_frac'] = subsample_frac
    # The *test* portion of the split is kept as the training subsample, so
    # test_size is the fraction of the full training set that is used.
    # NOTE(review): no random_state is passed, so the subsample changes between runs.
    _, X_train, _, y_train = train_test_split(X_train_full, y_train_full, test_size=subsample_frac)
    n_train = len(X_train)
    print("TESTING TASK:", task['name'], "at sample size n =", n_train)

    # Baseline gets num_tasks * n_estimators trees — presumably to match the
    # total tree count of the lifelong forest across all tasks; confirm.
    uf = UncertaintyForest(n_estimators=num_tasks*n_estimators)
    uf.fit(X_train, y_train)

    # Reload from disk on every iteration so each sample size starts from a
    # forest that has seen only the source tasks (add_task below modifies it).
    # pickle.load can execute arbitrary code: only load files this notebook wrote.
    with open(source_model_path, "rb") as f:
        lf = pickle.load(f)
    lf.add_task(X_train, y_train, task_id=task['task_id'])

    result = {
        'n_train' : n_train,
        'err_uf_train' : np.mean(np.abs(uf.predict(X_train) - y_train)),
        'err_uf_test' : np.mean(np.abs(uf.predict(X_test) - y_test)),
        'err_lf_train' : np.mean(np.abs(lf.predict(X_train, task['task_id']) - y_train)),
        'err_lf_test' : np.mean(np.abs(lf.predict(X_test, task['task_id']) - y_test)),
    }
    # Transfer efficiency: a value above 1 means the lifelong forest has the
    # lower test error.
    result['te'] = result['err_uf_test'] / result['err_lf_test']

    results.append(result)

# Context manager guarantees the results file is flushed and closed.
with open("output/toxic_comment_sweep_%d.p" % n_estimators, "wb") as f:
    pickle.dump(results, f)

TESTING TASK: Toxic Comment Identification at sample size n = 10


  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors

TESTING TASK: Toxic Comment Identification at sample size n = 96


  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors = np.nan_to_num(np.array(label_counts) / np.sum(label_counts))
  posteriors

In [None]:
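# Display results (currently disabled).
# NOTE(review): the commented-out code below refers to a `tasks` list and to
# per-task pickle files "output/<filename>.p", neither of which is defined or
# written by the cells shown above. It looks like a leftover from an earlier
# version; update it to read `results` or remove it.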

# num_tasks = len(tasks)

# for task_ in tasks:
    
#     task = pickle.load(open("output/%s.p" % task_['filename'], "rb"))
    
#     print("-------------------------------------------------------")
#     print("TASK:", task['name'])
#     print("-------------------------------------------------------")

#     print("Number of training examples:", len(task['X_train']))
#     print("Number of testing examples:", len(task['X_test']))
#     print(n_estimators, "estimators per task for Lifelong Forest.")
#     print(num_tasks*n_estimators, "estimators for Uncertainty Forest.")

#     print("UF train error: ", task['err_uf_train'])
#     print("UF test error: ", task['err_uf_test'])
#     print("LF train error: ", task['err_lf_train'])
#     print("LF test error: ", task['err_lf_test'])
    
#     print("Transfer Efficiency: ", task['te'])
#     print("-------------------------------------------------------")