This notebook chooses the sample sizes for the source tasks (Amazon, Yelp, IMDB) that achieve 'good' performance on the classification tasks. These sample sizes are then used in the progressive learning experiments.

In [6]:
import numpy as np
import pickle
import copy

from utils import load_imdb, load_yelp, load_toxic_comment, load_amazon

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from proglearn.forest import UncertaintyForest

In [7]:
# Sweep configuration.
# Fractions of each task's training set to evaluate, roughly log-spaced.
# For a quick smoke test, shorten this list (e.g. to [0.001, 0.003]).
subsample_fracs = [1e-3, 3e-3, 1e-2, 3e-2, 1e-1, 3e-1, 1]
n_estimators = 10  # passed to UncertaintyForest(n_estimators=...)
verbose = True     # forwarded to each task's 'load' function

In [8]:
# Registry of source tasks swept below. Per entry:
#   name     - human-readable title printed as the task header
#   filename - short tag embedded in the output pickle's file name
#   load     - loader called as load(verbose=...), unpacked into
#              (X_train, y_train, X_test, y_test)
#   task_id  - present on the IMDB entry only; not read by the cells in
#              this notebook (NOTE(review): confirm whether it is still needed)
source_tasks = [
    dict(name='Yelp Review Sentiment Analysis', filename='yelp', load=load_yelp),
    dict(name='IMDB Review Sentiment Analysis', filename='imdb', load=load_imdb, task_id=1),
    dict(name='Amazon Review Sentiment Analysis', filename='amazon', load=load_amazon),
]

In [9]:
# For every source task, train an UncertaintyForest on progressively larger
# random subsamples of the training set and record test accuracy per fraction.
# Results are pickled to output/uf_accs_<filename>_<n_estimators>.p, and the
# list of fractions to output/uf_subsample_fracs.p.
for task in source_tasks:
    print("TASK:", task['name'])
    print("----------------------------")
    X_train_full, y_train_full, X_test, y_test = task['load'](verbose = verbose)

    accs = np.zeros(len(subsample_fracs))
    for i, subsample_frac in enumerate(subsample_fracs):
        if subsample_frac == 1:
            # A fraction of 1 means "use everything". Passing it through to
            # train_test_split would be wrong: an integer test_size is an
            # absolute sample count, so test_size=1 yields a single example.
            X_train, y_train = X_train_full, y_train_full
        else:
            # The held-out ("test") side of the split is the random subsample
            # of the requested size; the remainder is discarded.
            _, X_train, _, y_train = train_test_split(
                X_train_full, y_train_full, test_size=subsample_frac
            )
        uf = UncertaintyForest(n_estimators=n_estimators)
        uf.fit(X_train, y_train)

        # accuracy_score's documented argument order is (y_true, y_pred).
        accs[i] = accuracy_score(y_test, uf.predict(X_test))

    # Context managers make sure each pickle is flushed and closed.
    with open("output/uf_accs_%s_%d.p" % (task['filename'], n_estimators), "wb") as accs_file:
        pickle.dump(accs, accs_file)

with open("output/uf_subsample_fracs.p", "wb") as fracs_file:
    pickle.dump(subsample_fracs, fracs_file)

TASK: Yelp Review Sentiment Analysis
'X_train' and 'X_test' are each an n-by-d array of BERT embedded reviews of a business.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 560000
Input dimension d = 512
Number of testing examples = 38000
TASK: IMDB Review Sentiment Analysis
'X_train' and 'X_test' are each an n-by-d array of BERT embedded movie reviews.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 45000
Input dimension d = 512
Number of testing examples = 5000
TASK: Amazon Review Sentiment Analysis
'X_train' and 'X_test' are each an 
                n-by-d array of BERT embedded product reviews.
'y_train' and 'y_test' are each list of binary sentiments, 
                where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 300000
Input dimension d = 512
Number of testing examples = 300000
