In [13]:
import numpy as np
import pickle

from utils import load_imdb, load_yelp, load_amazon

from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Load the toxic-comment test labels and p(y | x, t), the class probabilities conditioned on each task.
# task_cond_probs should be an (n * 6) array, with n = the number of test examples from toxic comment
# classification.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# Context managers make sure the file handles are closed as soon as the data has been read.
with open("output/y_test.p", "rb") as f:
    y_test_toxic = pickle.load(f)
with open("output/probs.p", "rb") as f:
    task_cond_probs = pickle.load(f)

In [3]:
# Sanity check: report the dimensions of the two arrays that were just loaded.
for label, loaded in (
    ("y_test shape: ", y_test_toxic),
    ("task_cond_probs shape: ", task_cond_probs),
):
    print(label, loaded.shape)

y_test shape:  (63978, 6)
task_cond_probs shape:  (63978, 6)


In [4]:
# Collapse the multi-label targets into one binary label per example:
#   row sum != 0 (at least one label set) -> 'toxic'     = 0 = negative
#   row sum == 0 (no label set)           -> 'non-toxic' = 1 = positive
labels_per_example = y_test_toxic.sum(axis=1)
y_test_bin = (labels_per_example == 0).astype(int)

# Eyeball the first ten rows before and after the conversion.
for preview in (y_test_toxic, y_test_bin):
    print(preview[:10])

[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
[1 1 1 1 1 1 1 1 0 1]


In [5]:
# Get p(t | x), the probability of each task. This should be an (n * 3) array, with
# n = the number of test examples from toxic comment classification.
n_estimators = 35

# NOTE(review): the file suffix below is hard-coded to 10 while n_estimators is 35 and is never used
# in this notebook; confirm which value is intended before wiring n_estimators into the path.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle once the array has been read.
with open("output/task_prior_probs_target_%d.p" % 10, "rb") as f:
    task_prior_probs_target = pickle.load(f)

In [6]:
# Sanity check: the loaded task probabilities should have shape (n, 3).
print("task_prior_probs_target shape:", task_prior_probs_target.shape)

task_prior_probs_target shape: (63978, 3)


In [8]:
# Check that the task probabilities can actually discriminate the tasks well on the source data,
# by loading the per-example task probabilities together with the true task label of each example.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# Context managers make sure the file handles are closed as soon as the data has been read.
with open("output/task_prior_probs_source_%d.p" % 10, "rb") as f:
    task_prior_probs_source = pickle.load(f)
with open("output/source_task_labels.p", "rb") as f:
    source_task_labels = pickle.load(f)

print("source task priors shape: ", task_prior_probs_source.shape)
print("source task labels shape: ", source_task_labels.shape)

# Preview the first ten rows of each array.
print(task_prior_probs_source[0:10])
print(source_task_labels[0:10])

source task priors shape:  (343000, 3)
source task labels shape:  (343000,)
[[0.69515271 0.05152521 0.25332209]
 [0.94367129 0.02546505 0.03086366]
 [0.94367129 0.02546505 0.03086366]
 [0.94367129 0.02546505 0.03086366]
 [0.94367129 0.02546505 0.03086366]
 [0.87360956 0.08456998 0.04182045]
 [0.93478971 0.02349136 0.04171893]
 [0.87360956 0.08456998 0.04182045]
 [0.72259415 0.17597701 0.10142884]
 [0.79697351 0.11859914 0.08442735]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [14]:
# Predict the task of every source example as the most probable column of p(t | x).
predicted_source_task_labels = np.argmax(task_prior_probs_source, axis = 1)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy gives the same value either way,
# but the documented order keeps this call consistent with the other metric calls in the notebook.
acc = accuracy_score(source_task_labels, predicted_source_task_labels)
print("Source task prediction accuracy: ", acc)

Source task prediction accuracy:  0.9280991253644315


In [10]:
# Check that for the source tasks, 0 corresponds to negative and 1 corresponds to positive.
X_train, y_train, X_test, y_test = load_imdb(view="raw", verbose=True, subsample_frac=0.001)

'X_train' and 'X_test' are each a list of string-valued reviews of movies.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 45
Number of testing examples = 5000
First few examples:
Review: -------------------------------------------------------
<br /><br />Once I ignored some of the implausibilities, this was actually a fairly decent horror/monster flick. So, I'll give some of the good points first: - the dragon was quite convincing, especially as she prowled through the tunnels looking for lunch (hint: she likes humans). - the action was fairly non stop, and, after a weak beginning, I got quite absorbed in the storyline. - sorry to say, I was kind of rooting for the dragon - she was probably the most convincing and consistent character in the movie.<br /><br />Now for the implausible stuff **maybe some spoilers**: - if you were hunting a fire-breathing dragon in 1100 AD, would you charge into its cave wit

In [11]:
# Same label-convention check for the Yelp source task (0 = negative, 1 = positive).
# NOTE: this rebinds X_train, y_train, X_test, y_test, replacing whatever an earlier cell loaded.
X_train, y_train, X_test, y_test = load_yelp(view="raw", verbose=True, subsample_frac=0.001)

'X_train' and 'X_test' are each a list of string-valued reviews of business.
'y_train' and 'y_test' are each list of binary sentiments, where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 560
Number of testing examples = 38000
First few examples:
Review: -------------------------------------------------------
This place is one of my favorite comic shops. I actually live closer to a different one, but I drive to chandler just to go to this one. I like their selection and when they have the dollar sale you can get some ridiculous deals. The staff is ridiculously friendly and I usually always walk out with whatever I wanted. They also have some cool events from time to time and i've found their prices to be reasonable and comparable to other comic shops.
Sentiment: ----------------------------------------------------
1
Review: -------------------------------------------------------
The wait time for an appointment is ridiculous. Been waiting over an hour and a half for 

In [12]:
# Same label-convention check for the Amazon source task (0 = negative, 1 = positive).
# NOTE: this rebinds X_train, y_train, X_test, y_test, replacing whatever an earlier cell loaded.
X_train, y_train, X_test, y_test = load_amazon(view="raw", verbose=True, subsample_frac=0.001)

'X_train' and 'X_test' are each 
                a list of string-valued reviews of products.
'y_train' and 'y_test' are each list of binary sentiments, 
                where 0 = 'negative' and 1 = 'positive'.
Number of training examples = 3600
Number of testing examples = 400000
First few examples:
Review: -------------------------------------------------------
Expensive Junk
Sentiment: ----------------------------------------------------
0.0
Review: -------------------------------------------------------
Toast too dark
Sentiment: ----------------------------------------------------
0.0
Review: -------------------------------------------------------
Excellent imagery...dumbed down story
Sentiment: ----------------------------------------------------
1.0


In [15]:
# Produce toxic comment predictions.
# Inputs (expected shapes):
#   task_prior_probs_target : n * 3  -> p(t | x), one column per task
#   task_cond_probs         : n * 6  -> columns (2*t, 2*t + 1) are used as classes 0 and 1 of task t
#   y_test_bin              : n      -> binary ground truth
# Each term below is the product p(class | x, task) * p(task | x).

prior_task_0 = task_prior_probs_target[:, 0]
prior_task_1 = task_prior_probs_target[:, 1]
prior_task_2 = task_prior_probs_target[:, 2]

task_0_class_0 = prior_task_0 * task_cond_probs[:, 0]
task_0_class_1 = prior_task_0 * task_cond_probs[:, 1]

task_1_class_0 = prior_task_1 * task_cond_probs[:, 2]
task_1_class_1 = prior_task_1 * task_cond_probs[:, 3]

task_2_class_0 = prior_task_2 * task_cond_probs[:, 4]
task_2_class_1 = prior_task_2 * task_cond_probs[:, 5]

In [16]:
# Marginalise over the three tasks: add up the per-task terms of each class.
# The accumulation order (task 0, then 1, then 2) is kept so the floating-point results are unchanged.
class_0 = task_0_class_0.copy()
class_1 = task_0_class_1.copy()
for term_0, term_1 in (
    (task_1_class_0, task_1_class_1),
    (task_2_class_0, task_2_class_1),
):
    class_0 = class_0 + term_0
    class_1 = class_1 + term_1

# The two class probabilities should add up to 1 for every example; show the first ten.
total_prob = class_0 + class_1
print(total_prob[:10])

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [17]:
# Predict class 1 whenever its probability exceeds one half, otherwise class 0.
predicted_positive = class_1 > 0.5
y_pred = predicted_positive.astype(int)

# Per-class precision / recall / F1 against the binarised ground truth.
report = classification_report(y_true=y_test_bin, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.13      0.83      0.22      6243
           1       0.96      0.39      0.55     57735

    accuracy                           0.43     63978
   macro avg       0.54      0.61      0.39     63978
weighted avg       0.87      0.43      0.52     63978



In [18]:
# Chance: the fraction of test examples whose binary label is 1, i.e. the accuracy obtained by
# always predicting class 1.
n_positive = y_test_bin.sum()
n_total = len(y_test_bin)
print("Chance: ", n_positive / n_total)

Chance:  0.90241958173122
