In [None]:
import importlib
import numpy as np
import os
import pickle
import sys
import torch

from transformers import (BertTokenizer, 
                          BertForSequenceClassification,
                          DistilBertForSequenceClassification, 
                          DistilBertTokenizer)

# Project-local modules live in ../src relative to the notebook's working
# directory, so put that directory at the front of the import path first.
sys.path.insert(0, os.path.join(os.getcwd(), '..', 'src'))
import train_eval
import synonym


# Pick the compute device once; later cells can reuse `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU {}!".format(torch.cuda.get_device_name(0)))
else:
    print("No GPU :( using CPU")

# Re-import the local modules so edits made to them since the kernel started
# are picked up without a restart (synonym first, then train_eval).
importlib.reload(synonym)
importlib.reload(train_eval)

In [None]:
# Load the generated reviews as arrays of strings, splitting on newlines so
# that each line of the file becomes one element.
# NOTE(review): genfromtxt treats '#' as a comment marker by default — confirm
# that reviews containing '#' are not being silently truncated.
neg = np.genfromtxt('generated_negative_reviews.csv', delimiter='\n', dtype=str)
# The last 10 positive entries are dropped here.
# NOTE(review): the reason for trimming 10 rows is not visible in this
# notebook — presumably to reach a target sample count; confirm.
pos = np.genfromtxt('generated_positive_reviews.csv', delimiter='\n', dtype=str)[:-10]

In [None]:
# Build the evaluation set: all negative reviews first, then the positives.
test_sentences = np.concatenate((neg, pos))
# Labels follow the same ordering: 0.0 for every negative review, 1.0 for
# every positive one.
test_labels = np.repeat([0.0, 1.0], [len(neg), len(pos)])

## BERT

In [None]:
accuracies = []
pretrained_weights = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)


# Set up test data: hand the generated reviews and their labels to the
# project's dataset helper. "no_shuffle" is passed as the split mode; the
# second return value is not needed here.
# NOTE(review): 256 is presumably the maximum token length — confirm against
# train_eval.ReviewDataset.setUpData.
evaluation_data, _ = train_eval.ReviewDataset.setUpData(test_sentences,
                                                        test_labels,
                                                        tokenizer, 256,
                                                        split="no_shuffle")

# Load the saved model onto the device selected in the setup cell. Without
# map_location, a checkpoint saved from a GPU fails to load on a CPU-only
# machine.
# NOTE: torch.load unpickles arbitrary objects — only load trusted files.
model = torch.load('yelp_bert.model', map_location=device)

# evaluate
# NOTE(review): 16 is presumably the batch size — confirm against
# train_eval.evaluate.
acc, pred_labels, _, _, true_labels = train_eval.evaluate(model, evaluation_data, 16, return_pred_labels=True)

# The label results are nested (an iterable of iterables); flatten them into
# single 1-D arrays so they can be compared element-wise.
pred = np.array([q for p in pred_labels for q in p])
true = np.array([q for p in true_labels for q in p])

## DistilBERT

In [None]:
# NOTE(review): this tokenizer is the *cased* DistilBERT vocabulary, while the
# BERT cell uses an uncased one — confirm this matches how the saved
# DistilBERT model was trained.
pretrained_weights = 'distilbert-base-cased'
tokenizer = DistilBertTokenizer.from_pretrained(pretrained_weights)


# Set up test data: hand the generated reviews and their labels to the
# project's dataset helper. "no_shuffle" is passed as the split mode; the
# second return value is not needed here.
# NOTE(review): 256 is presumably the maximum token length — confirm against
# train_eval.ReviewDataset.setUpData.
evaluation_data, _ = train_eval.ReviewDataset.setUpData(test_sentences,
                                                        test_labels,
                                                        tokenizer, 256,
                                                        split="no_shuffle")

# Load the saved model onto the device selected in the setup cell. Without
# map_location, a checkpoint saved from a GPU fails to load on a CPU-only
# machine.
# NOTE: torch.load unpickles arbitrary objects — only load trusted files.
model = torch.load('yelp_distil.model', map_location=device)

# evaluate
# NOTE(review): 16 is presumably the batch size — confirm against
# train_eval.evaluate.
acc2, pred_labels2, _, _, true_labels2 = train_eval.evaluate(model, evaluation_data, 16, return_pred_labels=True)

# The label results are nested (an iterable of iterables); flatten them into
# single 1-D arrays so they can be compared element-wise.
pred2 = np.array([q for p in pred_labels2 for q in p])
true2 = np.array([q for p in true_labels2 for q in p])

## Output

In [None]:
# Collect the evaluation results of both models under descriptive keys so
# they can be reloaded later without re-running the models.
output = {"bert_accuracies": acc,
          "bert_pred_labels": pred,
          "bert_true_labels": true,
          "distil_accuracies": acc2,
          "distil_pred_labels": pred2,
          "distil_true_labels": true2}

# Write through a context manager so the file is flushed and closed even if
# pickling raises; the bare open() call previously left the handle open.
with open("gen_accs_preds.p", "wb") as out_file:
    pickle.dump(output, out_file)

In [None]:
# Fraction of BERT predictions that match the true labels. Averaging the
# boolean match mask divides by the actual number of evaluated samples rather
# than a hard-coded 30000, so the value stays correct if the dataset size
# differs.
np.mean(output["bert_pred_labels"] == output["bert_true_labels"])

In [None]:
# Mean of the accuracy value(s) returned by train_eval.evaluate for BERT.
# NOTE(review): presumably these are per-batch accuracies, in which case this
# mean can differ from the per-sample accuracy when the last batch is smaller —
# confirm against train_eval.evaluate.
np.mean(output["bert_accuracies"])