In [2]:
!pip install -q -U watermark

In [3]:
pip install transformers==2.8.0



In [4]:
%reload_ext watermark
%watermark -v -p numpy,pandas,torch,transformers

Python implementation: CPython
Python version       : 3.7.12
IPython version      : 5.5.0

numpy       : 1.19.5
pandas      : 1.1.5
torch       : 1.9.0+cu111
transformers: 2.8.0



In [5]:
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap

In [6]:
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [7]:
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7fe95477ff70>

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [9]:
!gdown --id 1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV
!gdown --id 1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv

Downloading...
From: https://drive.google.com/uc?id=1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV
To: /content/apps.csv
100% 134k/134k [00:00<00:00, 48.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv
To: /content/reviews.csv
100% 7.17M/7.17M [00:00<00:00, 113MB/s]


In [10]:
df = pd.read_csv("reviews.csv")
df.head()

Unnamed: 0,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,Andrew Thomas,https://lh3.googleusercontent.com/a-/AOh14GiHd...,Update: After getting a response from the deve...,1,21,4.17.0.3,2020-04-05 22:25:57,"According to our TOS, and the term you have ag...",2020-04-05 15:10:24,most_relevant,com.anydo
1,Craig Haines,https://lh3.googleusercontent.com/-hoe0kwSJgPQ...,Used it for a fair amount of time without any ...,1,11,4.17.0.3,2020-04-04 13:40:01,It sounds like you logged in with a different ...,2020-04-05 15:11:35,most_relevant,com.anydo
2,steven adkins,https://lh3.googleusercontent.com/a-/AOh14GiXw...,Your app sucks now!!!!! Used to be good but no...,1,17,4.17.0.3,2020-04-01 16:18:13,This sounds odd! We are not aware of any issue...,2020-04-02 16:05:56,most_relevant,com.anydo
3,Lars Panzerbjørn,https://lh3.googleusercontent.com/a-/AOh14Gg-h...,"It seems OK, but very basic. Recurring tasks n...",1,192,4.17.0.2,2020-03-12 08:17:34,We do offer this option as part of the Advance...,2020-03-15 06:20:13,most_relevant,com.anydo
4,Scott Prewitt,https://lh3.googleusercontent.com/-K-X1-YsVd6U...,Absolutely worthless. This app runs a prohibit...,1,42,4.17.0.2,2020-03-14 17:41:01,We're sorry you feel this way! 90% of the app ...,2020-03-15 23:45:51,most_relevant,com.anydo


In [11]:
df.shape

(15746, 11)

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15746 entries, 0 to 15745
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   userName              15746 non-null  object
 1   userImage             15746 non-null  object
 2   content               15746 non-null  object
 3   score                 15746 non-null  int64 
 4   thumbsUpCount         15746 non-null  int64 
 5   reviewCreatedVersion  13533 non-null  object
 6   at                    15746 non-null  object
 7   replyContent          7367 non-null   object
 8   repliedAt             7367 non-null   object
 9   sortOrder             15746 non-null  object
 10  appId                 15746 non-null  object
dtypes: int64(2), object(9)
memory usage: 1.3+ MB


In [13]:
def to_sentiment(rating):
  """Map a review star rating onto a three-way sentiment class id.

  The rating is coerced with int() first. A value of 2 or below gives 0,
  exactly 3 gives 1, and anything above 3 gives 2.
  """
  stars = int(rating)
  # Guard clauses: handle the two outer buckets, the middle one falls through.
  if stars < 3:
    return 0
  if stars > 3:
    return 2
  return 1

df['sentiment'] = df.score.apply(to_sentiment)

## Data Preprocessing

You might already know that Machine Learning models don't work with raw text. You need to convert text to numbers (of some sort). BERT requires even more attention (good one, right?). Here are the requirements: 

- Add special tokens to separate sentences and do classification
- Pass sequences of constant length (introduce padding)
- Create an array of 0s (pad token) and 1s (real token) called the *attention mask*

The Transformers library provides (you've guessed it) a wide variety of Transformer models (including BERT). It works with TensorFlow and PyTorch! It also includes prebuilt tokenizers that do the heavy lifting for us!


In [14]:
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'

In [15]:
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [16]:
sample_txt = 'When was I last outside? I am stuck at home for 2 weeks.'

In [17]:
tokens = tokenizer.tokenize(sample_txt)
token_ids = tokenizer.convert_tokens_to_ids(tokens)

print(f' Sentence: {sample_txt}')
print(f'   Tokens: {tokens}')
print(f'Token IDs: {token_ids}')

 Sentence: When was I last outside? I am stuck at home for 2 weeks.
   Tokens: ['When', 'was', 'I', 'last', 'outside', '?', 'I', 'am', 'stuck', 'at', 'home', 'for', '2', 'weeks', '.']
Token IDs: [1332, 1108, 146, 1314, 1796, 136, 146, 1821, 5342, 1120, 1313, 1111, 123, 2277, 119]


In [18]:
tokenizer.sep_token, tokenizer.sep_token_id

('[SEP]', 102)

In [19]:
tokenizer.cls_token, tokenizer.cls_token_id

('[CLS]', 101)

In [20]:
tokenizer.pad_token, tokenizer.pad_token_id

('[PAD]', 0)

In [21]:
tokenizer.unk_token, tokenizer.unk_token_id

('[UNK]', 100)

In [22]:
#Ignore warnings
import warnings
warnings.filterwarnings('ignore')
encoding = tokenizer.encode_plus(
  sample_txt,
  max_length=32,
  add_special_tokens=True, # Add '[CLS]' and '[SEP]'
  return_token_type_ids=False,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_tensors='pt',  # Return PyTorch tensors
)

encoding.keys()

dict_keys(['input_ids', 'attention_mask'])

In [23]:
print(len(encoding['input_ids'][0]))
encoding['input_ids'][0]

32


tensor([ 101, 1332, 1108,  146, 1314, 1796,  136,  146, 1821, 5342, 1120, 1313,
        1111,  123, 2277,  119,  102,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0])

In [24]:
print(len(encoding['attention_mask'][0]))
encoding['attention_mask']

32


tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0]])

In [25]:
tokenizer.convert_ids_to_tokens(encoding['input_ids'][0])

['[CLS]',
 'When',
 'was',
 'I',
 'last',
 'outside',
 '?',
 'I',
 'am',
 'stuck',
 'at',
 'home',
 'for',
 '2',
 'weeks',
 '.',
 '[SEP]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]']

In [26]:
# Number of token ids produced for each review when encoded with max_length=512
# (presumably inspected to pick MAX_LEN below).
# A comprehension keeps the per-review temporaries local, so the `tokens` list
# printed in the earlier tokenization cell is not silently replaced by token ids.
token_lens = [len(tokenizer.encode(txt, max_length=512)) for txt in df.content]

In [27]:
MAX_LEN = 160

In [28]:
class ReviewDataset(Dataset):
  """Map-style dataset that tokenizes a single review on every lookup.

  Holds two parallel indexable collections (review texts and their targets)
  together with the tokenizer and the fixed length passed to it as max_length.
  """

  def __init__(self, reviews, targets, tokenizer, max_len):
    self.reviews, self.targets = reviews, targets
    self.tokenizer, self.max_len = tokenizer, max_len

  def __len__(self):
    """Number of reviews held by the dataset."""
    return len(self.reviews)

  def __getitem__(self, item):
    """Encode review `item` and return its text, tensors and target.

    Returns a dict with the keys 'review_text', 'input_ids',
    'attention_mask' and 'targets'; the two encoded tensors are flattened
    and the target is wrapped in a torch.long tensor.
    """
    text = str(self.reviews[item])
    label = self.targets[item]

    # Same tokenizer options as the single-sentence demo earlier in the notebook.
    tokenizer_options = dict(
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      pad_to_max_length=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    encoded = self.tokenizer.encode_plus(text, **tokenizer_options)

    sample = {'review_text': text}
    for field in ('input_ids', 'attention_mask'):
      sample[field] = encoded[field].flatten()
    sample['targets'] = torch.tensor(label, dtype=torch.long)
    return sample

In [29]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_test, test_size=0.5, random_state=RANDOM_SEED)

In [30]:
df_train.shape, df_val.shape, df_test.shape

((14171, 12), (787, 12), (788, 12))

In [31]:
def create_data_loader(df, tokenizer, max_len, batch_size, shuffle=False, num_workers=4):
  """Wrap a reviews DataFrame in a ReviewDataset and return a DataLoader over it.

  Args:
    df: DataFrame whose `content` column holds the review texts and whose
      `sentiment` column holds the targets.
    tokenizer: tokenizer handed to ReviewDataset.
    max_len: sequence length handed to ReviewDataset.
    batch_size: number of samples per batch.
    shuffle: whether the loader reshuffles the data every epoch. Defaults to
      False, which matches the previous behaviour.
    num_workers: number of loader worker processes. Defaults to 4, the value
      that used to be hard-coded; pass 0 to load in the main process.

  Returns:
    A torch.utils.data.DataLoader yielding the dict batches built by
    ReviewDataset.
  """
  ds = ReviewDataset(
    reviews=df.content.to_numpy(),
    targets=df.sentiment.to_numpy(),
    tokenizer=tokenizer,
    max_len=max_len
  )

  return DataLoader(
    ds,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers
  )

In [32]:
BATCH_SIZE = 16

train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)

In [33]:
data = next(iter(train_data_loader))
data.keys()

dict_keys(['review_text', 'input_ids', 'attention_mask', 'targets'])

In [34]:
data['review_text']

["You can't have your to do list as notifications",
 "I haven't been using this very long and it's not as user friendly as the Calender that came on my last phone.  I wish instead of dots you can just see what you have entered for that day immediately without tapping on that day.  Otherwise it is easy to use and functional.",
 '1. Кривой и неработающий экспорт и импорт. 2. Нельзя удалить базовые навыки, заданий и характеристики. 3. Нельзя удалять сразу несколько навыков, заданий и характеристик. 4. Кнопка отчислить данные не удаляет всё, а только откатывает к базовым. Спустя пару месяцев снова установил приложение. Загрузил с автосохронерия данных - загрузилось всё базовое и с диска тоже самое (настройки тоже). Проверял после установки изменяя данные и сохраняя их потом. Итог: после загрузки зброс к базовым.',
 'Love it so far. Wish it had a widget for Android though.',
 "Liked the prioritization by dragging and the highlighting, but there does not seem to be a way to create recurring 

In [35]:
data['input_ids']

tensor([[  101,  1192,  1169,  ...,     0,     0,     0],
        [  101,   146,  3983,  ...,     0,     0,     0],
        [  101,   122,   119,  ..., 17424, 28404,   102],
        ...,
        [  101,  2543,  3014,  ...,     0,     0,     0],
        [  101,  1960,  1614,  ...,     0,     0,     0],
        [  101,   138,  8661,  ...,     0,     0,     0]])

In [36]:
data['attention_mask']

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])

In [37]:
data['targets']

tensor([0, 1, 1, 1, 0, 2, 1, 1, 2, 2, 0, 0, 0, 0, 2, 0])

In [38]:
print(data['input_ids'].shape)
print(data['attention_mask'].shape)
print(data['targets'].shape)

torch.Size([16, 160])
torch.Size([16, 160])
torch.Size([16])


In [41]:
# Load the same checkpoint the tokenizer was built from, via the shared constant
# rather than a second hard-coded copy of the name.
bert_model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)

In [43]:
# Take the first two outputs by position instead of unpacking the whole return
# value: this is identical for a plain 2-tuple and keeps working if the model
# hands back extra entries or an output object instead of a tuple.
last_hidden_state, pooled_output = bert_model(
  input_ids=encoding['input_ids'],
  attention_mask=encoding['attention_mask']
)[:2]

In [44]:
last_hidden_state.shape

torch.Size([1, 32, 768])

In [45]:
bert_model.config.hidden_size

768

In [46]:
pooled_output.shape

torch.Size([1, 768])

In [47]:
class SentimentClassifier(nn.Module):
  """Pre-trained BERT encoder followed by dropout and a linear output layer.

  Args:
    n_classes: size of the output layer, i.e. the number of sentiment classes.
  """

  def __init__(self, n_classes):
    super(SentimentClassifier, self).__init__()
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.3)
    # The output layer maps BERT's hidden size onto one score per class.
    self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

  def forward(self, input_ids, attention_mask):
    """Return one unnormalised score per class for every input sequence.

    Args:
      input_ids: token-id tensor passed straight to BERT.
      attention_mask: mask tensor passed straight to BERT.

    Returns:
      The output of the linear layer applied to the dropped-out pooled BERT
      output (no softmax is applied here).
    """
    bert_outputs = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask
    )
    # Pick the pooled output by position rather than tuple-unpacking the
    # return value: equivalent for a 2-tuple, but does not break when the
    # model returns additional entries or an output object.
    pooled_output = bert_outputs[1]
    output = self.drop(pooled_output)
    return self.out(output)

In [48]:
#Ignore warnings
import warnings
warnings.filterwarnings('ignore')
class_names = ['negative', 'neutral', 'positive']
model = SentimentClassifier(len(class_names))
model = model.to(device)

In [49]:
input_ids = data['input_ids'].to(device)
attention_mask = data['attention_mask'].to(device)

print(input_ids.shape) # batch size x seq length
print(attention_mask.shape) # batch size x seq length

torch.Size([16, 160])
torch.Size([16, 160])


In [50]:
input_ids


tensor([[  101,  1192,  1169,  ...,     0,     0,     0],
        [  101,   146,  3983,  ...,     0,     0,     0],
        [  101,   122,   119,  ..., 17424, 28404,   102],
        ...,
        [  101,  2543,  3014,  ...,     0,     0,     0],
        [  101,  1960,  1614,  ...,     0,     0,     0],
        [  101,   138,  8661,  ...,     0,     0,     0]], device='cuda:0')

In [52]:
import torch.nn.functional as F
F.softmax(model(input_ids, attention_mask), dim=1)

tensor([[0.3009, 0.3976, 0.3015],
        [0.1419, 0.5635, 0.2946],
        [0.0952, 0.6887, 0.2162],
        [0.1942, 0.5287, 0.2772],
        [0.3200, 0.4220, 0.2579],
        [0.1822, 0.5380, 0.2799],
        [0.1258, 0.6787, 0.1955],
        [0.1466, 0.6336, 0.2198],
        [0.2893, 0.4409, 0.2698],
        [0.1859, 0.5751, 0.2390],
        [0.1739, 0.5150, 0.3111],
        [0.2419, 0.4852, 0.2728],
        [0.2253, 0.6274, 0.1473],
        [0.1541, 0.6173, 0.2286],
        [0.1841, 0.5939, 0.2220],
        [0.2074, 0.4421, 0.3505]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [53]:
# Number of passes over the training data made by the training loop.
EPOCHS = 10
# AdamW (imported from transformers) over every parameter of the classifier.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
# The scheduler is stepped once per batch, so the schedule has to span
# (batches per epoch) * EPOCHS steps.
total_steps = len(train_data_loader) * EPOCHS
# Linear schedule with no warm-up steps over the whole run.
scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=0,
  num_training_steps=total_steps
)
# Cross-entropy loss on the classifier's raw (pre-softmax) outputs.
loss_fn = nn.CrossEntropyLoss().to(device)

In [54]:
def train_epoch(
  model,
  data_loader,
  loss_fn,
  optimizer,
  device,
  scheduler,
  n_examples
):
  """Train `model` for one pass over `data_loader`.

  Args:
    model: module called as model(input_ids=..., attention_mask=...).
    data_loader: iterable of dict batches with the keys "input_ids",
      "attention_mask" and "targets".
    loss_fn: callable taking (outputs, targets) and returning a scalar loss.
    optimizer: optimizer stepped once per batch.
    device: device the batch tensors are moved to.
    scheduler: learning-rate scheduler stepped once per batch.
    n_examples: denominator used to turn the correct-prediction count into
      an accuracy.

  Returns:
    (accuracy, mean_loss): accuracy is a float64 tensor equal to
    correct predictions / n_examples; mean_loss is the mean of the per-batch
    losses, or NaN when the loader produced no batches.
  """
  model = model.train()
  losses = []
  # Tensor accumulator (instead of the int 0) so that .double() below also
  # works when the loader yields no batches at all.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)
  for d in data_loader:
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)
    # Clear gradients before the backward pass so anything left over from
    # earlier cells or a previous call cannot leak into the first update.
    optimizer.zero_grad()
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask
    )
    # The predicted class is the index of the largest score in each row.
    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    correct_predictions += torch.sum(preds == targets)
    losses.append(loss.item())
    loss.backward()
    # Clip the global gradient norm to 1.0 before stepping.
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
  mean_loss = np.mean(losses) if losses else float("nan")
  return correct_predictions.double() / n_examples, mean_loss

In [56]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate `model` on `data_loader` without computing gradients.

  Args:
    model: module called as model(input_ids=..., attention_mask=...).
    data_loader: iterable of dict batches with the keys "input_ids",
      "attention_mask" and "targets".
    loss_fn: callable taking (outputs, targets) and returning a scalar loss.
    device: device the batch tensors are moved to.
    n_examples: denominator used to turn the correct-prediction count into
      an accuracy.

  Returns:
    (accuracy, mean_loss): accuracy is a float64 tensor equal to
    correct predictions / n_examples; mean_loss is the mean of the per-batch
    losses, or NaN when the loader produced no batches.
  """
  model = model.eval()
  losses = []
  # Tensor accumulator (instead of the int 0) so that .double() below also
  # works when the loader yields no batches at all.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)
      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
      )
      # The predicted class is the index of the largest score in each row.
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)
      correct_predictions += torch.sum(preds == targets)
      losses.append(loss.item())
  mean_loss = np.mean(losses) if losses else float("nan")
  return correct_predictions.double() / n_examples, mean_loss

In [57]:
# Per-epoch metrics, one list per metric name.
history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
  # Banner with the 1-based epoch number.
  print(f'Epoch {epoch + 1}/{EPOCHS}')
  print('-' * 10)

  # One optimisation pass over the training split.
  train_acc, train_loss = train_epoch(
    model=model,
    data_loader=train_data_loader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler,
    n_examples=len(df_train)
  )
  print(f'Train loss {train_loss} accuracy {train_acc}')

  # Score the freshly updated weights on the validation split.
  val_acc, val_loss = eval_model(
    model=model,
    data_loader=val_data_loader,
    loss_fn=loss_fn,
    device=device,
    n_examples=len(df_val)
  )
  print(f'Val   loss {val_loss} accuracy {val_acc}')
  print()

  history['train_acc'].append(train_acc)
  history['train_loss'].append(train_loss)
  history['val_acc'].append(val_acc)
  history['val_loss'].append(val_loss)

  # Checkpoint only when validation accuracy beats the best seen so far.
  if val_acc > best_accuracy:
    torch.save(model.state_dict(), 'best_model_state.bin')
    best_accuracy = val_acc

Epoch 1/10
----------
Train loss 0.722731132875985 accuracy 0.6724296097664244
Val   loss 0.5706080323457718 accuracy 0.770012706480305

Epoch 2/10
----------
Train loss 0.3984482690700357 accuracy 0.8473643356149884
Val   loss 0.5139238685369492 accuracy 0.8487928843710293

Epoch 3/10
----------
Train loss 0.21709875583687574 accuracy 0.9290099498976784
Val   loss 0.6756102313753217 accuracy 0.8564167725540026

Epoch 4/10
----------
Train loss 0.1476871773139253 accuracy 0.9561781102251077
Val   loss 0.669687732605962 accuracy 0.8716645489199493

Epoch 5/10
----------
Train loss 0.10546839814204785 accuracy 0.9704325735657329
Val   loss 0.7881995533849113 accuracy 0.8653113087674714

Epoch 6/10
----------
Train loss 0.0829648284347855 accuracy 0.9773481052854421
Val   loss 0.7649704495767946 accuracy 0.8805590851334181

Epoch 7/10
----------
Train loss 0.060116252481596306 accuracy 0.9818643708983135
Val   loss 0.8171172561001732 accuracy 0.8767471410419314

Epoch 8/10
----------
Trai

In [58]:
# Accuracy of the model currently in memory on the held-out test split.
test_acc, _ = eval_model(
  model,
  test_data_loader,
  loss_fn,
  device,
  len(df_test)
)
# Display the computed value. The literal that used to follow this line was a
# pasted copy of an earlier output and made the cell show a hard-coded number.
test_acc.item()

0.883248730964467

In [59]:
def get_predictions(model, data_loader):
  """Run `model` over every batch of `data_loader` and gather the results.

  Args:
    model: module called as `model(input_ids=..., attention_mask=...)` that
      returns one row of class scores per example. It is switched to eval
      mode before the loop.
    data_loader: iterable of batches; each batch is a mapping with the keys
      "review_text", "input_ids", "attention_mask" and "targets".

  Returns:
    A 4-tuple `(review_texts, predictions, prediction_probs, real_values)`:
      review_texts: list of the raw texts, in iteration order.
      predictions: CPU tensor holding the arg-max class index per example.
      prediction_probs: CPU tensor holding the model's output row per
        example. No normalisation is applied here, so these are only
        probabilities if the model itself returns probabilities.
      real_values: CPU tensor holding the target of each example.
    For an empty `data_loader` the list and all three tensors are empty.

  Note:
    Tensor placement uses the notebook-level `device` variable.
  """
  model = model.eval()
  review_texts = []
  pred_batches = []
  output_batches = []
  target_batches = []
  with torch.no_grad():
    for batch in data_loader:
      input_ids = batch["input_ids"].to(device)
      attention_mask = batch["attention_mask"].to(device)
      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
      )
      _, preds = torch.max(outputs, dim=1)
      review_texts.extend(batch["review_text"])
      # Keep one CPU tensor per batch rather than one device tensor per
      # example: fewer Python objects and no results pinned on the device.
      pred_batches.append(preds.cpu())
      output_batches.append(outputs.cpu())
      target_batches.append(batch["targets"].cpu())
  if not pred_batches:
    # Nothing was iterated: return empty tensors instead of letting the
    # concatenation below fail on an empty list.
    no_labels = torch.empty(0, dtype=torch.long)
    return review_texts, no_labels, torch.empty(0), no_labels.clone()
  # Concatenating along dim 0 yields the same tensors as stacking the
  # individual per-example rows.
  predictions = torch.cat(pred_batches)
  prediction_probs = torch.cat(output_batches)
  real_values = torch.cat(target_batches)
  return review_texts, predictions, prediction_probs, real_values

In [60]:
# Gather texts, predicted classes, model outputs and targets for test_data_loader.
y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(model, test_data_loader)

In [61]:
# Per-class precision / recall / F1 for the predictions gathered above,
# labelled with class_names.
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=class_names))

              precision    recall  f1-score   support

    negative       0.90      0.85      0.87       245
     neutral       0.82      0.87      0.84       254
    positive       0.92      0.92      0.92       289

    accuracy                           0.88       788
   macro avg       0.88      0.88      0.88       788
weighted avg       0.88      0.88      0.88       788



In [62]:
# Select one example from the collected test results and pair every class
# name with the model output stored for that example.
idx = 2
true_sentiment = y_test[idx]
review_text = y_review_texts[idx]
pred_df = pd.DataFrame(
  {'class_names': class_names, 'values': y_pred_probs[idx]}
)

In [63]:
review_text = "I love completing my todos! Best app ever!!!"

In [64]:
# Tokenize review_text into PyTorch tensors padded/limited to MAX_LEN tokens,
# with an attention mask and without token-type ids.
encoded_review = tokenizer.encode_plus(
  review_text,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [65]:
# Classify the encoded review and print the predicted sentiment label.
input_ids = encoded_review['input_ids'].to(device)
attention_mask = encoded_review['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {review_text}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: I love completing my todos! Best app ever!!!
Sentiment  : positive


In [75]:
data2='Absolute must see documentary for anyone interested in getting to the bottom of this story. Told with unflinching eye and with gripping style. If you think conspiracy theories are for paranoid disturbed people, this could change your mind. Something for you feds too: A good model for government coverups! If you like your news all tidy and easy to consume this is not for you.'

In [78]:
data3="I have decided to not believe what famous movie critics say. Even though this movie did not get the best comments, this movie made my day. It got me thinking. What a false world this is.<br /><br />What do you do when your most loved ones deceive you. It's said that no matter how often you feed milk to a snake, it can never be loyal and will bite when given a chance. Same way some people are such that they are never grateful. This movie is about how selfish people can be and how everyone is ultimately just thinking about oneself and working for oneself. <br /><br />A brother dies inadvertently at the hands of a gangster. The surviving brother decides to take revenge. Through this process, we learn about the futility of this world. Nothing is real and no one is loyal to anyone.<br /><br />Amitabh gave the performance of his life. The new actor Aryan gave a good performance. The actress who played the wife of Amitabh stole the show. Her role was small but she portrayed her role so diligently that one is moved by her performance. Chawla had really great face expressions but her role was very limited and was not given a chance to fully express herself.<br /><br />A great movie by Raj Kumar Santoshi. His movies always give some message to the audience. His movies are like novels of Nanak Singh (a Punjabi novelist who's novels always had a purpose and targeted a social evil) because they have a real message for the audience. They are entertaining as well as lesson-giving."

In [77]:
data4='After a very scary, crude opening which gives you that creepy "Chainsaw massacre"-feeling, everything falls apart.<br /><br />SPOILER ALERT: As soon as the two FBI-officers start jabbing, you know they are the real killers. Anyone who have seen enough of these "fooled-ya"-movies can figure this out.<br /><br />This movie is mader with one thing in mind: To depict brutal murders. Why, then, is not the little girl tortured and murdered as well? Will this be next for us movie-goers? The torture and abuse of children? Whats wrong with you people? Lynch is truly has a disgusting, ugly mind.'


In [80]:
data5="I can't say much about this film. I think it speaks for itself (as do the current ratings on here). I rented this about two years ago and I totally regretted it. I even /tried/ to like it by watching it twice, but I just couldn't. I can safely say that I have absolutely no desire to see this waste of time ever, ever again. And I'm not one to trash a movie, but I truly believe this was awful. It wasn't even funny in the slightest. The only bits I enjoyed were the few scenes with Christopher Walken in them. I think this film ruined both Jack Black and Ben Stiller for me. All I can think of when I see one of their films now-a-days is this terrible movie, and it reminds me not to waste my money. Amy Poehler is so very annoying, too.<br /><br />Overall, well, I think you get my point. The stars are for Walken, by the way."

In [82]:
data6="I am not so old that I can't remember laughing at Bobcat Goldthwait a couple times. But some where in all his years of drug abuse he lost his sense of humor as well as his brain cells.<br /><br />From the moment this film opens you can have no sympathy nor empathy for the female lead. Neither will you find anything remotely funny after hearing the opening line. Goldthwait obviously hates himself so much that he needs to degrade in order to feel better- even if it is his own imaginary characters he degrades. <br /><br />If you ever saw Shakes the Clown you know how unfunny Bobcat was 15 years ago...this movie is worse. It was not even funny by accident It is sad, pathetic and a total waste of time. May Goldthwaits' hands be rendered paralyzed so he can not write another script. Strike his tongue so he can not dictate another unfunny scene. He is sad and pathetic and needs to make room for a new talent dying to get into Hollywood"


In [85]:
data7='In New Orleans, an illegal immigrant feels sick and leaves a poker game while winning the smalltime criminal Blackie (Walter Jack Palance). He is chased by Blackie and his men Raymond Fitch (Zero Mostel) and Poldi (Guy Thomajan), killed by Blackie and his body is dumped in the sea. During the autopsy, the family man Lieutenant Commander Dr. Clinton Reed (Richard Widmark) of the U.S. Public Health Service finds that the dead man had pneumonic plague caused by rats and he needs to find who had any type of contact with the man within forty-eight hours to avoid an epidemic. The City Mayor assigns the skeptical Captain Tom Warren (Paul Douglas) to help Dr. Clint to find the killers that are infected with the plague and inoculate them.<br /><br />"Panic in the Streets" discloses a simple story, but it is still effective and with a great villain. The engaging plot has not become dated after fifty-seven years. Jack Palance performs a despicable scum in his debut, and the camera work while he tries to escape with Zero Mostel is still very impressive.'

In [87]:
data8="The box is why I originally picked up this movie and the back is why I rented it. But I soon learned that I had been duped. I had thought this movie would be something like a Road Trip/Eurotrip/American Pie deal. But I was wrong. This movie is one of the dumbest I've seen in a long time. The unrated version teases you in to watching but will completely disappoint you. The acting was terrible and sound effects just gaudy. It appeared very low budget with the entire setting taking place in the same building. Go out and get Eurotrip or Road Trip instead. I can't believe National Lampoon put its name on this. DON'T BUY IT, DON'T RENT IT. Don't waste 2 hours of your life on this."


In [89]:
data9="There have been many documentaries that I have seen in which it appeared that the law was on the wrong side of the fence - The Thin Blue Line and Paradise Lost come to mind first and foremost. But this is the first film that had me seething with anger after I saw it. It seems blatantly clear to me from the evidence presented in this film that what happened at Waco was at the very least an unprofessional and sloppy mess on the part of the FBI and AFI, and at the very worst an act of murder. Like most people, when the siege at Waco was occurring I assumed that David Koresh was a completely evil madman who was leading a violent cult. After seeing this, I think that Koresh was more likely a slightly unbalanced and confused guy who inadvertently caught the attention of the U.S. government through his eccentric actions. Sure, there were lots of weapons at the Branch Davidian compound. But none of it was illegal. It was absolutely heartbreaking to see the video footage of the people inside the compound, all of them seeming to be very nice and harmless. And it was angering to see the callous testimony of the men in charge of the government forces on the Waco site, the clueless testimony of Janet Reno, and the partisan defense of the attack on Waco, a defense led by a few of the committee Democrats. Standing out most in my mind was NY representative and current U.S. senator from NY Charles Schumer. I voted for the man when I lived in NY state - I'm a Democrat, pretty left-leaning too. After seeing his actions on this committee, I wish I could go back in time and vote for D'Amato instead! For anyone remotely interested in the government, this is a very crucial film, a must see. I even think this should be shown in classes - it's that important."

In [90]:
data10="I saw this movie last night and thought it was decent. It has it's moments I guess you would say. Some of the scenes with the special ops forces were cool, and some of the location shots were very authentic. I won't be putting this movie in my DVD collection but it is fair enough to recommend for renting. I guess nothing set the movie at another level compared to others of the same genre. The action is good, the acting is decent, the women are extremely seductive and exotic in my opinion, and the story is pretty interesting. 7 out of ten"

In [92]:
data11="Yes, I call this a perfect movie. Not one boring second, a fantastic cast of mostly little known actresses and actors, a great array of characters who are all well defined and who all have understandable motives I could sympathize with, perfect lighting, crisp black and white photography, a fitting soundtrack, an intelligent and harmonious set design and a story that is engaging and works. It's one of those prime quality pictures on which all the pride of Hollywood should rest, the mark everyone should endeavor to reach.<br /><br />Barbara Stanwyck is simply stunning. There was nothing this actress couldn't do, and she always went easy on the melodramatic side. No hysterical outbursts with this lady - I always thought she was a better actress than screen goddesses like Bette Davis or Joan Crawford, and this movie confirmed my opinion. Always as tough as nails and at the same time conveying true sentiments. It is fair to add that she also got many good parts during her long career, and this one is by far the least interesting.<br /><br />The title fits this movie very well. It is about desires, human desires I think everyone can understand. Actually, no one seems to be scheming in this movie, all characters act on impulse, everybody wants to be happy without hurting anybody else. The sad fact that this more often than not leads to complications makes for the dramatic content into which I will not go here.<br /><br />I liked what this movie has to say about youth, about maturing and about the necessity to compromise. The movie I associate most with this one is Alfred Hitchcock's Shadow of a Doubt, it creates a similar atmosphere of idealized and at the same time caricatured Small Town America. The story has a certain similarity with Fritz Lang's considerably harsher movie Clash by Night, made one year earlier, where Stanywck stars in a similar part. I can also recommend it."


In [97]:
data12="I would like to know if anyone know how I can get a copy of the movie,  It's been about 30 years since I've seen this movie, and I would like to see it again. Earth Wind & Fire transcend the nation globally with their inspirational music and themes. It was unfortunate that this group didn't take off like their counterparts in the early 70's, but as previously stated, racial tension existed in the United States which prohibited equalized exposure for the African American musical groups. It is good to see that Earth Wind & Fire continuing their success. I would like to add this movie to my collection. Someone please help me if possible. Thank you for your attention."


In [100]:
data13="I saw the movie Hoot and then I immediately decided to comment it. The truth is that NATURE needs protection from us because we are the dominant specie of this planet. Some people think that if they have money, they can do whatever they want to, which probably is like, but if they think about the future more then they think about themselves they would do something useful! This movie is not just about kids, this movie is showing us that the kids are usually the ones that care more about it then the adults do. When I was twelve, I saw some waterlilies and I knew they are protected by law and didn't even dare to touch them not fearing of the law, but fearing that I might harm them actually. (I am currently 15) What so ever, the acting was great, the 3 main characters are well interpreted and we all have to learn from them. I hope you all think about what you saw in that movie!!! and Enjoy!"


In [101]:
data14='I had some reservations about this movie, I figured it would be the usual bill of fare --- a formula movie about Christmas. Being in the middle of a heat wave in late June, we decided to give it a shot anyway, maybe we would see some snow.<br /><br />This movie turned out to be one laugh after another. Ben Affleck was believable in his character, but the real star of this one is James Gandofini. He delivered his lines with a real wit about him and made a great "dad".<br /><br />If you want to have an enjoyable couple of hours, definitely check this one out.'


In [103]:
data15="When I first saw this film around 6 months ago, I considered it interesting, but little more. But it stuck with me. That interest grew and grew, and I wondered whether my initial boredom and response had more to do with the actual VHS quality rather than the film itself. I purchased the Criterion DVD box set, and it turns out that I was right the second time. Alexander Nevsky is a great film. It is rousing, and I'm sure it succeeded in its main aim: propaganda against the Germans.<br /><br />That is the most common criticism against this film, and against Eisenstein, that it is merely propagandist and nothing else. It's untrue. He is an amazing film artist, one of the most important whoever lived. By now, the world is far enough beyond Joseph Stalin to be able to watch Eisenstein's films as art."


In [130]:
# Encoding of data2 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review1 = tokenizer.encode_plus(
  data2,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [131]:
# Encoding of data3 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review2 = tokenizer.encode_plus(
  data3,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [132]:
# Encoding of data4 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review3 = tokenizer.encode_plus(
  data4,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [133]:
# Encoding of data5 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review4 = tokenizer.encode_plus(
  data5,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [134]:
# Encoding of data6 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review5 = tokenizer.encode_plus(
  data6,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [135]:
# Encoding of data7 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review6 = tokenizer.encode_plus(
  data7,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [136]:
# Encoding of data8 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review7 = tokenizer.encode_plus(
  data8,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [137]:
# Encoding of data9 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review8 = tokenizer.encode_plus(
  data9,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [138]:
# Encoding of data10 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review9 = tokenizer.encode_plus(
  data10,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [139]:
# Encoding of data11 (note: encoded_reviewK holds the encoding of data(K+1)).
# PyTorch tensors padded/limited to MAX_LEN, attention mask included.
encoded_review10 = tokenizer.encode_plus(
  data11,
  add_special_tokens=True,
  max_length=MAX_LEN,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_token_type_ids=False,
  return_tensors='pt',
)

In [140]:
# Classify data2 (encoded as encoded_review1) and print the predicted label.
input_ids = encoded_review1['input_ids'].to(device)
attention_mask = encoded_review1['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data2}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: Absolute must see documentary for anyone interested in getting to the bottom of this story. Told with unflinching eye and with gripping style. If you think conspiracy theories are for paranoid disturbed people, this could change your mind. Something for you feds too: A good model for government coverups! If you like your news all tidy and easy to consume this is not for you.
Sentiment  : neutral


In [141]:
# Classify data3 (encoded as encoded_review2) and print the predicted label.
input_ids = encoded_review2['input_ids'].to(device)
attention_mask = encoded_review2['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data3}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: I have decided to not believe what famous movie critics say. Even though this movie did not get the best comments, this movie made my day. It got me thinking. What a false world this is.<br /><br />What do you do when your most loved ones deceive you. It's said that no matter how often you feed milk to a snake, it can never be loyal and will bite when given a chance. Same way some people are such that they are never grateful. This movie is about how selfish people can be and how everyone is ultimately just thinking about oneself and working for oneself. <br /><br />A brother dies inadvertently at the hands of a gangster. The surviving brother decides to take revenge. Through this process, we learn about the futility of this world. Nothing is real and no one is loyal to anyone.<br /><br />Amitabh gave the performance of his life. The new actor Aryan gave a good performance. The actress who played the wife of Amitabh stole the show. Her role was small but she portrayed her r

In [142]:
# Classify data4 (encoded as encoded_review3) and print the predicted label.
input_ids = encoded_review3['input_ids'].to(device)
attention_mask = encoded_review3['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data4}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: After a very scary, crude opening which gives you that creepy "Chainsaw massacre"-feeling, everything falls apart.<br /><br />SPOILER ALERT: As soon as the two FBI-officers start jabbing, you know they are the real killers. Anyone who have seen enough of these "fooled-ya"-movies can figure this out.<br /><br />This movie is mader with one thing in mind: To depict brutal murders. Why, then, is not the little girl tortured and murdered as well? Will this be next for us movie-goers? The torture and abuse of children? Whats wrong with you people? Lynch is truly has a disgusting, ugly mind.
Sentiment  : negative


In [143]:
# Classify data5 (encoded as encoded_review4) and print the predicted label.
input_ids = encoded_review4['input_ids'].to(device)
attention_mask = encoded_review4['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data5}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: I can't say much about this film. I think it speaks for itself (as do the current ratings on here). I rented this about two years ago and I totally regretted it. I even /tried/ to like it by watching it twice, but I just couldn't. I can safely say that I have absolutely no desire to see this waste of time ever, ever again. And I'm not one to trash a movie, but I truly believe this was awful. It wasn't even funny in the slightest. The only bits I enjoyed were the few scenes with Christopher Walken in them. I think this film ruined both Jack Black and Ben Stiller for me. All I can think of when I see one of their films now-a-days is this terrible movie, and it reminds me not to waste my money. Amy Poehler is so very annoying, too.<br /><br />Overall, well, I think you get my point. The stars are for Walken, by the way.
Sentiment  : negative


In [144]:
# Classify data6 (encoded as encoded_review5) and print the predicted label.
input_ids = encoded_review5['input_ids'].to(device)
attention_mask = encoded_review5['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data6}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: I am not so old that I can't remember laughing at Bobcat Goldthwait a couple times. But some where in all his years of drug abuse he lost his sense of humor as well as his brain cells.<br /><br />From the moment this film opens you can have no sympathy nor empathy for the female lead. Neither will you find anything remotely funny after hearing the opening line. Goldthwait obviously hates himself so much that he needs to degrade in order to feel better- even if it is his own imaginary characters he degrades. <br /><br />If you ever saw Shakes the Clown you know how unfunny Bobcat was 15 years ago...this movie is worse. It was not even funny by accident It is sad, pathetic and a total waste of time. May Goldthwaits' hands be rendered paralyzed so he can not write another script. Strike his tongue so he can not dictate another unfunny scene. He is sad and pathetic and needs to make room for a new talent dying to get into Hollywood
Sentiment  : negative


In [145]:
# Classify data7 (encoded as encoded_review6) and print the predicted label.
input_ids = encoded_review6['input_ids'].to(device)
attention_mask = encoded_review6['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data7}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: In New Orleans, an illegal immigrant feels sick and leaves a poker game while winning the smalltime criminal Blackie (Walter Jack Palance). He is chased by Blackie and his men Raymond Fitch (Zero Mostel) and Poldi (Guy Thomajan), killed by Blackie and his body is dumped in the sea. During the autopsy, the family man Lieutenant Commander Dr. Clinton Reed (Richard Widmark) of the U.S. Public Health Service finds that the dead man had pneumonic plague caused by rats and he needs to find who had any type of contact with the man within forty-eight hours to avoid an epidemic. The City Mayor assigns the skeptical Captain Tom Warren (Paul Douglas) to help Dr. Clint to find the killers that are infected with the plague and inoculate them.<br /><br />"Panic in the Streets" discloses a simple story, but it is still effective and with a great villain. The engaging plot has not become dated after fifty-seven years. Jack Palance performs a despicable scum in his debut, and the camera wo

In [146]:
# Classify data8 (encoded as encoded_review7) and print the predicted label.
input_ids = encoded_review7['input_ids'].to(device)
attention_mask = encoded_review7['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data8}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: The box is why I originally picked up this movie and the back is why I rented it. But I soon learned that I had been duped. I had thought this movie would be something like a Road Trip/Eurotrip/American Pie deal. But I was wrong. This movie is one of the dumbest I've seen in a long time. The unrated version teases you in to watching but will completely disappoint you. The acting was terrible and sound effects just gaudy. It appeared very low budget with the entire setting taking place in the same building. Go out and get Eurotrip or Road Trip instead. I can't believe National Lampoon put its name on this. DON'T BUY IT, DON'T RENT IT. Don't waste 2 hours of your life on this.
Sentiment  : negative


In [147]:
# Classify data9 (encoded as encoded_review8) and print the predicted label.
input_ids = encoded_review8['input_ids'].to(device)
attention_mask = encoded_review8['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data9}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: There have been many documentaries that I have seen in which it appeared that the law was on the wrong side of the fence - The Thin Blue Line and Paradise Lost come to mind first and foremost. But this is the first film that had me seething with anger after I saw it. It seems blatantly clear to me from the evidence presented in this film that what happened at Waco was at the very least an unprofessional and sloppy mess on the part of the FBI and AFI, and at the very worst an act of murder. Like most people, when the siege at Waco was occurring I assumed that David Koresh was a completely evil madman who was leading a violent cult. After seeing this, I think that Koresh was more likely a slightly unbalanced and confused guy who inadvertently caught the attention of the U.S. government through his eccentric actions. Sure, there were lots of weapons at the Branch Davidian compound. But none of it was illegal. It was absolutely heartbreaking to see the video footage of the peo

In [148]:
# Classify data10 (encoded as encoded_review9) and print the predicted label.
input_ids = encoded_review9['input_ids'].to(device)
attention_mask = encoded_review9['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data10}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: I saw this movie last night and thought it was decent. It has it's moments I guess you would say. Some of the scenes with the special ops forces were cool, and some of the location shots were very authentic. I won't be putting this movie in my DVD collection but it is fair enough to recommend for renting. I guess nothing set the movie at another level compared to others of the same genre. The action is good, the acting is decent, the women are extremely seductive and exotic in my opinion, and the story is pretty interesting. 7 out of ten
Sentiment  : positive


In [149]:
# Classify data11 (encoded as encoded_review10) and print the predicted label.
input_ids = encoded_review10['input_ids'].to(device)
attention_mask = encoded_review10['attention_mask'].to(device)
# Inference only: run the forward pass under torch.no_grad(), as
# get_predictions does, so autograd does not record a graph for this call.
with torch.no_grad():
  output = model(input_ids, attention_mask)
# torch.max over dim=1 returns (values, indices); the index is the class id.
_, prediction = torch.max(output, dim=1)
print(f'Review text: {data11}')
print(f'Sentiment  : {class_names[prediction]}')

Review text: Yes, I call this a perfect movie. Not one boring second, a fantastic cast of mostly little known actresses and actors, a great array of characters who are all well defined and who all have understandable motives I could sympathize with, perfect lighting, crisp black and white photography, a fitting soundtrack, an intelligent and harmonious set design and a story that is engaging and works. It's one of those prime quality pictures on which all the pride of Hollywood should rest, the mark everyone should endeavor to reach.<br /><br />Barbara Stanwyck is simply stunning. There was nothing this actress couldn't do, and she always went easy on the melodramatic side. No hysterical outbursts with this lady - I always thought she was a better actress than screen goddesses like Bette Davis or Joan Crawford, and this movie confirmed my opinion. Always as tough as nails and at the same time conveying true sentiments. It is fair to add that she also got many good parts during her long