In [1]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sun Apr 25 05:51:47 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P0    43W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
!pip install transformers



In [4]:
import tensorflow as tf

# get the GPU device name
device_name = tf.test.gpu_device_name()

if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [6]:
import torch

if torch.cuda.is_available():    

    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('The GPU will be used:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
The GPU will be used: Tesla P100-PCIE-16GB


In [7]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader,SequentialSampler,RandomSampler,TensorDataset,random_split
import os
%matplotlib inline
sns.set(color_codes=True)

In [8]:
#import training data from gdrive
from google.colab import drive
import os
drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [47]:
data = pd.read_csv('/content/drive/My Drive/training.1600000.processed.noemoticon.csv',  encoding = 'latin-1', header=None)
print(data.shape)
data = data[[0, 5]]
data.columns = ['Sentiment', 'Tweet']
print(data.head())


(1600000, 6)
   Sentiment                                              Tweet
0          0  @switchfoot http://twitpic.com/2y1zl - Awww, t...
1          0  is upset that he can't update his Facebook by ...
2          0  @Kenichan I dived many times for the ball. Man...
3          0    my whole body feels itchy and like its on fire 
4          0  @nationwideclass no, it's not behaving at all....


In [48]:
# 0 as negative, 1 as positive
data.loc[data["Sentiment"] == 4, "Sentiment"] = 1
print('Number of training tweets: {:,}\n'.format(data.shape[0]))
data.sample(10)

Number of training tweets: 1,600,000



Unnamed: 0,Sentiment,Tweet
630748,0,@DonnieWahlberg gonna see you tomorrow night i...
1566560,1,"i'm in love with my medical card. yes doctor, ..."
580782,0,Tired from volleyball nice nap sounds good bu...
1520901,1,Good morning! Nice to sleep for about 12hrs. ...
499038,0,watching Jon and Kate plus 8. FIVE MORE DAYS! ...
1582331,1,@Ayla_F Oh sweetie that's ALWAYS the way. Once...
194070,0,is amused by all the tweets from @Jason_Manfor...
155720,0,Why do i have to work tomorrow!?
707847,0,@Lark_in_Forks [Text] Sorry
365014,0,@mediatemple No dice on the upgrade - still $2...


In [49]:
# clean the dataset
import re
#remove hashtags
hashtags = re.compile(r"^#\S+|\s#\S+")
#remove @ mentions 
mentions = re.compile(r"^@\S+|\s@\S+")
#remove urls
urls = re.compile(r"https?://\S+")

def text_process(text):
    text = re.sub(r'http\S+', '', text)
    text = hashtags.sub(' hashtag', text)
    text = mentions.sub(' entity', text)
    return text.strip().lower()

In [50]:
data['Tweet'] = data['Tweet'].apply(text_process)
data.head()

Unnamed: 0,Sentiment,Tweet
0,0,"entity - awww, that's a bummer. you shoulda ..."
1,0,is upset that he can't update his facebook by ...
2,0,entity i dived many times for the ball. manage...
3,0,my whole body feels itchy and like its on fire
4,0,"entity no, it's not behaving at all. i'm mad. ..."


In [51]:
labels = data['Sentiment'].values
text = data['Tweet'].values

In [52]:
# import BertTokenizer, convert text into tokens corresponding to BERT lib
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [53]:
print(' Original: ', text[0])
print()
print('Tokenized: ', tokenizer.tokenize(text[0]))
print()
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text[0])))

 Original:  entity  - awww, that's a bummer.  you shoulda got david carr of third day to do it. ;d

Tokenized:  ['entity', '-', 'aw', '##w', '##w', ',', 'that', "'", 's', 'a', 'bum', '##mer', '.', 'you', 'should', '##a', 'got', 'david', 'carr', 'of', 'third', 'day', 'to', 'do', 'it', '.', ';', 'd']

Token IDs:  [9178, 1011, 22091, 2860, 2860, 1010, 2008, 1005, 1055, 1037, 26352, 5017, 1012, 2017, 2323, 2050, 2288, 2585, 12385, 1997, 2353, 2154, 2000, 2079, 2009, 1012, 1025, 1040]


In [None]:

# to decide which max_length we are gonna use for padding/truncating
# run one full tokenization pass to measure the maximum tweet length
max_len = 0

for i in text:
    # Tokenize the text and add `[CLS]` and `[SEP]` tokens
    input_ids = tokenizer.encode(i, add_special_tokens=True)
    # Update the max length
    max_len = max(max_len, len(input_ids))

print('Max tweet length: ', max_len)


In [17]:
# Tokenize all the texts and map the tokens to their word IDs.
input_ids = []
attention_masks = []

for i in text:

    encoded_dict = tokenizer.encode_plus(
                        i,                        
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 256,          # Pad & truncate all tweets
                        pad_to_max_length = True,
                        return_attention_mask = True, # Construct attention masks
                        return_tensors = 'pt',     
                   )
    
    # Add the encoded tweet to the list.    
    input_ids.append(encoded_dict['input_ids'])
    
    # Add attention mask 
    attention_masks.append(encoded_dict['attention_mask'])

# Convert lists into tensors
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Print a tweet as a list of IDs.
print('Original: ', text[0])
print('Token IDs:', input_ids[0])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Original:  entity  - awww, that's a bummer.  you shoulda got david carr of third day to do it. ;d
Token IDs: tensor([  101,  9178,  1011, 22091,  2860,  2860,  1010,  2008,  1005,  1055,
         1037, 26352,  5017,  1012,  2017,  2323,  2050,  2288,  2585, 12385,
         1997,  2353,  2154,  2000,  2079,  2009,  1012,  1025,  1040,   102,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     

In [18]:
# Combine the training inputs into a TensorDataset.
dataset = TensorDataset(input_ids, attention_masks, labels)

# Create a 80-20 train-val split
# Number of samples to be included in train/val set
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Divide the dataset by randomly selecting samples.
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

1,280,000 training samples
320,000 validation samples


In [19]:
# batch size for training
# the author of the paper recommend 32 or 64
# we choose 32 here.
batch_size = 32

# Create the DataLoaders for our train/val sets.
train_dataloader = DataLoader(
            train_dataset,  
            sampler = RandomSampler(train_dataset), 
            batch_size = batch_size)

validation_dataloader = DataLoader(
            val_dataset,
            sampler = SequentialSampler(val_dataset), 
            batch_size = batch_size) 

In [20]:
# our classification model - BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [21]:
# Get all of the model's parameters as a list of tuples.
params = list(model.named_parameters())

print('The BERT model has {:} different named parameters.\n'.format(len(params)))

print('==== Embedding Layer ====\n')

for p in params[0:5]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== First Transformer ====\n')

for p in params[5:21]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== Output Layer ====\n')

for p in params[-4:]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (30522, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

### Optimizer and learning rate

In [22]:
optimizer = AdamW(model.parameters(),
                  lr = 2e-5, 
                  eps = 1e-8, 
                  correct_bias=True)

# Number of training epochs. 
epochs = 2

total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, 
                                            num_training_steps = total_steps)



In [23]:
# Helper Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

import time
import datetime

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [24]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(device)

cuda


In [25]:
torch.cuda.empty_cache()
torch.cuda.memory_summary(device=None, abbreviated=False)



In [26]:
import random
import numpy as np

# Referring: 'run_glue.py':
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 138
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# We'll store  training and validation loss, validation accuracy, and timings.
training_stats = []

total_t0 = time.time()

for epoch_i in range(0, epochs):
    
    #               Training
    
    # Perform one full epoch over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()
    total_train_loss = 0

    model.train()

    for step, batch in enumerate(train_dataloader):

        if step % 200 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print(' Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        model.zero_grad()        

        result = model(batch[0].to(device), 
                       token_type_ids=None, 
                       attention_mask=batch[1].to(device), 
                       labels=batch[2].to(device),
                       return_dict=True)

        loss = result.loss
        logits = result.logits

        total_train_loss += loss.item()

        loss.backward()

        # Set the norm of the gradients to 1.0 to prevent "exploding gradients"
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        # Update optimizer parameters
        optimizer.step()

        # Update the learning rate
        scheduler.step()

    # Calculate the average loss over all batches.
    avg_train_loss = total_train_loss / len(train_dataloader)            
    
    training_time = format_time(time.time() - t0)

    print("")
    print("Average training loss: {0:.2f}".format(avg_train_loss))
    print("Training epoch took: {:}".format(training_time))
        
    #               Validation
    
    # After training each epoch, measure performance on validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    for batch in validation_dataloader:
     
        with torch.no_grad():        
            result = model(batch[0].to(device), 
                           token_type_ids=None, 
                           attention_mask=batch[1].to(device),
                           labels=batch[2].to(device),
                           return_dict=True)
        loss = result.loss
        logits = result.logits
            
        # Accumulate the validation loss.
        total_eval_loss += loss.item()

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = batch[2].to(device).to('cpu').numpy()
        
        total_eval_accuracy += flat_accuracy(logits, label_ids)
        

    # Average validation accuracy 
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    
    # Validation Time
    validation_time = format_time(time.time() - t0)
    
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training completed!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...
 Batch   200  of  40,000.    Elapsed: 0:02:33.
 Batch   400  of  40,000.    Elapsed: 0:05:06.
 Batch   600  of  40,000.    Elapsed: 0:07:39.
 Batch   800  of  40,000.    Elapsed: 0:10:13.
 Batch 1,000  of  40,000.    Elapsed: 0:12:46.
 Batch 1,200  of  40,000.    Elapsed: 0:15:19.
 Batch 1,400  of  40,000.    Elapsed: 0:17:52.
 Batch 1,600  of  40,000.    Elapsed: 0:20:25.
 Batch 1,800  of  40,000.    Elapsed: 0:22:59.
 Batch 2,000  of  40,000.    Elapsed: 0:25:32.
 Batch 2,200  of  40,000.    Elapsed: 0:28:05.
 Batch 2,400  of  40,000.    Elapsed: 0:30:38.
 Batch 2,600  of  40,000.    Elapsed: 0:33:12.
 Batch 2,800  of  40,000.    Elapsed: 0:35:45.
 Batch 3,000  of  40,000.    Elapsed: 0:38:18.
 Batch 3,200  of  40,000.    Elapsed: 0:40:52.
 Batch 3,400  of  40,000.    Elapsed: 0:43:25.
 Batch 3,600  of  40,000.    Elapsed: 0:45:58.
 Batch 3,800  of  40,000.    Elapsed: 0:48:31.
 Batch 4,000  of  40,000.    Elapsed: 0:51:05.
 Batch 4,200  of  40,000.    Elapsed: 0:53:38.


In [41]:
pd.set_option('precision', 2)
stats = pd.DataFrame(data=training_stats)
stats = stats.set_index('epoch')

stats

Unnamed: 0_level_0,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.33,0.3,0.87,8:31:22,0:42:20
2,0.26,0.3,0.88,8:30:42,0:42:19


### Evaluate on Test Set

In [28]:
# Load the dataset into a pandas dataframe.
test_data = pd.read_csv('/content/drive/My Drive/testdata.manual.2009.06.14.csv',  encoding = 'latin-1', header=None)
print(test_data.shape)
test_data = test_data[[0, 5]]
test_data.columns = ['Sentiment', 'Tweet']
test_data.loc[test_data["Sentiment"] == 4, "Sentiment"] = 1
print(test_data.head())


# Report the number of sentences.
print('Number of test tweets: {:,}\n'.format(test_data.shape[0]))

# preprocess the test data:
# clean the dataset
import re
#remove hashtags
hashtags = re.compile(r"^#\S+|\s#\S+")
#remove @ mentions 
mentions = re.compile(r"^@\S+|\s@\S+")
#remove urls
urls = re.compile(r"https?://\S+")

def text_process(text):
    text = re.sub(r'http\S+', '', text)
    text = hashtags.sub(' hashtag', text)
    text = mentions.sub(' entity', text)
    return text.strip().lower()

test_data['Tweet'] = test_data.Tweet.apply(text_process)
test_data.head() 


# Create text and label lists
labels = test_data['Sentiment'].values
text = test_data['Tweet'].values

# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids = []
attention_masks = []

for i in text:
    encoded_dict = tokenizer.encode_plus(
                        i,                     
                        add_special_tokens = True, 
                        max_length = 256,           
                        pad_to_max_length = True,
                        return_attention_mask = True,   
                        return_tensors = 'pt',     
                   )
    
    # Add the encoded sentence to the list.    
    input_ids.append(encoded_dict['input_ids'])
    
    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Set the batch size.  
batch_size = 32  

# Create the DataLoader.
prediction_data = TensorDataset(input_ids, attention_masks, labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

(498, 6)
   Sentiment                                              Tweet
0          1  @stellargirl I loooooooovvvvvveee my Kindle2. ...
1          1  Reading my kindle2...  Love it... Lee childs i...
2          1  Ok, first assesment of the #kindle2 ...it fuck...
3          1  @kenburbary You'll love your Kindle2. I've had...
4          1  @mikefish  Fair enough. But i have the Kindle2...
Number of test tweets: 498





In [42]:
# Prediction on test set
print('Predicting labels for {:,} test tweets...'.format(len(input_ids)))

model.eval()

predictions , true_labels = [], []

for batch in prediction_dataloader:
  # Add batch to GPU
  batch = tuple(t.to(device) for t in batch)
  
  with torch.no_grad():
      # Forward pass, calculate logit predictions.
      result = model(batch[0], 
                     token_type_ids=None, 
                     attention_mask=batch[1],
                     return_dict=True)

  logits = result.logits

  # Move logits and labels to CPU
  logits = logits.detach().cpu().numpy()
  label_ids = batch[2].to('cpu').numpy()
  
  # Store predictions and true labels
  predictions.append(logits)
  true_labels.append(label_ids)

print('DONE.')

Predicting labels for 498 test tweets...
DONE.


In [40]:
import os
output_dir = './model_save/'

# Create output directory if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print("Saving model to %s" % output_dir)

model_to_save = model.module if hasattr(model, 'module') else model  
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)


Saving model to ./model_save/


('./model_save/tokenizer_config.json',
 './model_save/special_tokens_map.json',
 './model_save/vocab.txt',
 './model_save/added_tokens.json')

In [31]:
from transformers import BertTokenizer,BertForSequenceClassification
import torch
# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
output_dir = './'
tokenizer = BertTokenizer.from_pretrained(output_dir)
model_loaded = BertForSequenceClassification.from_pretrained(output_dir)

Loading BERT tokenizer...


In [32]:
# explore the model layers
model.state_dict

<bound method Module.state_dict of BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): L

In [33]:
def Sentiment(sent):
    output_dir = './'
    tokenizer = BertTokenizer.from_pretrained(output_dir)
    model_loaded = BertForSequenceClassification.from_pretrained(output_dir)
    encoded_dict = tokenizer.encode_plus(
                        sent, 
                        add_special_tokens = True,
                        max_length = 64,
                        pad_to_max_length = True,
                        return_attention_mask = True,
                        return_tensors = 'pt',
                   )
        
    input_id = encoded_dict['input_ids']

    attention_mask = encoded_dict['attention_mask']
    input_id = torch.LongTensor(input_id)
    attention_mask = torch.LongTensor(attention_mask)

    model_loaded = model_loaded.to(device)
    input_id = input_id.to(device)
    attention_mask = attention_mask.to(device)

    with torch.no_grad():
        outputs = model_loaded(input_id, token_type_ids=None, attention_mask=attention_mask)

    logits = outputs[0]
    index = logits.argmax()
    return index

In [43]:
ans = Sentiment('Baby Yoda is so cuteeee')

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [44]:
if ans == 1:
    print("Positive")
else:
    print("Negative")

Positive
