In [None]:
!pip install transformers
!pip install sentencepiece

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 4.0MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 39.4MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 36.7MB/s 
Installing collected packages: sacremoses, tokenizers, transformers
Successfully installed sacremoses-0.0.45 tokenizers-0.10.2 transformers-4.5.1
Collecting sentencepiece
[?25l  Downloading https://files.p

In [None]:
import json

import numpy as np
import torch
#torch.cuda.empty_cache()
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from tqdm import trange

from transformers import XLNetModel, XLNetTokenizer, XLNetForSequenceClassification
from transformers import AdamW

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
print(device)
# Only ask for the GPU name when CUDA is actually available:
# torch.cuda.get_device_name raises on a CPU-only runtime, which would make
# the "cpu" fallback above unreachable in practice.
if device.type == "cuda":
    print(torch.cuda.get_device_name(0))

cuda
Tesla P100-PCIE-16GB


In [None]:
# Colab-only: mount Google Drive so files under "My Drive" are reachable
# through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')
# Project directory on the mounted drive. The data-loading cell builds paths
# by plain string concatenation, so the trailing slash is required.
root_folder = "/content/drive/My Drive/CS182-Spring2020-NLP-Project/"

Mounted at /content/drive


In [None]:
# Load the training reviews: one JSON object per line (JSON Lines).
#
# Iterate over the file object instead of `file.read().splitlines()`:
# str.splitlines() also breaks on characters such as U+2028 / U+0085, which
# may legally appear unescaped inside a JSON string and would cut a record in
# two. File iteration only splits on real newlines. Blank lines (e.g. a
# trailing empty line) are skipped rather than crashing json.loads, and the
# encoding is stated explicitly so the result does not depend on the locale.
with open(root_folder + 'dataset/training_data.jsonl', 'r', encoding='utf-8') as file:
    data = [json.loads(jline) for jline in file if jline.strip()]

In [None]:
# Build the class ids from the raw review records: the "stars" field minus
# one, so the smallest rating maps to class 0.
# This has to run while `data` still holds the parsed review dicts; the
# tokenization cell further down rebinds `data` to token lists.
labels = []
for review in data:
    zero_based_rating = int(review["stars"]) - 1
    labels.append(zero_based_rating)

In [None]:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=False, remove_space=False)

# XLNet marks the end of a sequence with its own special tokens ("<sep>" then
# "<cls>"), exposed as tokenizer.sep_token / tokenizer.cls_token. The previous
# code appended the BERT-style text " [SEP] [CLS]", which is not in XLNet's
# vocabulary and was shredded into ordinary pieces ('▁[', 'S', 'EP', ']', ...),
# so the model never saw real separator/classification tokens.
# Appending the token strings after tokenization guarantees they stay intact.
# NOTE(review): a checkpoint fine-tuned on the old input format was trained on
# those literal pieces; expect a short re-adaptation period when resuming
# from it with the corrected inputs.
special_suffix = [tokenizer.sep_token, tokenizer.cls_token]
data = [tokenizer.tokenize(review["text"]) + special_suffix for review in data]
print("Tokenize the first sentence:")
print(data[0])

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=798011.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1382015.0, style=ProgressStyle(descript…


Tokenize the first sentence:
['▁total', '▁bill', '▁horrible', '▁service', '▁over', '▁8', 'gs', '▁these', '▁crook', 's', '▁actually', '▁nerve', '▁charge', '▁us', '▁69', '▁3', '▁pills', '▁', 'i', '▁checked', '▁online', '▁pills', '▁19', '▁cents', '▁each', '▁avoid', '▁hospital', '▁', 'ers', '▁costs', '▁[', 'S', 'EP', ']', '▁[', 'CL', 'S', ']']


In [None]:
# Fixed length (in token ids) that every sequence is padded or truncated to
# by the pad_sequences call in a later cell.
MAX_LEN = 512

In [None]:
# Map every token list to the corresponding list of vocabulary ids.
data = list(map(tokenizer.convert_tokens_to_ids, data))

In [None]:
# Bring every id sequence to exactly MAX_LEN entries: shorter sequences are
# padded at the end, longer ones are cut at the end.
# NOTE(review): cutting at the end also removes whatever end-of-sequence
# markers were appended during tokenization for reviews longer than MAX_LEN —
# confirm this is intended.
data = pad_sequences(
    data,
    maxlen=MAX_LEN,
    dtype="long",
    truncating="post",
    padding="post",
)

In [None]:
# One mask row per sequence: 1.0 where the token id is positive, 0.0 where it
# is not (the padded positions).
# NOTE(review): any genuine token whose id is 0 is masked out as well.
attention_masks = [
    [float(token_id > 0) for token_id in id_row]
    for id_row in data
]

In [None]:
# Convert the padded id matrix, the class labels and the attention masks to
# torch tensors so they can be wrapped in a TensorDataset.
# Each statement rebinds its name, so the previous (non-tensor) object can be
# released before the next conversion starts.
data = torch.tensor(data)
labels = torch.tensor(labels)
attention_masks = torch.tensor(attention_masks)

In [None]:
# Number of examples per optimisation step.
batch_size = 8

# Each dataset item is an (input ids, attention mask, label) triple; the
# training loop unpacks batches in exactly this order.
train_data = TensorDataset(data, attention_masks, labels)
# Draw examples in random order.
train_sampler = RandomSampler(data_source=train_data)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

In [None]:
# Resume fine-tuning from the checkpoint previously saved on Drive.
# To start from the pretrained weights instead, use:
# model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=5)
model = XLNetForSequenceClassification.from_pretrained("/content/drive/My Drive/XLNet_lr_76", num_labels=5)
# Place the model on the same `device` the training loop moves its batches to.
# A hard-coded .cuda() raises on a CPU-only runtime and can disagree with
# `device`. `.to()` returns the model, so the cell still displays its repr.
model.to(device)

XLNetForSequenceClassification(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e

In [None]:
# Split the model parameters into two optimizer groups: one that receives
# weight decay and one (biases and LayerNorm weights) that does not.
param_optimizer = list(model.named_parameters())
# Name fragments of parameters exempt from weight decay. PyTorch names
# LayerNorm parameters `weight`/`bias`, so the old 'gamma'/'beta' fragments
# never matched anything; the LayerNorm modules of this model are called
# `layer_norm` (see the printed model structure).
no_decay = ['bias', 'layer_norm.weight']
# The per-group key must be 'weight_decay': that is the key AdamW reads.
# The previous 'weight_decay_rate' key was silently ignored, so every
# parameter fell back to the optimizer's default decay instead of 0.01.
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]

In [None]:
# AdamW over the two parameter groups built above, with a small learning rate
# for fine-tuning.
optimizer = AdamW(params=optimizer_grouped_parameters, lr=7e-6)

In [None]:
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose arg-max prediction equals the label.

    Args:
        preds: 2-D array-like of per-class scores, one row per example.
        labels: numpy array of integer class ids; it is flattened before the
            comparison.

    Returns:
        Number of matching predictions divided by the number of labels.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    hits = predicted_classes == true_classes
    return np.sum(hits) / len(true_classes)

In [None]:
# Fine-tuning loop: `epochs` passes over train_dataloader, one optimizer step
# per batch, with a running average of the training loss printed every 100
# steps and the model checkpointed to Drive every 1000 steps and after each
# epoch.
epochs = 4
# Single definition of the checkpoint location (same directory the model was
# loaded from) so the periodic and end-of-epoch saves cannot drift apart.
checkpoint_dir = "/content/drive/My Drive/XLNet_lr_76"

model.train()

for _ in trange(epochs, desc="Epoch"):

  # Running totals for this epoch, used for the averaged loss printout.
  tr_loss = 0
  nb_tr_examples, nb_tr_steps = 0, 0

  for step, batch in enumerate(train_dataloader):
    # Batches come out as (input ids, attention mask, labels); move all three
    # to the training device.
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch

    optimizer.zero_grad()
    # Passing `labels` makes the model return the loss as the first output
    # and the logits as the second.
    outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
    loss = outputs[0]
    logits = outputs[1]

    loss.backward()
    optimizer.step()

    tr_loss += loss.item()
    nb_tr_examples += b_input_ids.size(0)
    nb_tr_steps += 1

    if step % 100 == 0:
      print(step, "\t", "Train loss: {}".format(tr_loss/nb_tr_steps))
    # Periodic checkpoint. Step 0 is skipped: at that point the model has
    # taken a single update, so writing it back to Drive is a redundant
    # (and slow) copy of the checkpoint that was just loaded.
    if step > 0 and step % 1000 == 0:
      model.save_pretrained(checkpoint_dir)

  # Always checkpoint at the end of every epoch.
  model.save_pretrained(checkpoint_dir)

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

0 	 Train loss: 0.3639225959777832
100 	 Train loss: 0.3834574612060396
200 	 Train loss: 0.4155336827484529
300 	 Train loss: 0.41860145924495307
400 	 Train loss: 0.4321881761760486
500 	 Train loss: 0.43872813296740404
600 	 Train loss: 0.4378790036835607
700 	 Train loss: 0.43690837102380187
800 	 Train loss: 0.44020770846885837
900 	 Train loss: 0.4367081239575584
1000 	 Train loss: 0.4324252548833172
1100 	 Train loss: 0.43128606210463594
1200 	 Train loss: 0.43025648321872645
1300 	 Train loss: 0.43021742834335197
1400 	 Train loss: 0.43186480438949637
1500 	 Train loss: 0.4326938718512684
1600 	 Train loss: 0.4342515572142333
1700 	 Train loss: 0.43561069129898855
1800 	 Train loss: 0.4386738298676995
1900 	 Train loss: 0.4383193216884782
2000 	 Train loss: 0.43959688902392735
2100 	 Train loss: 0.4386522833688778
2200 	 Train loss: 0.43867674048335
2300 	 Train loss: 0.4391129840082808
2400 	 Train loss: 0.4361605055262488
2500 	 Train loss: 0.43793770097090345
2600 	 Train lo