In [1]:
# Environment, seeding and device setup for the whole notebook.
# CUDA_VISIBLE_DEVICES is assigned before `import torch` so the restriction is
# already in place when torch is loaded.
# NOTE(review): the GPU ids "2,3" are specific to the machine this was run on.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
import torch
print(torch.__version__)
# Seed torch, Python's `random` and NumPy with the same value (0), and switch
# cuDNN to its deterministic / non-benchmark settings.
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
import random
random.seed(0)
import numpy as np
np.random.seed(0)
import torch.nn as nn
import torch.nn.functional as F
# NOTE(review): torchvision / transforms are not referenced in the cells visible
# here — confirm whether they are still needed.
import torchvision
import torchvision.transforms as transforms
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

1.1.0


In [2]:
import re
from tqdm import tqdm
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader

In [3]:
# Hyperparameters shared by the data-loading and training cells below.
# Passed as `max_length` to tokenizer.encode in load_data (together with pad_to_max_length=True).
MAX_SEQ_LENGTH = 256
# Batch size for both the training and the test DataLoader.
BATCH_SIZE = 16
# Number of passes over the training loader.
NUM_EPOCHS = 10
# Learning rate for the `model.module.bert` parameter group.
LEARNING_RATE_MODEL = 1e-5
# Learning rate for the `model.module.classifier` parameter group.
LEARNING_RATE_CLASSIFIER = 1e-3
# `num_warmup_steps` handed to get_linear_schedule_with_warmup.
WARMUP_STEPS = 0
# Max norm handed to nn.utils.clip_grad_norm_ on every training step.
MAX_GRAD_NORM = 1

In [4]:
# Load the 'bert-base-uncased' tokenizer and a two-label sequence classifier
# from the same checkpoint name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# Wrap in DataParallel; from here on the underlying model is reached through
# `model.module` (as the optimizer setup further down does).
model = torch.nn.DataParallel(model)
# Move the wrapped model to `device`; being the last expression of the cell,
# this also displays the module repr.
model.to(device)

DataParallel(
  (module): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                

In [5]:
def _parse_imdb_line(line):
    line = re.sub(r"<br \/>", " ", line)
    return line


def load_data(path):
    """Tokenize every review under ``path`` and return token ids with labels.

    Files in ``path/neg`` are labelled 0 and files in ``path/pos`` are
    labelled 1 (``neg`` is processed first). Each file's text has its
    ``<br />`` tags replaced via ``_parse_imdb_line`` and is then encoded with
    the module-level ``tokenizer`` using ``max_length=MAX_SEQ_LENGTH`` and
    ``pad_to_max_length=True``.

    Args:
        path: Directory that contains the ``neg`` and ``pos`` sub-folders.

    Returns:
        A tuple ``(indices, sentiments)`` of NumPy arrays: the encoded ids of
        every review and the matching 0/1 labels, in the same order.

    Raises:
        OSError: If a sub-folder or file cannot be listed or opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    indices, sentiments = [], []
    for folder, sentiment in (('neg', 0), ('pos', 1)):
        folder = os.path.join(path, folder)
        # os.listdir gives no ordering guarantee; sorting keeps the sample
        # order identical between runs and machines, in line with the
        # fixed seeds set at the top of the notebook.
        for name in tqdm(sorted(os.listdir(folder))):
            # Decode explicitly instead of relying on the platform default
            # encoding (the review files are presumably UTF-8 — confirm
            # against the dataset).
            with open(os.path.join(folder, name), 'r', encoding='utf-8') as reader:
                text = _parse_imdb_line(reader.read())
            ids = tokenizer.encode(text, max_length=MAX_SEQ_LENGTH, pad_to_max_length=True)
            indices.append(ids)
            sentiments.append(sentiment)
    return np.array(indices), np.array(sentiments)


# Build the train / test directory paths under the shared dataset root.
train_path, test_path = (
    os.path.join("../datasets", 'aclImdb', split) for split in ('train', 'test')
)
# Tokenize the training split first, then the test split.
(X_train, y_train), (X_test, y_test) = load_data(train_path), load_data(test_path)

100%|██████████| 12500/12500 [00:52<00:00, 240.33it/s]
100%|██████████| 12500/12500 [00:53<00:00, 232.19it/s]
100%|██████████| 12500/12500 [00:51<00:00, 241.02it/s]
100%|██████████| 12500/12500 [00:52<00:00, 238.03it/s]


In [6]:
# Convert the four NumPy arrays returned by load_data into int64 tensors,
# rebinding the same names.
X_train, y_train, X_test, y_test = (
    torch.tensor(array, dtype=torch.long)
    for array in (X_train, y_train, X_test, y_test)
)

In [7]:
# Pair inputs with labels for each split.
train_data, test_data = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)
# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
# The BERT encoder and the classifier head each get their own learning rate.
optimizer_grouped_parameters = [
    dict(params=model.module.bert.parameters(), lr=LEARNING_RATE_MODEL),
    dict(params=model.module.classifier.parameters(), lr=LEARNING_RATE_CLASSIFIER),
]
optimizer = AdamW(optimizer_grouped_parameters)

# Number of batches per epoch; the schedule spans every batch of every epoch.
total_step = len(train_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=WARMUP_STEPS,
    num_training_steps=total_step * NUM_EPOCHS,
)

for epoch in range(NUM_EPOCHS):
    # ---- training pass over the whole training loader ----
    model.train()
    for step, (batch_ids, batch_labels) in enumerate(train_loader, start=1):
        batch_ids, batch_labels = batch_ids.to(device), batch_labels.to(device)
        # NOTE(review): only the input ids are passed, so no attention mask is
        # supplied even though load_data pads every sequence to MAX_SEQ_LENGTH.
        # If a mask is introduced it has to be added to every forward call
        # (this one and all evaluation loops) at the same time.
        logits = model(batch_ids)[0]
        loss = F.cross_entropy(logits, batch_labels)
        model.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
        optimizer.step()
        scheduler.step()  # the schedule advances once per batch
        if step % 50 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch + 1, NUM_EPOCHS, step, total_step, loss.item()))

    # ---- accuracy on the test loader after each epoch ----
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for batch_ids, batch_labels in tqdm(test_loader):
            batch_ids, batch_labels = batch_ids.to(device), batch_labels.to(device)
            # Index of the larger logit per row is the predicted label.
            predicted = model(batch_ids)[0].max(dim=1)[1]
            total += batch_labels.shape[0]
            correct += (predicted == batch_labels).sum().item()
    print('Accuracy: {} %'.format(100 * correct / total))

Epoch [1/10], Step [50/1563], Loss: 0.6748
Epoch [1/10], Step [100/1563], Loss: 0.3804
Epoch [1/10], Step [150/1563], Loss: 0.1760
Epoch [1/10], Step [200/1563], Loss: 0.0883
Epoch [1/10], Step [250/1563], Loss: 0.0476
Epoch [1/10], Step [300/1563], Loss: 0.1480
Epoch [1/10], Step [350/1563], Loss: 0.3010
Epoch [1/10], Step [400/1563], Loss: 0.1892
Epoch [1/10], Step [450/1563], Loss: 0.3828
Epoch [1/10], Step [500/1563], Loss: 0.2707
Epoch [1/10], Step [550/1563], Loss: 0.1930
Epoch [1/10], Step [600/1563], Loss: 0.0106
Epoch [1/10], Step [650/1563], Loss: 0.1821
Epoch [1/10], Step [700/1563], Loss: 0.4990
Epoch [1/10], Step [750/1563], Loss: 0.2486
Epoch [1/10], Step [800/1563], Loss: 0.2398
Epoch [1/10], Step [850/1563], Loss: 0.3049
Epoch [1/10], Step [900/1563], Loss: 0.3510
Epoch [1/10], Step [950/1563], Loss: 0.3500
Epoch [1/10], Step [1000/1563], Loss: 0.3195
Epoch [1/10], Step [1050/1563], Loss: 0.3566
Epoch [1/10], Step [1100/1563], Loss: 0.1134
Epoch [1/10], Step [1150/1563]

100%|██████████| 1563/1563 [02:51<00:00,  9.12it/s]


Accuracy: 91.712 %
Epoch [2/10], Step [50/1563], Loss: 0.0069
Epoch [2/10], Step [100/1563], Loss: 0.3882
Epoch [2/10], Step [150/1563], Loss: 0.2752
Epoch [2/10], Step [200/1563], Loss: 0.2802
Epoch [2/10], Step [250/1563], Loss: 0.0122
Epoch [2/10], Step [300/1563], Loss: 0.1538
Epoch [2/10], Step [350/1563], Loss: 0.0549
Epoch [2/10], Step [400/1563], Loss: 0.2466
Epoch [2/10], Step [450/1563], Loss: 0.0294
Epoch [2/10], Step [500/1563], Loss: 0.0706
Epoch [2/10], Step [550/1563], Loss: 0.2060
Epoch [2/10], Step [600/1563], Loss: 0.3670
Epoch [2/10], Step [650/1563], Loss: 0.0623
Epoch [2/10], Step [700/1563], Loss: 0.0569
Epoch [2/10], Step [750/1563], Loss: 0.1075
Epoch [2/10], Step [800/1563], Loss: 0.1997
Epoch [2/10], Step [850/1563], Loss: 0.1145
Epoch [2/10], Step [900/1563], Loss: 0.0629
Epoch [2/10], Step [950/1563], Loss: 0.6871
Epoch [2/10], Step [1000/1563], Loss: 0.0057
Epoch [2/10], Step [1050/1563], Loss: 0.9322
Epoch [2/10], Step [1100/1563], Loss: 0.0361
Epoch [2/10

100%|██████████| 1563/1563 [02:51<00:00,  9.13it/s]


Accuracy: 91.984 %
Epoch [3/10], Step [50/1563], Loss: 0.2733
Epoch [3/10], Step [100/1563], Loss: 0.0220
Epoch [3/10], Step [150/1563], Loss: 0.1103
Epoch [3/10], Step [200/1563], Loss: 0.0947
Epoch [3/10], Step [250/1563], Loss: 0.0008
Epoch [3/10], Step [300/1563], Loss: 0.4428
Epoch [3/10], Step [350/1563], Loss: 0.0070
Epoch [3/10], Step [400/1563], Loss: 0.0375
Epoch [3/10], Step [450/1563], Loss: 0.3324
Epoch [3/10], Step [500/1563], Loss: 0.0392
Epoch [3/10], Step [550/1563], Loss: 0.0056
Epoch [3/10], Step [600/1563], Loss: 0.2844
Epoch [3/10], Step [650/1563], Loss: 0.0040
Epoch [3/10], Step [700/1563], Loss: 0.0025
Epoch [3/10], Step [750/1563], Loss: 0.5286
Epoch [3/10], Step [800/1563], Loss: 0.1759
Epoch [3/10], Step [850/1563], Loss: 0.0824
Epoch [3/10], Step [900/1563], Loss: 0.0070
Epoch [3/10], Step [950/1563], Loss: 0.0005
Epoch [3/10], Step [1000/1563], Loss: 0.0022
Epoch [3/10], Step [1050/1563], Loss: 0.3147
Epoch [3/10], Step [1100/1563], Loss: 0.3112
Epoch [3/10

100%|██████████| 1563/1563 [02:51<00:00,  9.14it/s]


Accuracy: 91.9 %
Epoch [4/10], Step [50/1563], Loss: 0.0005
Epoch [4/10], Step [100/1563], Loss: 0.0005
Epoch [4/10], Step [150/1563], Loss: 0.0003
Epoch [4/10], Step [200/1563], Loss: 0.0007
Epoch [4/10], Step [250/1563], Loss: 0.0003
Epoch [4/10], Step [300/1563], Loss: 0.2257
Epoch [4/10], Step [350/1563], Loss: 0.0067
Epoch [4/10], Step [400/1563], Loss: 0.1701
Epoch [4/10], Step [450/1563], Loss: 0.0592
Epoch [4/10], Step [500/1563], Loss: 0.4043
Epoch [4/10], Step [550/1563], Loss: 0.0007
Epoch [4/10], Step [600/1563], Loss: 0.0015
Epoch [4/10], Step [650/1563], Loss: 0.0013
Epoch [4/10], Step [700/1563], Loss: 0.0001
Epoch [4/10], Step [750/1563], Loss: 0.0014
Epoch [4/10], Step [800/1563], Loss: 0.0082
Epoch [4/10], Step [850/1563], Loss: 0.4735
Epoch [4/10], Step [900/1563], Loss: 0.0012
Epoch [4/10], Step [950/1563], Loss: 0.0009
Epoch [4/10], Step [1000/1563], Loss: 0.0024
Epoch [4/10], Step [1050/1563], Loss: 0.0013
Epoch [4/10], Step [1100/1563], Loss: 0.0011
Epoch [4/10],

100%|██████████| 1563/1563 [02:51<00:00,  9.14it/s]


Accuracy: 91.92 %
Epoch [5/10], Step [50/1563], Loss: 0.0021
Epoch [5/10], Step [100/1563], Loss: 0.0004
Epoch [5/10], Step [150/1563], Loss: 0.0007
Epoch [5/10], Step [200/1563], Loss: 0.0091
Epoch [5/10], Step [250/1563], Loss: 0.0008
Epoch [5/10], Step [300/1563], Loss: 0.0006
Epoch [5/10], Step [350/1563], Loss: 0.0002
Epoch [5/10], Step [400/1563], Loss: 0.0010
Epoch [5/10], Step [450/1563], Loss: 0.0001
Epoch [5/10], Step [500/1563], Loss: 0.9529
Epoch [5/10], Step [550/1563], Loss: 0.0042
Epoch [5/10], Step [600/1563], Loss: 0.0001
Epoch [5/10], Step [650/1563], Loss: 0.0005
Epoch [5/10], Step [700/1563], Loss: 0.0002
Epoch [5/10], Step [750/1563], Loss: 0.0006
Epoch [5/10], Step [800/1563], Loss: 0.0003
Epoch [5/10], Step [850/1563], Loss: 0.0002
Epoch [5/10], Step [900/1563], Loss: 0.3374
Epoch [5/10], Step [950/1563], Loss: 0.0009
Epoch [5/10], Step [1000/1563], Loss: 0.0001
Epoch [5/10], Step [1050/1563], Loss: 0.0002
Epoch [5/10], Step [1100/1563], Loss: 0.0101
Epoch [5/10]

100%|██████████| 1563/1563 [02:50<00:00,  9.17it/s]


Accuracy: 91.984 %
Epoch [6/10], Step [50/1563], Loss: 0.0000
Epoch [6/10], Step [100/1563], Loss: 0.5985
Epoch [6/10], Step [150/1563], Loss: 0.0003
Epoch [6/10], Step [200/1563], Loss: 0.0007
Epoch [6/10], Step [250/1563], Loss: 0.0004
Epoch [6/10], Step [300/1563], Loss: 0.0001
Epoch [6/10], Step [350/1563], Loss: 0.0001
Epoch [6/10], Step [400/1563], Loss: 0.0011
Epoch [6/10], Step [450/1563], Loss: 0.0000
Epoch [6/10], Step [500/1563], Loss: 0.0000
Epoch [6/10], Step [550/1563], Loss: 0.0000
Epoch [6/10], Step [600/1563], Loss: 0.0000
Epoch [6/10], Step [650/1563], Loss: 0.0000
Epoch [6/10], Step [700/1563], Loss: 0.0000
Epoch [6/10], Step [750/1563], Loss: 0.0002
Epoch [6/10], Step [800/1563], Loss: 0.0000
Epoch [6/10], Step [850/1563], Loss: 0.0001
Epoch [6/10], Step [900/1563], Loss: 0.0399
Epoch [6/10], Step [950/1563], Loss: 0.0002
Epoch [6/10], Step [1000/1563], Loss: 0.0001
Epoch [6/10], Step [1050/1563], Loss: 0.0030
Epoch [6/10], Step [1100/1563], Loss: 0.0002
Epoch [6/10

100%|██████████| 1563/1563 [02:50<00:00,  9.18it/s]


Accuracy: 92.276 %
Epoch [7/10], Step [50/1563], Loss: 0.0001
Epoch [7/10], Step [100/1563], Loss: 0.0000
Epoch [7/10], Step [150/1563], Loss: 0.0000
Epoch [7/10], Step [200/1563], Loss: 0.0033
Epoch [7/10], Step [250/1563], Loss: 0.0001
Epoch [7/10], Step [300/1563], Loss: 0.0001
Epoch [7/10], Step [350/1563], Loss: 0.0002
Epoch [7/10], Step [400/1563], Loss: 0.0001
Epoch [7/10], Step [450/1563], Loss: 0.0005
Epoch [7/10], Step [500/1563], Loss: 0.0000
Epoch [7/10], Step [550/1563], Loss: 0.0001
Epoch [7/10], Step [600/1563], Loss: 0.0002
Epoch [7/10], Step [650/1563], Loss: 0.0000
Epoch [7/10], Step [700/1563], Loss: 0.0000
Epoch [7/10], Step [750/1563], Loss: 0.0013
Epoch [7/10], Step [800/1563], Loss: 0.0000
Epoch [7/10], Step [850/1563], Loss: 0.0000
Epoch [7/10], Step [900/1563], Loss: 0.0000
Epoch [7/10], Step [950/1563], Loss: 0.0000
Epoch [7/10], Step [1000/1563], Loss: 0.0000
Epoch [7/10], Step [1050/1563], Loss: 0.0002
Epoch [7/10], Step [1100/1563], Loss: 0.0000
Epoch [7/10

100%|██████████| 1563/1563 [02:50<00:00,  9.17it/s]


Accuracy: 92.332 %
Epoch [8/10], Step [50/1563], Loss: 0.0000
Epoch [8/10], Step [100/1563], Loss: 0.0000
Epoch [8/10], Step [150/1563], Loss: 0.0000
Epoch [8/10], Step [200/1563], Loss: 0.0000
Epoch [8/10], Step [250/1563], Loss: 0.0000
Epoch [8/10], Step [300/1563], Loss: 0.0000
Epoch [8/10], Step [350/1563], Loss: 0.0000
Epoch [8/10], Step [400/1563], Loss: 0.0000
Epoch [8/10], Step [450/1563], Loss: 0.0000
Epoch [8/10], Step [500/1563], Loss: 0.0000
Epoch [8/10], Step [550/1563], Loss: 0.0000
Epoch [8/10], Step [600/1563], Loss: 0.0000
Epoch [8/10], Step [650/1563], Loss: 0.0028
Epoch [8/10], Step [700/1563], Loss: 0.0000
Epoch [8/10], Step [750/1563], Loss: 0.0000
Epoch [8/10], Step [800/1563], Loss: 0.0000
Epoch [8/10], Step [850/1563], Loss: 0.0000
Epoch [8/10], Step [900/1563], Loss: 0.0000
Epoch [8/10], Step [950/1563], Loss: 0.0000
Epoch [8/10], Step [1000/1563], Loss: 0.0000
Epoch [8/10], Step [1050/1563], Loss: 0.0000
Epoch [8/10], Step [1100/1563], Loss: 0.0000
Epoch [8/10

100%|██████████| 1563/1563 [02:50<00:00,  9.18it/s]


Accuracy: 92.236 %
Epoch [9/10], Step [50/1563], Loss: 0.0000
Epoch [9/10], Step [100/1563], Loss: 0.0000
Epoch [9/10], Step [150/1563], Loss: 0.0000
Epoch [9/10], Step [200/1563], Loss: 0.0000
Epoch [9/10], Step [250/1563], Loss: 0.0000
Epoch [9/10], Step [300/1563], Loss: 0.0000
Epoch [9/10], Step [350/1563], Loss: 0.0000
Epoch [9/10], Step [400/1563], Loss: 0.0000
Epoch [9/10], Step [450/1563], Loss: 0.0000
Epoch [9/10], Step [500/1563], Loss: 0.0000
Epoch [9/10], Step [550/1563], Loss: 0.0000
Epoch [9/10], Step [600/1563], Loss: 0.0000
Epoch [9/10], Step [650/1563], Loss: 0.0000
Epoch [9/10], Step [700/1563], Loss: 0.0000
Epoch [9/10], Step [750/1563], Loss: 0.0000
Epoch [9/10], Step [800/1563], Loss: 0.0000
Epoch [9/10], Step [850/1563], Loss: 0.0000
Epoch [9/10], Step [900/1563], Loss: 0.0000
Epoch [9/10], Step [950/1563], Loss: 0.0000
Epoch [9/10], Step [1000/1563], Loss: 0.0000
Epoch [9/10], Step [1050/1563], Loss: 0.0000
Epoch [9/10], Step [1100/1563], Loss: 0.0000
Epoch [9/10

100%|██████████| 1563/1563 [02:50<00:00,  9.16it/s]


Accuracy: 92.264 %
Epoch [10/10], Step [50/1563], Loss: 0.0000
Epoch [10/10], Step [100/1563], Loss: 0.0000
Epoch [10/10], Step [150/1563], Loss: 0.0000
Epoch [10/10], Step [200/1563], Loss: 0.0000
Epoch [10/10], Step [250/1563], Loss: 0.0000
Epoch [10/10], Step [300/1563], Loss: 0.0000
Epoch [10/10], Step [350/1563], Loss: 0.0000
Epoch [10/10], Step [400/1563], Loss: 0.0013
Epoch [10/10], Step [450/1563], Loss: 0.0000
Epoch [10/10], Step [500/1563], Loss: 0.0000
Epoch [10/10], Step [550/1563], Loss: 0.0000
Epoch [10/10], Step [600/1563], Loss: 0.0000
Epoch [10/10], Step [650/1563], Loss: 0.0000
Epoch [10/10], Step [700/1563], Loss: 0.0423
Epoch [10/10], Step [750/1563], Loss: 0.0000
Epoch [10/10], Step [800/1563], Loss: 0.0000
Epoch [10/10], Step [850/1563], Loss: 0.0000
Epoch [10/10], Step [900/1563], Loss: 0.0000
Epoch [10/10], Step [950/1563], Loss: 0.0000
Epoch [10/10], Step [1000/1563], Loss: 0.0000
Epoch [10/10], Step [1050/1563], Loss: 0.0000
Epoch [10/10], Step [1100/1563], Lo

100%|██████████| 1563/1563 [02:50<00:00,  9.17it/s]

Accuracy: 92.232 %





In [9]:
# Accuracy of the current `model` on the test loader.
# NOTE(review): as in the training cell, the model is called with input ids
# only (no attention mask) although load_data pads to MAX_SEQ_LENGTH; keep
# this call consistent with how the model was trained.
model.eval()
correct = total = 0
with torch.no_grad():
    for batch_ids, batch_labels in tqdm(test_loader):
        batch_ids, batch_labels = batch_ids.to(device), batch_labels.to(device)
        # Index of the larger logit per row is the predicted label.
        predicted = model(batch_ids)[0].max(dim=1)[1]
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels).sum().item()
print('Accuracy: {} %'.format(100 * correct / total))

100%|██████████| 1563/1563 [02:50<00:00,  9.17it/s]

Accuracy: 92.232 %



