In [1]:
# Expose only GPUs 2 and 3 to this process; the variable is set before torch is imported.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
import torch
print(torch.__version__)
# Seed torch and pin cuDNN to deterministic mode with its benchmark autotuner switched off.
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Seed Python's and NumPy's global RNGs with the same value as torch.
import random
random.seed(0)
import numpy as np
np.random.seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

1.1.0


In [2]:
from tqdm import tqdm
from transformers.tokenization_bert import BertTokenizer
from transformers.modeling_bert import BertForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader

In [3]:
# Tunable settings for the run.
MAX_SEQ_LENGTH = 128  # max_length handed to tokenizer.encode for every review
BATCH_SIZE = 64  # batch size of both the train and the test DataLoader
NUM_EPOCHS = 5  # number of passes over the training set
LEARNING_RATE = 1e-4  # learning rate given to the Adam optimizer

In [4]:
# Load the pretrained uncased BERT vocabulary and a two-label classification head,
# wrap the network for multi-GPU data parallelism, then place it on the chosen device.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = nn.DataParallel(
    BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
)
# Kept as the final expression so the notebook still displays the module tree.
model.to(device)

DataParallel(
  (module): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                

In [5]:
def load_data(path):
    """Read a sentiment dataset split from disk and tokenize every review.

    Expects ``path`` to contain a ``neg`` and a ``pos`` sub-folder with one
    text file per review. Each file is decoded as UTF-8 and passed to the
    module-level ``tokenizer`` with ``max_length=MAX_SEQ_LENGTH`` and
    ``pad_to_max_length=True``.

    Args:
        path: Directory holding the ``neg`` and ``pos`` folders.

    Returns:
        A pair ``(indices, sentiments)`` of NumPy arrays: the token-id
        sequences in file order (all ``neg`` files first, then ``pos``) and
        the matching labels (0 for ``neg``, 1 for ``pos``).
    """
    indices, sentiments = [], []
    for folder, sentiment in (('neg', 0), ('pos', 1)):
        folder = os.path.join(path, folder)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore seeded shuffling) identical across runs.
        for name in tqdm(sorted(os.listdir(folder))):
            # Decode explicitly as UTF-8 instead of relying on the locale default.
            with open(os.path.join(folder, name), 'r', encoding='utf-8') as reader:
                text = reader.read()
            ids = tokenizer.encode(text, max_length=MAX_SEQ_LENGTH, pad_to_max_length=True)
            indices.append(ids)
            sentiments.append(sentiment)
    return np.array(indices), np.array(sentiments)

# Both splits live under the same aclImdb directory; tokenize train first, then test.
imdb_root = os.path.join("../datasets", 'aclImdb')
train_path = os.path.join(imdb_root, 'train')
test_path = os.path.join(imdb_root, 'test')
(X_train, y_train), (X_test, y_test) = load_data(train_path), load_data(test_path)

100%|██████████| 12500/12500 [00:55<00:00, 224.65it/s]
100%|██████████| 12500/12500 [00:57<00:00, 218.20it/s]
100%|██████████| 12500/12500 [00:55<00:00, 226.56it/s]
100%|██████████| 12500/12500 [00:55<00:00, 223.78it/s]


In [6]:
# Convert the token-id and label arrays of both splits into int64 tensors.
X_train, y_train, X_test, y_test = (
    torch.tensor(array, dtype=torch.long)
    for array in (X_train, y_train, X_test, y_test)
)

In [7]:
# Pair inputs with labels for each split, then batch them.
# Only the training batches are shuffled; the test order stays fixed.
train_data, test_data = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
# Fine-tune every model parameter with Adam and report test accuracy after each epoch.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# The loss module is loop-invariant, so build it once instead of on every step.
criterion = nn.CrossEntropyLoss()
total_step = len(train_loader)
# NOTE(review): only the token ids are passed to the model, so no attention_mask is
# supplied and padded positions are not masked out. Confirm whether that is intended;
# adding a mask would change the recorded results.
for epoch in range(NUM_EPOCHS):
    model.train()
    for i, (cur_X_train, cur_y_train) in enumerate(train_loader):
        cur_X_train = cur_X_train.to(device)
        cur_y_train = cur_y_train.to(device)
        # Clear gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()
        outputs = model(cur_X_train)
        # outputs[0] is used as the per-class logits for the cross-entropy loss.
        loss = criterion(outputs[0], cur_y_train)
        loss.backward()
        optimizer.step()
        if (i + 1) % 50 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, NUM_EPOCHS, i + 1, total_step, loss.item()))
    # Evaluate on the full test set with dropout disabled and gradients off.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for cur_X_test, cur_y_test in tqdm(test_loader):
            cur_X_test = cur_X_test.to(device)
            cur_y_test = cur_y_test.to(device)
            outputs = model(cur_X_test)
            # The predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs[0], 1)
            total += cur_y_test.size(0)
            correct += (predicted == cur_y_test).sum().item()
    print('Accuracy: {} %'.format(100 * correct / total))

Epoch [1/5], Step [50/391], Loss: 0.4841
Epoch [1/5], Step [100/391], Loss: 0.3819
Epoch [1/5], Step [150/391], Loss: 0.2778
Epoch [1/5], Step [200/391], Loss: 0.2617
Epoch [1/5], Step [250/391], Loss: 0.3129
Epoch [1/5], Step [300/391], Loss: 0.2833
Epoch [1/5], Step [350/391], Loss: 0.2166


100%|██████████| 391/391 [01:07<00:00,  5.82it/s]


Accuracy: 86.596 %
Epoch [2/5], Step [50/391], Loss: 0.1761
Epoch [2/5], Step [100/391], Loss: 0.2047
Epoch [2/5], Step [150/391], Loss: 0.2803
Epoch [2/5], Step [200/391], Loss: 0.1970
Epoch [2/5], Step [250/391], Loss: 0.1345
Epoch [2/5], Step [300/391], Loss: 0.1038
Epoch [2/5], Step [350/391], Loss: 0.2731


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]


Accuracy: 87.636 %
Epoch [3/5], Step [50/391], Loss: 0.1172
Epoch [3/5], Step [100/391], Loss: 0.0646
Epoch [3/5], Step [150/391], Loss: 0.0558
Epoch [3/5], Step [200/391], Loss: 0.1415
Epoch [3/5], Step [250/391], Loss: 0.3666
Epoch [3/5], Step [300/391], Loss: 0.0659
Epoch [3/5], Step [350/391], Loss: 0.0746


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]


Accuracy: 86.58 %
Epoch [4/5], Step [50/391], Loss: 0.1607
Epoch [4/5], Step [100/391], Loss: 0.0277
Epoch [4/5], Step [150/391], Loss: 0.1559
Epoch [4/5], Step [200/391], Loss: 0.1276
Epoch [4/5], Step [250/391], Loss: 0.0249
Epoch [4/5], Step [300/391], Loss: 0.0860
Epoch [4/5], Step [350/391], Loss: 0.0210


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]


Accuracy: 86.88 %
Epoch [5/5], Step [50/391], Loss: 0.0418
Epoch [5/5], Step [100/391], Loss: 0.0823
Epoch [5/5], Step [150/391], Loss: 0.0114
Epoch [5/5], Step [200/391], Loss: 0.0278
Epoch [5/5], Step [250/391], Loss: 0.0043
Epoch [5/5], Step [300/391], Loss: 0.0906
Epoch [5/5], Step [350/391], Loss: 0.0778


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]

Accuracy: 86.332 %





In [9]:
# Final pass over the test set: count how many predictions match the labels.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for cur_X_test, cur_y_test in tqdm(test_loader):
        cur_X_test, cur_y_test = cur_X_test.to(device), cur_y_test.to(device)
        logits = model(cur_X_test)[0]
        # Index 1 of torch.max's result holds the position of the largest logit per row.
        predicted = torch.max(logits, 1)[1]
        hits = (predicted == cur_y_test).sum().item()
        total += cur_y_test.size(0)
        correct += hits
print('Accuracy: {} %'.format(100 * correct / total))

100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Accuracy: 86.332 %



