In [1]:
import torch
import torch.nn as nn
import torchmetrics
from torch.utils.data import DataLoader

from dataset import get_sentiment_detection_dataset, get_sentiment_classification_dataset
from models import ALMBert, BasicBert
from trainer import Trainer

# Basic Bert model experiments

### Aspect sentimen detection and classification

In [17]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(5000)

BATCH = 128

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.0001

model = BasicBert(4, 0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 1.461	 metrics: <function accuracy at 0x7f032008ba30>: 0.2707	 
 Validation loss: 1.331	 metrics: <function accuracy at 0x7f032008ba30>: 0.4201	 
Epoch: 1	 Train loss: 1.364	 metrics: <function accuracy at 0x7f032008ba30>: 0.3552	 
 Validation loss: 1.223	 metrics: <function accuracy at 0x7f032008ba30>: 0.5712	 
Epoch: 2	 Train loss: 1.29	 metrics: <function accuracy at 0x7f032008ba30>: 0.4217	 
 Validation loss: 1.17	 metrics: <function accuracy at 0x7f032008ba30>: 0.6172	 
Epoch: 3	 Train loss: 1.23	 metrics: <function accuracy at 0x7f032008ba30>: 0.4758	 
 Validation loss: 1.122	 metrics: <function accuracy at 0x7f032008ba30>: 0.6406	 
Epoch: 4	 Train loss: 1.189	 metrics: <function accuracy at 0x7f032008ba30>: 0.5243	 
 Validation loss: 1.078	 metrics: <function accuracy at 0x7f032008ba30>: 0.6519	 
Epoch: 5	 Train loss: 1.15	 metrics: <function accuracy at 0x7f032008ba30>: 0.5551	 
 Validation loss: 1.053	 metrics: <function accuracy at 0x7f032008ba30>: 0.653

### Aspect's sentimen classification

In [12]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(0)

BATCH = 256

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = BasicBert(3, 0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(60)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 1.048	 metrics: <function accuracy at 0x7f032008ba30>: 0.4537	 
 Validation loss: 1.004	 metrics: <function accuracy at 0x7f032008ba30>: 0.5312	 
Epoch: 1	 Train loss: 0.9484	 metrics: <function accuracy at 0x7f032008ba30>: 0.5396	 
 Validation loss: 0.949	 metrics: <function accuracy at 0x7f032008ba30>: 0.5312	 
Epoch: 2	 Train loss: 0.9176	 metrics: <function accuracy at 0x7f032008ba30>: 0.5446	 
 Validation loss: 0.9039	 metrics: <function accuracy at 0x7f032008ba30>: 0.5625	 
Epoch: 3	 Train loss: 0.8749	 metrics: <function accuracy at 0x7f032008ba30>: 0.5904	 
 Validation loss: 0.8554	 metrics: <function accuracy at 0x7f032008ba30>: 0.7383	 
Epoch: 4	 Train loss: 0.8267	 metrics: <function accuracy at 0x7f032008ba30>: 0.6434	 
 Validation loss: 0.7865	 metrics: <function accuracy at 0x7f032008ba30>: 0.7266	 
Epoch: 5	 Train loss: 0.769	 metrics: <function accuracy at 0x7f032008ba30>: 0.6925	 
 Validation loss: 0.7087	 metrics: <function accuracy at 0x7f032008

### Aspect sentimen detection

In [3]:
train_ds, valid_ds, test_ds = get_sentiment_detection_dataset(5000)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

model = BasicBert(2, 0.6)

LR = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.6688	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6197	 <function recall at 0x7f79289e88b0>: 0.6197	 <function precision at 0x7f79289e8700>: 0.6197	 
 Validation loss: 0.6426	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.658	 <function recall at 0x7f79289e88b0>: 0.658	 <function precision at 0x7f79289e8700>: 0.658	 
Epoch: 1	 Train loss: 0.6526	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6368	 <function recall at 0x7f79289e88b0>: 0.6368	 <function precision at 0x7f79289e8700>: 0.6368	 
 Validation loss: 0.6414	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6458	 <function recall at 0x7f79289e88b0>: 0.6458	 <function precision at 0x7f79289e8700>: 0.6458	 
Epoch: 2	 Train loss: 0.6446	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6448	 <function recall at 0x7f79289e88b0>: 0.6448	 <function precision at 0x7f79289e8700>: 0.6448	 
 Validation loss: 0.643	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6415	 <function recall at 0x7f79289e

# ALM-Bert model experiments

### Aspect sentimen detection and classification

In [10]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(5000)

BATCH = 512

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.0001

model = ALMBert(4, 0.8)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 1.392	 metrics: <function accuracy at 0x7f032008ba30>: 0.2162	 
 Validation loss: 1.36	 metrics: <function accuracy at 0x7f032008ba30>: 0.1338	 
Epoch: 1	 Train loss: 1.379	 metrics: <function accuracy at 0x7f032008ba30>: 0.2742	 
 Validation loss: 1.329	 metrics: <function accuracy at 0x7f032008ba30>: 0.6064	 
Epoch: 2	 Train loss: 1.352	 metrics: <function accuracy at 0x7f032008ba30>: 0.3742	 
 Validation loss: 1.281	 metrics: <function accuracy at 0x7f032008ba30>: 0.6504	 
Epoch: 3	 Train loss: 1.32	 metrics: <function accuracy at 0x7f032008ba30>: 0.4689	 
 Validation loss: 1.234	 metrics: <function accuracy at 0x7f032008ba30>: 0.6523	 
Epoch: 4	 Train loss: 1.288	 metrics: <function accuracy at 0x7f032008ba30>: 0.5322	 
 Validation loss: 1.207	 metrics: <function accuracy at 0x7f032008ba30>: 0.6523	 
Epoch: 5	 Train loss: 1.268	 metrics: <function accuracy at 0x7f032008ba30>: 0.551	 
 Validation loss: 1.184	 metrics: <function accuracy at 0x7f032008ba30>: 0.65

### Aspect's sentimen classification

In [3]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(0)

BATCH = 256

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = ALMBert(3, 0.60)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 1.035	 metrics: <function accuracy at 0x7f032008ba30>: 0.4794	 
 Validation loss: 1.051	 metrics: <function accuracy at 0x7f032008ba30>: 0.6328	 
Epoch: 1	 Train loss: 0.858	 metrics: <function accuracy at 0x7f032008ba30>: 0.7148	 
 Validation loss: 0.9907	 metrics: <function accuracy at 0x7f032008ba30>: 0.5039	 
Epoch: 2	 Train loss: 0.7123	 metrics: <function accuracy at 0x7f032008ba30>: 0.7567	 
 Validation loss: 0.8985	 metrics: <function accuracy at 0x7f032008ba30>: 0.5547	 
Epoch: 3	 Train loss: 0.5999	 metrics: <function accuracy at 0x7f032008ba30>: 0.8103	 
 Validation loss: 0.5996	 metrics: <function accuracy at 0x7f032008ba30>: 0.793	 
Epoch: 4	 Train loss: 0.516	 metrics: <function accuracy at 0x7f032008ba30>: 0.8337	 
 Validation loss: 0.5646	 metrics: <function accuracy at 0x7f032008ba30>: 0.7969	 
Epoch: 5	 Train loss: 0.4645	 metrics: <function accuracy at 0x7f032008ba30>: 0.8504	 
 Validation loss: 0.6365	 metrics: <function accuracy at 0x7f032008b

In [4]:
torch.save(trainer.model, "./trained/ALMBert_sentiment_classification.pth")

### Aspect sentimen detection

In [18]:
train_ds, valid_ds, test_ds = get_sentiment_detection_dataset(5000)

BATCH = 128

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.0001

model = ALMBert(2, 0.6)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.6946	 metrics: <function accuracy at 0x7f032008ba30>: 0.5176	 
 Validation loss: 0.6634	 metrics: <function accuracy at 0x7f032008ba30>: 0.6285	 
Epoch: 1	 Train loss: 0.6628	 metrics: <function accuracy at 0x7f032008ba30>: 0.6303	 
 Validation loss: 0.6383	 metrics: <function accuracy at 0x7f032008ba30>: 0.658	 
Epoch: 2	 Train loss: 0.6493	 metrics: <function accuracy at 0x7f032008ba30>: 0.639	 
 Validation loss: 0.6333	 metrics: <function accuracy at 0x7f032008ba30>: 0.6684	 
Epoch: 3	 Train loss: 0.6422	 metrics: <function accuracy at 0x7f032008ba30>: 0.6484	 
 Validation loss: 0.6372	 metrics: <function accuracy at 0x7f032008ba30>: 0.6502	 
Epoch: 4	 Train loss: 0.6346	 metrics: <function accuracy at 0x7f032008ba30>: 0.6475	 
 Validation loss: 0.6475	 metrics: <function accuracy at 0x7f032008ba30>: 0.638	 
Epoch: 5	 Train loss: 0.6306	 metrics: <function accuracy at 0x7f032008ba30>: 0.6552	 
 Validation loss: 0.6373	 metrics: <function accuracy at 0x7f03200

In [20]:
from transformers import BertTokenizer
import torch
from models import ALMBert


tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

model = trainer.model

review = "This laptop is bad"
aspect = "laptop general"

review_encoding = tokenizer.encode_plus(
  review,
  add_special_tokens=True,
  max_length=75,
  return_token_type_ids=False,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_tensors='pt',
)
aspect_encoding = tokenizer.encode_plus(
  review,
  add_special_tokens=True,
  max_length=4,
  return_token_type_ids=False,
  pad_to_max_length=True,
  return_attention_mask=True,
  return_tensors='pt',
)

labels = {
    0: "positive",
    1: "negative",
    2: "neutral",
}
with torch.no_grad():
    preds = model(
        review_encoding['input_ids'],
        review_encoding['attention_mask'],
        aspect_encoding['input_ids'],
        aspect_encoding['attention_mask'],
    )
print(preds)
print(labels[torch.argmax(preds)])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper__index_select)