In [1]:
import torch
import torch.nn as nn
import torchmetrics
from torch.utils.data import DataLoader

from dataset import get_sentiment_detection_dataset, get_sentiment_classification_dataset
from models import ALMBert, BasicBert
from trainer import Trainer

# Basic Bert model experiments

### Aspect sentimen detection and classification

In [4]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(5000)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = BasicBert(4)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerN

Epoch: 0	 Train loss: 1.14	 metrics: <function accuracy at 0x7f63249a3e20>: 0.5412	 
 Validation loss: 0.9534	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6571	 
Epoch: 1	 Train loss: 0.9419	 metrics: <function accuracy at 0x7f63249a3e20>: 0.619	 
 Validation loss: 0.883	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6293	 
Epoch: 2	 Train loss: 0.8774	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6234	 
 Validation loss: 0.8705	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6293	 
Epoch: 3	 Train loss: 0.8459	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6275	 
 Validation loss: 0.8919	 metrics: <function accuracy at 0x7f63249a3e20>: 0.5842	 
Epoch: 4	 Train loss: 0.8257	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6314	 
 Validation loss: 0.8719	 metrics: <function accuracy at 0x7f63249a3e20>: 0.605	 
Epoch: 5	 Train loss: 0.8082	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6364	 
 Validation loss: 0.8744	 metrics: <function accuracy at 0x7f63249a3

### Aspect's sentimen classification

In [5]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(0)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = BasicBert(3)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.849	 metrics: <function accuracy at 0x7f63249a3e20>: 0.6522	 
 Validation loss: 0.6222	 metrics: <function accuracy at 0x7f63249a3e20>: 0.7786	 
Epoch: 1	 Train loss: 0.5666	 metrics: <function accuracy at 0x7f63249a3e20>: 0.7928	 
 Validation loss: 0.5324	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8099	 
Epoch: 2	 Train loss: 0.517	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8251	 
 Validation loss: 0.5252	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8151	 
Epoch: 3	 Train loss: 0.4738	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8311	 
 Validation loss: 0.5744	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8047	 
Epoch: 4	 Train loss: 0.4252	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8533	 
 Validation loss: 0.5238	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8229	 
Epoch: 5	 Train loss: 0.3862	 metrics: <function accuracy at 0x7f63249a3e20>: 0.8624	 
 Validation loss: 0.5253	 metrics: <function accuracy at 0x7f6324

### Aspect sentimen detection

In [3]:
train_ds, valid_ds, test_ds = get_sentiment_detection_dataset(5000)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

model = BasicBert(2)

LR = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.6688	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6197	 <function recall at 0x7f79289e88b0>: 0.6197	 <function precision at 0x7f79289e8700>: 0.6197	 
 Validation loss: 0.6426	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.658	 <function recall at 0x7f79289e88b0>: 0.658	 <function precision at 0x7f79289e8700>: 0.658	 
Epoch: 1	 Train loss: 0.6526	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6368	 <function recall at 0x7f79289e88b0>: 0.6368	 <function precision at 0x7f79289e8700>: 0.6368	 
 Validation loss: 0.6414	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6458	 <function recall at 0x7f79289e88b0>: 0.6458	 <function precision at 0x7f79289e8700>: 0.6458	 
Epoch: 2	 Train loss: 0.6446	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6448	 <function recall at 0x7f79289e88b0>: 0.6448	 <function precision at 0x7f79289e8700>: 0.6448	 
 Validation loss: 0.643	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6415	 <function recall at 0x7f79289e

# ALM-Bert model experiments

### Aspect sentimen detection and classification

In [4]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(5000)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = ALMBert(4)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.9571	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6194	 
 Validation loss: 0.8872	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6432	 
Test loss: 0.952	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.5996	


### Aspect's sentimen classification

In [5]:
train_ds, valid_ds, test_ds = get_sentiment_classification_dataset(0)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = ALMBert(3)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.706	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.7303	 <function recall at 0x7f79289e88b0>: 0.7303	 <function precision at 0x7f79289e8700>: 0.7303	 
 Validation loss: 0.6449	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.7943	 <function recall at 0x7f79289e88b0>: 0.7943	 <function precision at 0x7f79289e8700>: 0.7943	 
Epoch: 1	 Train loss: 0.4579	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.8397	 <function recall at 0x7f79289e88b0>: 0.8397	 <function precision at 0x7f79289e8700>: 0.8397	 
 Validation loss: 0.603	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.8125	 <function recall at 0x7f79289e88b0>: 0.8125	 <function precision at 0x7f79289e8700>: 0.8125	 
Epoch: 2	 Train loss: 0.4371	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.8342	 <function recall at 0x7f79289e88b0>: 0.8342	 <function precision at 0x7f79289e8700>: 0.8342	 
 Validation loss: 0.5229	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.8333	 <function recall at 0x7f7928

### Aspect sentimen detection

In [8]:
train_ds, valid_ds, test_ds = get_sentiment_detection_dataset(5000)

BATCH = 64

train_dl = DataLoader(dataset=train_ds, batch_size=BATCH, drop_last=True, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=BATCH, drop_last=True, shuffle=False)
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH, drop_last=False, shuffle=False)

LR = 0.001

model = ALMBert(2)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.functional.cross_entropy
metrics = [
    torchmetrics.functional.accuracy,
]

trainer = Trainer(
    model=model,
    optim=optimizer,
    loss=criterion,
    verbose=True,
    metrics=metrics,
    train_dataloader=train_dl,
    valid_dataloader=valid_dl,
    test_dataloader=test_dl
)

trainer.train(100)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max le

Epoch: 0	 Train loss: 0.6624	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6157	 
 Validation loss: 0.6377	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6406	 
Epoch: 1	 Train loss: 0.6496	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6372	 
 Validation loss: 0.647	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.638	 
Epoch: 2	 Train loss: 0.6421	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6395	 
 Validation loss: 0.6474	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.658	 
Epoch: 3	 Train loss: 0.6371	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6484	 
 Validation loss: 0.6555	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6536	 
Epoch: 4	 Train loss: 0.6293	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.651	 
 Validation loss: 0.6834	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6398	 
Test loss: 0.7111	 metrics: <function accuracy at 0x7f79289a3ac0>: 0.6183	
