In [8]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class BertRegression(nn.Module):
    def __init__(self, bert_path, num_labels=2):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_path)
        self.dropout = nn.Dropout(0.1)
        self.linear = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.linear(pooled_output)
        return logits

# 初始化tokenizer和模型
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertRegression(bert_path='bert-base-uncased', num_labels=2)

# 假設我們的dataset中有100個樣本，每個樣本包含一段句子、一個valence標籤和一個arousal標籤
dataset = [
    {
        'sentence': 'I love this movie!',
        'valence': 0.9,
        'arousal': 0.8
    },
    {
        'sentence': 'I hate this movie!',
        'valence': 0.1,
        'arousal': 0.2
    },
    # ...
    {
        'sentence': 'This is a boring movie.',
        'valence': 0.4,
        'arousal': 0.3
    }
]

# 將dataset轉換為特徵和標籤的形式
features = []
labels = []
for example in dataset:
    sentence = example['sentence']
    valence = example['valence']
    arousal = example['arousal']

    # 將句子轉換為BERT的輸入
    inputs = tokenizer(sentence, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    features.append((input_ids, attention_mask))
    labels.append((valence, arousal))

# 將特徵和標籤打包成TensorDataset
features = torch.utils.data.TensorDataset(torch.stack([f[0] for f in features]), torch.stack([f[1] for f in features]))
labels = torch.tensor(labels, dtype=torch.float32)
print('features',features[0])
print('features',features[1])
print('features',features[2])
print('labels',labels)
data = torch.utils.data.TensorDataset(features, labels)
print('-'*30)
print('data',data)

# 使用DataLoader進行批次訓練
dataloader = torch.utils.data.DataLoader(data, batch_size=16, shuffle=True)

# 定義優化器和損失函數
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = nn.MSELoss()

# 訓練模型
for epoch in range(10):
    for step, batch in enumerate(dataloader):
        batch_features, batch_labels = batch
        optimizer.zero_grad()
        logits = model(batch_features[0], batch_features[1])
        loss = loss_fn(logits, batch_labels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


features (tensor([ 101, 1045, 2293, 2023, 3185,  999,  102,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0]), tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

AttributeError: 'TensorDataset' object has no attribute 'size'

In [9]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class BertRegression(nn.Module):
    def __init__(self, bert_path, num_labels=2):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_path)
        self.dropout = nn.Dropout(0.1)
        self.linear = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.linear(pooled_output)
        return logits

# 初始化tokenizer和模型
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertRegression(bert_path='bert-base-uncased', num_labels=2)

# 假設我們的dataset中有100個樣本，每個樣本包含一段句子、一個valence標籤和一個arousal標籤
dataset = [
    {
        'sentence': 'I love this movie!',
        'valence': 0.9,
        'arousal': 0.8
    },
    {
        'sentence': 'I hate this movie!',
        'valence': 0.1,
        'arousal': 0.2
    },
    # ...
    {
        'sentence': 'This is a boring movie.',
        'valence': 0.4,
        'arousal': 0.3
    }
]

# 將dataset轉換為特徵和標籤的形式
features = []
labels = []
for example in dataset:
    sentence = example['sentence']
    valence = example['valence']
    arousal = example['arousal']

    # 將句子轉換為BERT的輸入
    inputs = tokenizer(sentence, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    features.append((input_ids, attention_mask))
    labels.append((valence, arousal))

# 將特徵和標籤打包成TensorDataset
features = torch.utils.data.TensorDataset(torch.stack([f[0] for f in features]), torch.stack([f[1] for f in features]))
labels = torch.tensor(labels, dtype=torch.float32)
data = torch.utils.data.TensorDataset(features, labels)

# 使用DataLoader進行批次訓練
dataloader = torch.utils.data.DataLoader(data, batch_size=16, shuffle=True)

# 定義優化器和損失函數
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = nn.MSELoss()

# 訓練模型
for epoch in range(10):
    for step, batch in enumerate(dataloader):
        batch_features, batch_labels = batch
        optimizer.zero_grad()
        logits = model(batch_features[0], batch_features[1])
       


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


AttributeError: 'TensorDataset' object has no attribute 'size'

In [12]:
import torch
import torch.nn as nn
from transformers import BertModel

class BertRegression(nn.Module):
    def __init__(self, bert_path, num_labels=1):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_path)
        self.dropout = nn.Dropout(0.1)
        self.linear = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.linear(pooled_output)
        return logits

# 假設我們的訓練數據有2個特徵，1個標籤
train_features = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float32)
train_labels = torch.tensor([10, 20, 30], dtype=torch.float32)

# 將特徵和標籤打包成TensorDataset
train_data = torch.utils.data.TensorDataset(train_features, train_labels)

# 使用DataLoader進行批次訓練
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=2, shuffle=True)

# 初始化模型
model = BertRegression(bert_path='bert-base-uncased', num_labels=1)

# 定義優化器
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

# 定義損失函數
loss_fn = nn.MSELoss()

# 訓練模型
for epoch in range(10):
    for step, batch in enumerate(train_dataloader):
        batch_features, batch_labels = batch
        optimizer.zero_grad()
        logits = model(batch_features[:, 0], batch_features[:, 1])
        loss = loss_fn(logits.view(-1), batch_labels)
        loss.backward()
        optimizer.step()

# 使用訓練好的模型進行預測
test_features = torch.tensor([[7, 8], [9, 10]], dtype=torch.float32)
logits = model(test_features[:, 0], test_features[:, 1])
predictions = logits.detach().numpy().squeeze()
print(predictions)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ValueError: not enough values to unpack (expected 2, got 1)