In [2]:
import torch
from torch import nn
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
dataset = load_dataset("mteb/tweet_sentiment_extraction")


In [5]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=3)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        self.fc = nn.Linear(768, 768)

    def forward(self, x):
        return self.fc(x)


In [23]:
class CombinedModel(nn.Module):
    def __init__(self, transformer_model):
        super(CombinedModel, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.transformer_model = transformer_model

    def forward(self, input_ids, attention_mask):
        # Transformerモデルの出力を取得
        transformer_outputs = self.transformer_model(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = transformer_outputs.last_hidden_state

        # 特徴抽出ネットワークを適用
        features = self.feature_extractor(hidden_states)
        
        # 分類ヘッドに渡す
        logits = self.transformer_model.classifier(features)
        return logits

combined_model = CombinedModel(model)


In [24]:
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
   return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [25]:
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

In [26]:
import evaluate
import numpy as np
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
   logits, labels = eval_pred
   predictions = np.argmax(logits, axis=-1)
   return metric.compute(predictions=predictions, references=labels)

In [47]:
class CombinedModel(nn.Module):
    def __init__(self, transformer_model):
        super(CombinedModel, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.transformer_model = transformer_model

    def forward(self, input_ids, attention_mask, labels=None):
        # Transformerモデルの出力を取得
        transformer_outputs = self.transformer_model(input_ids=input_ids, attention_mask=attention_mask)

        logits = transformer_outputs.logits

        # if labels is not None:
        #     loss_fct = nn.CrossEntropyLoss()
        #     loss = loss_fct(logits.view(-1, self.transformer_model.config.num_labels), labels.view(-1))
        #     return loss
        
        return [5,3]

combined_model = CombinedModel(model)

In [48]:
training_args = TrainingArguments(
    output_dir="test_trainer2",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=10,
)

trainer = Trainer(
    model=combined_model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics  # 必要に応じてメトリクスを定義
)

In [49]:
trainer.train()

AttributeError: 'float' object has no attribute 'backward'