事前学習なしで最初からトレーニングをしてみる

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertModel, BertConfig
import seaborn as sns
from transformers import TrainingArguments, Trainer

#import utils

# For reproducibility
np.random.seed(42)
torch.manual_seed(42) # 乱数生成シード
cudnn.benchmark = True

# Grab a GPU if there is one
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using {} device: {}".format(device, torch.cuda.current_device()))
else:
    device = torch.device("cpu")
    print("Using {}".format(device))

Using cuda device: 0


In [44]:
import numpy as np
fold = "../../train_raw_npy/"
acc_x = np.loadtxt("../../train_raw/Acc_x.txt")
label = np.load(f"{fold}sampled_label.npy")

print(acc_x.shape)
print(label.shape)


(196072, 500)
(196072,)


In [4]:
# ビンの数を設定
num_bins = 5000  # 必要に応じて調整

# ビンの境界を等頻度で設定
bins = np.percentile(acc_x, np.linspace(0, 100, num_bins + 1))

# データをビンに分割
discrete_data = np.digitize(acc_x, bins) - 1  # ビンのインデックスを取得し、0から始まるように調整

# 予約トークンを避けるためにシフト
discrete_data += 104

# 最大インデックスを確認
max_index = discrete_data.max()
print(f"Max index after shifting: {max_index}")

# vocab_sizeを確認（シフト後の最大値を考慮）
vocab_size = max_index + 1
print(f"Vocab size: {vocab_size}")

# 確認のための一部データ
print("Sample discrete data:", discrete_data[:10])
print("Discrete data range:", np.min(discrete_data), "to", np.max(discrete_data))

"""# 分布の視覚化
plt.figure(figsize=(12, 6))
sns.histplot(discrete_data.flatten(), bins=num_bins, kde=False)
plt.title(f"Distribution of Discretized Accelerometer X-axis Data ({num_bins} bins, equal-frequency)")
plt.xlabel("Discrete Value")
plt.ylabel("Frequency")
plt.show()"""

# 離散化されたデータを保存
#np.save("train_token_ids_rebinned.npy", discrete_data)


Max index after shifting: 5104
Vocab size: 5105
Sample discrete data: [[4091 4092 4099 ... 1129 1375 1485]
 [1559 1577 1382 ... 1602 1609 1585]
 [1499 1394 1255 ... 4283 4272 4299]
 ...
 [2760 2514 3253 ... 4438 3632 1623]
 [ 728 2148 2772 ... 3927 4211 4065]
 [2219 2147 3659 ... 1336 2174 3062]]
Discrete data range: 104 to 5104


'# 分布の視覚化\nplt.figure(figsize=(12, 6))\nsns.histplot(discrete_data.flatten(), bins=num_bins, kde=False)\nplt.title(f"Distribution of Discretized Accelerometer X-axis Data ({num_bins} bins, equal-frequency)")\nplt.xlabel("Discrete Value")\nplt.ylabel("Frequency")\nplt.show()'

In [5]:
#説明変数
X = discrete_data
#目的変数
Y = label

#おしり3万データを使う  
X_30000 = X[-40000:-10000,:]
label_30000 = label[-40000:-10000]
print(X_30000.shape)
print(label_30000.shape)

(30000, 500)
(30000,)


In [6]:
""""# X_30000のトークンの頻度を見てみる
sns.histplot(X_30000.flatten(), bins=num_bins, kde=False)
plt.title(f"Distribution of Discretized Accelerometer X-axis Data ({num_bins} bins, equal-frequency)")
plt.xlabel("Discrete Value")
plt.ylabel("Frequency")
plt.show()"""

'"# X_30000のトークンの頻度を見てみる\nsns.histplot(X_30000.flatten(), bins=num_bins, kde=False)\nplt.title(f"Distribution of Discretized Accelerometer X-axis Data ({num_bins} bins, equal-frequency)")\nplt.xlabel("Discrete Value")\nplt.ylabel("Frequency")\nplt.show()'

In [11]:
print(X_30000.dtype)
print(label_30000.dtype)
X_30000 =X_30000.astype(np.int32)
label_30000 = label_30000.astype(np.int32)
print(X_30000.dtype)
print(label_30000.dtype)

int64
float64
int32
int32


In [13]:
import numpy as np
from sklearn.model_selection import train_test_split

# ランダムサンプリング
train_x, test_x, train_label, test_label = train_test_split(X_30000, label_30000, test_size=0.2,random_state=40)

# 結果の確認
print(f"Train data shape: {train_x.shape}")
print(f"Test data shape: {test_x.shape}")
print(f"Train label shape: {train_label.shape}")
print(f"Test label shape: {test_label.shape}")
print(pd.Series(train_label).value_counts())
print(pd.Series(test_label).value_counts())

Train data shape: (24000, 500)
Test data shape: (6000, 500)
Train label shape: (24000,)
Test label shape: (6000,)
1    4235
4    3917
7    3893
6    3582
2    3010
5    2964
8    1445
3     954
Name: count, dtype: int64
1    1041
7     964
4     960
6     926
5     774
2     719
8     373
3     243
Name: count, dtype: int64


In [14]:
train_label.dtype

dtype('int32')

In [15]:
train_x.dtype

dtype('int32')

In [16]:
import torch
from torch.utils.data import Dataset, DataLoader

class SensorDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        input_ids = torch.tensor(self.data[idx], dtype=torch.long)
        labels = torch.tensor(self.labels[idx], dtype=torch.long)

        # [CLS] と [SEP] トークンを追加
        input_ids = torch.cat([torch.tensor([101]), input_ids, torch.tensor([102])])

        attention_mask = torch.ones(input_ids.shape, dtype=torch.long)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

In [17]:
vocab_size

5105

In [18]:
print(train_label)
train_label -=1
print(train_label)

[4 6 4 ... 5 5 5]
[3 5 3 ... 4 4 4]


In [19]:
print(test_label)
test_label -=1
print(test_label)

[7 4 3 ... 6 6 7]
[6 3 2 ... 5 5 6]


In [31]:

# データセットとデータローダの作成
train_dataset = SensorDataset(train_x, train_label)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
eval_dataset = SensorDataset(test_x, test_label)
eval_dataloader = DataLoader(eval_dataset)
"""# BERTの設定を定義
config = BertConfig(
    vocab_size=vocab_size,  # ビンの数 + 104
    hidden_size=768,
    num_hidden_layers=6,
    num_attention_heads=12,
    intermediate_size=3072,
    max_position_embeddings=502,  # シーケンス長 + 1 (CLSトークン)
    type_vocab_size=2  # 通常、2つのトークンタイプ (タイプ0とタイプ1) を使用
)
from transformers import BertConfig, BertForMaskedLM
# モデルの準備
model = BertForMaskedLM(config)
#model.bert.embeddings = CustomBertEmbeddings(config)
print(model)"""

'# BERTの設定を定義\nconfig = BertConfig(\n    vocab_size=vocab_size,  # ビンの数 + 104\n    hidden_size=768,\n    num_hidden_layers=6,\n    num_attention_heads=12,\n    intermediate_size=3072,\n    max_position_embeddings=502,  # シーケンス長 + 1 (CLSトークン)\n    type_vocab_size=2  # 通常、2つのトークンタイプ (タイプ0とタイプ1) を使用\n)\nfrom transformers import BertConfig, BertForMaskedLM\n# モデルの準備\nmodel = BertForMaskedLM(config)\n#model.bert.embeddings = CustomBertEmbeddings(config)\nprint(model)'

In [32]:
from transformers import BertConfig, BertForSequenceClassification

# BERTの設定を定義
config = BertConfig(
    vocab_size=vocab_size,  # 必要に応じて調整
    hidden_size=768,
    num_hidden_layers=6, # 9が限界なのでは？(vram12gbでは)
    num_attention_heads=12,
    intermediate_size=3072,
    max_position_embeddings=512,
    num_labels=8,  # 8クラス分類
)

# BERTモデルを8クラス分類用に定義
model = BertForSequenceClassification(config)
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(5105, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-5): 6 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [33]:
# トレーニング設定
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    save_steps=0,
    save_total_limit=0,
    evaluation_strategy="steps",
    eval_steps=250,
    logging_steps=100,
)

# トレーナーの定義
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# トレーニングの実行
trainer.train()



  0%|          | 0/2250 [00:00<?, ?it/s]

{'loss': 2.0563, 'grad_norm': 3.1014015674591064, 'learning_rate': 4.7777777777777784e-05, 'epoch': 0.13}
{'loss': 1.9142, 'grad_norm': 7.507814884185791, 'learning_rate': 4.555555555555556e-05, 'epoch': 0.27}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.611985683441162, 'eval_runtime': 18.9229, 'eval_samples_per_second': 317.076, 'eval_steps_per_second': 39.635, 'epoch': 0.33}
{'loss': 1.6425, 'grad_norm': 5.710427284240723, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}
{'loss': 1.5745, 'grad_norm': 10.342754364013672, 'learning_rate': 4.111111111111111e-05, 'epoch': 0.53}
{'loss': 1.5339, 'grad_norm': 6.573219299316406, 'learning_rate': 3.888888888888889e-05, 'epoch': 0.67}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.4952894449234009, 'eval_runtime': 20.3602, 'eval_samples_per_second': 294.692, 'eval_steps_per_second': 36.836, 'epoch': 0.67}
{'loss': 1.5375, 'grad_norm': 3.9385385513305664, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.8}
{'loss': 1.4486, 'grad_norm': 9.228249549865723, 'learning_rate': 3.444444444444445e-05, 'epoch': 0.93}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.4346240758895874, 'eval_runtime': 20.2138, 'eval_samples_per_second': 296.827, 'eval_steps_per_second': 37.103, 'epoch': 1.0}
{'loss': 1.4255, 'grad_norm': 11.086310386657715, 'learning_rate': 3.222222222222223e-05, 'epoch': 1.07}
{'loss': 1.3931, 'grad_norm': 8.170818328857422, 'learning_rate': 3e-05, 'epoch': 1.2}
{'loss': 1.3797, 'grad_norm': 13.066079139709473, 'learning_rate': 2.777777777777778e-05, 'epoch': 1.33}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.3841625452041626, 'eval_runtime': 20.3641, 'eval_samples_per_second': 294.636, 'eval_steps_per_second': 36.83, 'epoch': 1.33}
{'loss': 1.3595, 'grad_norm': 15.026509284973145, 'learning_rate': 2.5555555555555554e-05, 'epoch': 1.47}
{'loss': 1.3859, 'grad_norm': 7.65357780456543, 'learning_rate': 2.3333333333333336e-05, 'epoch': 1.6}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.3595472574234009, 'eval_runtime': 20.2267, 'eval_samples_per_second': 296.638, 'eval_steps_per_second': 37.08, 'epoch': 1.67}
{'loss': 1.3561, 'grad_norm': 9.646368980407715, 'learning_rate': 2.111111111111111e-05, 'epoch': 1.73}
{'loss': 1.2992, 'grad_norm': 6.726350784301758, 'learning_rate': 1.888888888888889e-05, 'epoch': 1.87}
{'loss': 1.3375, 'grad_norm': 8.690300941467285, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.3243721723556519, 'eval_runtime': 20.4239, 'eval_samples_per_second': 293.773, 'eval_steps_per_second': 36.722, 'epoch': 2.0}
{'loss': 1.2733, 'grad_norm': 4.490359306335449, 'learning_rate': 1.4444444444444444e-05, 'epoch': 2.13}
{'loss': 1.2239, 'grad_norm': 9.885929107666016, 'learning_rate': 1.2222222222222222e-05, 'epoch': 2.27}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.3117179870605469, 'eval_runtime': 18.9516, 'eval_samples_per_second': 316.597, 'eval_steps_per_second': 39.575, 'epoch': 2.33}
{'loss': 1.25, 'grad_norm': 6.758425712585449, 'learning_rate': 1e-05, 'epoch': 2.4}
{'loss': 1.2055, 'grad_norm': 19.807043075561523, 'learning_rate': 7.777777777777777e-06, 'epoch': 2.53}
{'loss': 1.2173, 'grad_norm': 8.534171104431152, 'learning_rate': 5.555555555555556e-06, 'epoch': 2.67}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.2983232736587524, 'eval_runtime': 19.0474, 'eval_samples_per_second': 315.004, 'eval_steps_per_second': 39.376, 'epoch': 2.67}
{'loss': 1.2291, 'grad_norm': 8.149041175842285, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.8}
{'loss': 1.2206, 'grad_norm': 10.221332550048828, 'learning_rate': 1.1111111111111112e-06, 'epoch': 2.93}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.287358045578003, 'eval_runtime': 20.2271, 'eval_samples_per_second': 296.632, 'eval_steps_per_second': 37.079, 'epoch': 3.0}
{'train_runtime': 915.9493, 'train_samples_per_second': 78.607, 'train_steps_per_second': 2.456, 'train_loss': 1.4175110100640191, 'epoch': 3.0}


TrainOutput(global_step=2250, training_loss=1.4175110100640191, metrics={'train_runtime': 915.9493, 'train_samples_per_second': 78.607, 'train_steps_per_second': 2.456, 'total_flos': 9352371034368000.0, 'train_loss': 1.4175110100640191, 'epoch': 3.0})

In [34]:
from sklearn.metrics import classification_report

predictions = trainer.predict(eval_dataset)
preds = np.argmax(predictions.predictions, axis=1)
# 分類レポートの生成
report = classification_report(test_label, preds, target_names=[str(i) for i in range(1, 9)])
print(report)

  0%|          | 0/750 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           1       0.63      0.49      0.55      1041
           2       0.39      0.55      0.46       719
           3       0.95      0.95      0.95       243
           4       0.68      0.78      0.73       960
           5       0.43      0.48      0.46       774
           6       0.48      0.39      0.43       926
           7       0.38      0.47      0.42       964
           8       0.21      0.01      0.02       373

    accuracy                           0.51      6000
   macro avg       0.52      0.52      0.50      6000
weighted avg       0.51      0.51      0.50      6000



In [40]:
del model
del train_dataset
del trainer
del eval_dataset

import gc
gc.collect()
torch.cuda.empty_cache()

# num_filter_layersを増やしてみる  
初期値は12であるが上の例では6にしてあった。もしかしたらそのせいで精度が出ないだけなのかも

In [41]:

# データセットとデータローダの作成
train_dataset = SensorDataset(train_x, train_label)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
eval_dataset = SensorDataset(test_x, test_label)
eval_dataloader = DataLoader(eval_dataset)
"""# BERTの設定を定義
config = BertConfig(
    vocab_size=vocab_size,  # ビンの数 + 104
    hidden_size=768,
    num_hidden_layers=6,
    num_attention_heads=12,
    intermediate_size=3072,
    max_position_embeddings=502,  # シーケンス長 + 1 (CLSトークン)
    type_vocab_size=2  # 通常、2つのトークンタイプ (タイプ0とタイプ1) を使用
)
from transformers import BertConfig, BertForMaskedLM
# モデルの準備
model = BertForMaskedLM(config)
#model.bert.embeddings = CustomBertEmbeddings(config)
print(model)"""

'# BERTの設定を定義\nconfig = BertConfig(\n    vocab_size=vocab_size,  # ビンの数 + 104\n    hidden_size=768,\n    num_hidden_layers=6,\n    num_attention_heads=12,\n    intermediate_size=3072,\n    max_position_embeddings=502,  # シーケンス長 + 1 (CLSトークン)\n    type_vocab_size=2  # 通常、2つのトークンタイプ (タイプ0とタイプ1) を使用\n)\nfrom transformers import BertConfig, BertForMaskedLM\n# モデルの準備\nmodel = BertForMaskedLM(config)\n#model.bert.embeddings = CustomBertEmbeddings(config)\nprint(model)'

In [42]:
from transformers import BertConfig, BertForSequenceClassification

# BERTの設定を定義
config = BertConfig(
    vocab_size=vocab_size,  # 必要に応じて調整
    hidden_size=768,
    num_hidden_layers=10, # 10が限界なのでは？(vram12gbでは)
    num_attention_heads=12,
    intermediate_size=3072,
    max_position_embeddings=512,
    num_labels=8,  # 8クラス分類
)

# BERTモデルを8クラス分類用に定義
model = BertForSequenceClassification(config)
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(5105, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-9): 10 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-1

In [43]:
# トレーニング設定
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    save_steps=0,
    save_total_limit=0,
    evaluation_strategy="steps",
    eval_steps=500,
    logging_steps=100,
)

# トレーナーの定義
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# トレーニングの実行
trainer.train()

  0%|          | 0/2250 [00:00<?, ?it/s]

{'loss': 2.0536, 'grad_norm': 2.71994948387146, 'learning_rate': 4.7777777777777784e-05, 'epoch': 0.13}
{'loss': 2.0183, 'grad_norm': 4.676105499267578, 'learning_rate': 4.555555555555556e-05, 'epoch': 0.27}
{'loss': 2.0127, 'grad_norm': 3.8067429065704346, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}
{'loss': 2.0243, 'grad_norm': 4.196624755859375, 'learning_rate': 4.111111111111111e-05, 'epoch': 0.53}
{'loss': 2.0389, 'grad_norm': 2.929673194885254, 'learning_rate': 3.888888888888889e-05, 'epoch': 0.67}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 2.0107414722442627, 'eval_runtime': 32.7227, 'eval_samples_per_second': 183.359, 'eval_steps_per_second': 22.92, 'epoch': 0.67}
{'loss': 2.012, 'grad_norm': 2.4816131591796875, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.8}
{'loss': 2.0159, 'grad_norm': 2.413952350616455, 'learning_rate': 3.444444444444445e-05, 'epoch': 0.93}
{'loss': 2.0087, 'grad_norm': 3.337390422821045, 'learning_rate': 3.222222222222223e-05, 'epoch': 1.07}
{'loss': 2.0187, 'grad_norm': 2.5023200511932373, 'learning_rate': 3e-05, 'epoch': 1.2}
{'loss': 1.8152, 'grad_norm': 6.377017498016357, 'learning_rate': 2.777777777777778e-05, 'epoch': 1.33}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.6860272884368896, 'eval_runtime': 173.2491, 'eval_samples_per_second': 34.632, 'eval_steps_per_second': 4.329, 'epoch': 1.33}
{'loss': 1.6614, 'grad_norm': 7.186163425445557, 'learning_rate': 2.5555555555555554e-05, 'epoch': 1.47}
{'loss': 1.6076, 'grad_norm': 6.248212814331055, 'learning_rate': 2.3333333333333336e-05, 'epoch': 1.6}
{'loss': 1.5519, 'grad_norm': 6.305764198303223, 'learning_rate': 2.111111111111111e-05, 'epoch': 1.73}
{'loss': 1.4749, 'grad_norm': 4.788961887359619, 'learning_rate': 1.888888888888889e-05, 'epoch': 1.87}
{'loss': 1.4868, 'grad_norm': 9.401933670043945, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.4533944129943848, 'eval_runtime': 172.1621, 'eval_samples_per_second': 34.851, 'eval_steps_per_second': 4.356, 'epoch': 2.0}
{'loss': 1.4407, 'grad_norm': 9.720457077026367, 'learning_rate': 1.4444444444444444e-05, 'epoch': 2.13}
{'loss': 1.4019, 'grad_norm': 5.269026279449463, 'learning_rate': 1.2222222222222222e-05, 'epoch': 2.27}
{'loss': 1.4129, 'grad_norm': 3.9075980186462402, 'learning_rate': 1e-05, 'epoch': 2.4}
{'loss': 1.3965, 'grad_norm': 8.5674467086792, 'learning_rate': 7.777777777777777e-06, 'epoch': 2.53}
{'loss': 1.3689, 'grad_norm': 7.356326103210449, 'learning_rate': 5.555555555555556e-06, 'epoch': 2.67}


  0%|          | 0/750 [00:00<?, ?it/s]

{'eval_loss': 1.4132689237594604, 'eval_runtime': 167.0233, 'eval_samples_per_second': 35.923, 'eval_steps_per_second': 4.49, 'epoch': 2.67}
{'loss': 1.3802, 'grad_norm': 4.176865100860596, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.8}
{'loss': 1.3757, 'grad_norm': 3.233841896057129, 'learning_rate': 1.1111111111111112e-06, 'epoch': 2.93}
{'train_runtime': 3071.363, 'train_samples_per_second': 23.442, 'train_steps_per_second': 0.733, 'train_loss': 1.7009070502387154, 'epoch': 3.0}


TrainOutput(global_step=2250, training_loss=1.7009070502387154, metrics={'train_runtime': 3071.363, 'train_samples_per_second': 23.442, 'train_steps_per_second': 0.733, 'total_flos': 1.5500788128e+16, 'train_loss': 1.7009070502387154, 'epoch': 3.0})

In [45]:
from sklearn.metrics import classification_report

predictions = trainer.predict(eval_dataset)
preds = np.argmax(predictions.predictions, axis=1)
# 分類レポートの生成
report = classification_report(test_label, preds, target_names=[str(i) for i in range(1, 9)])
print(report)

  0%|          | 0/750 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           1       0.47      0.45      0.46      1041
           2       0.31      0.49      0.38       719
           3       0.93      0.91      0.92       243
           4       0.68      0.70      0.69       960
           5       0.40      0.46      0.43       774
           6       0.44      0.39      0.41       926
           7       0.34      0.33      0.34       964
           8       0.00      0.00      0.00       373

    accuracy                           0.46      6000
   macro avg       0.45      0.47      0.45      6000
weighted avg       0.44      0.46      0.45      6000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
def validate_data_loader(data_loader, vocab_size):
    for batch in data_loader:
        input_ids = batch["input_ids"]
        labels = batch["labels"]

        if input_ids.max() >= vocab_size or labels.max() >= vocab_size:
            print(f"Error: Found out of range index in input_ids or labels. Max value should be less than {vocab_size}.")
            print(f"Max value in input_ids: {input_ids.max()}")
            print(f"Max value in labels: {labels.max()}")
            return False

        if input_ids.dtype != torch.long or labels.dtype != torch.long:
            print("Error: Data type mismatch. input_ids and labels should be of type torch.long.")
            print(f"input_ids dtype: {input_ids.dtype}")
            print(f"labels dtype: {labels.dtype}")
            return False

    print("Data validation passed.")
    return True

# データローダの検証
validate_data_loader(train_dataloader, vocab_size)


Data validation passed.


True