In [1]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

class AccentDataset(Dataset):
    """Per-file dataset that yields fixed-length feature/label sequences.

    Each item is read from one CSV file. Columns whose name matches the
    regex ``Feature_`` form the feature matrix and the ``Accent`` column
    provides one label per row. Sequences are truncated or padded so that
    every item has exactly ``max_len`` rows.

    Args:
        file_paths: Sequence of CSV file paths, one sample per file.
        max_len: Number of rows every returned sequence is forced to.
    """

    def __init__(self, file_paths, max_len=768):
        self.file_paths = file_paths
        self.max_len = max_len

    def __len__(self):
        """Return the number of CSV files (= samples)."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load one CSV file and return it as padded/truncated tensors.

        Returns:
            dict with
              * ``"features"``: float32 tensor of shape ``(max_len, n_features)``;
                rows past the end of the file are zero.
              * ``"labels"``: int64 tensor of shape ``(max_len,)``; rows past
                the end of the file are ``-1``.
        """
        df = pd.read_csv(self.file_paths[idx])

        # Convert to the final dtypes exactly once. copy=True guarantees a
        # writable buffer, so torch.as_tensor can wrap it without warnings.
        frame_features = torch.as_tensor(
            df.filter(regex="Feature_").to_numpy(dtype="float32", copy=True)
        )
        frame_labels = torch.as_tensor(
            df["Accent"].to_numpy(dtype="int64", copy=True)
        )

        # Number of real rows that survive truncation to max_len.
        n_valid = min(frame_features.shape[0], self.max_len)

        # Pre-fill with the padding values and copy the real rows in. This
        # covers the truncate, pad and exact-length cases uniformly and avoids
        # re-wrapping an existing tensor with torch.tensor(), which emits a
        # "copy construct" UserWarning.
        features = torch.zeros(
            (self.max_len, frame_features.shape[1]), dtype=torch.float32
        )
        features[:n_valid] = frame_features[:n_valid]

        labels = torch.full((self.max_len,), -1, dtype=torch.long)
        labels[:n_valid] = frame_labels[:n_valid]

        return {"features": features, "labels": labels}

# データ分割
def split_data(input_dir, test_size=0.2, val_size=0.1):
    """Split the ``.csv`` files found in ``input_dir`` into train/val/test lists.

    Args:
        input_dir: Directory that is listed (non-recursively) for ``.csv`` files.
        test_size: Fraction of all files placed in the test list.
        val_size: Fraction of all files placed in the validation list.

    Returns:
        Tuple ``(train_files, val_files, test_files)`` of file-path lists.
    """
    csv_paths = []
    for entry in os.listdir(input_dir):
        if entry.endswith(".csv"):
            csv_paths.append(os.path.join(input_dir, entry))
    # Sort so the fixed random_state yields the same split on every run.
    csv_paths.sort()

    remaining, test_files = train_test_split(
        csv_paths, test_size=test_size, random_state=42
    )

    # The second split only sees the non-test remainder, so val_size is
    # rescaled to stay a fraction of the full file list.
    val_fraction = val_size / (1 - test_size)
    train_files, val_files = train_test_split(
        remaining, test_size=val_fraction, random_state=42
    )
    return train_files, val_files, test_files

# Split the CSV files, then wrap each subset in an AccentDataset
input_data_dir = "./output_merged_results"
train_files, val_files, test_files = split_data(input_data_dir)

train_dataset, val_dataset, test_dataset = (
    AccentDataset(subset) for subset in (train_files, val_files, test_files)
)

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class AccentTransformer(nn.Module):
    """Per-position classifier built on a Transformer encoder stack.

    Input features are projected to ``d_model``, a learned positional table is
    added, the sequence is passed through the encoder of an ``nn.Transformer``
    and every position is mapped to ``num_classes`` logits.

    Args:
        input_dim: Size of each input feature vector.
        num_classes: Number of output classes per position.
        d_model: Internal model width.
        nhead: Number of attention heads.
        num_layers: Number of encoder (and decoder) layers.
        max_len: Longest sequence the learned positional table supports.
    """

    def __init__(self, input_dim, num_classes, d_model=512, nhead=8, num_layers=4, max_len=768):
        super(AccentTransformer, self).__init__()
        self.input_fc = nn.Linear(input_dim, d_model)  # project features to the model width
        # Learned positional table (initialised to zero), one row per position.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, d_model))

        # NOTE(review): nn.Transformer also builds a decoder stack that
        # forward() never calls. It is kept so parameter names and the
        # initialisation order stay unchanged; consider an encoder-only module.
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=1024,
            dropout=0.1,
            activation="relu"
        )

        self.output_fc = nn.Linear(d_model, num_classes)

    def forward(self, x, src_key_padding_mask=None):
        """Compute per-position class logits.

        Args:
            x: Tensor of shape ``(batch_size, seq_len, input_dim)`` with
                ``seq_len`` no larger than the positional table length.
            src_key_padding_mask: Optional bool tensor of shape
                ``(batch_size, seq_len)``; ``True`` marks padded positions that
                attention must not attend to. ``None`` (the default) attends
                to every position, as before.

        Returns:
            Tensor of shape ``(batch_size, seq_len, num_classes)``.
        """
        # Project the input and add the positional rows for this length.
        x = self.input_fc(x) + self.positional_encoding[:, :x.size(1), :]
        x = x.permute(1, 0, 2)  # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)

        # Encoder only; the mask keeps padded frames out of self-attention.
        output = self.transformer.encoder(x, src_key_padding_mask=src_key_padding_mask)

        # (seq_len, batch_size, d_model) -> (batch_size, seq_len, num_classes)
        output = self.output_fc(output.permute(1, 0, 2))
        return output

In [5]:
from torch.utils.data import DataLoader
import torch.optim as optim

# Model, loss function and optimizer
input_dim = 768
num_classes = 5  # number of classes
model = AccentTransformer(input_dim, num_classes, d_model=256, nhead=4, num_layers=2)
criterion = nn.CrossEntropyLoss(ignore_index=-1)  # positions labelled -1 (padding) are ignored
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Training loop
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_losses = []
    for batch in train_loader:
        features = batch["features"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(features)

        # Flatten (batch, seq) so every position is scored as one sample.
        loss = criterion(outputs.view(-1, num_classes), labels.view(-1))
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

    total_loss = sum(train_losses)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader)}")

    # ---- validation pass (no gradients, no weight updates) ----
    model.eval()
    val_losses = []
    with torch.no_grad():
        for batch in val_loader:
            features = batch["features"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(features)
            loss = criterion(outputs.view(-1, num_classes), labels.view(-1))
            val_losses.append(loss.item())

    val_loss = sum(val_losses)
    print(f"Validation Loss: {val_loss / len(val_loader)}")

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 23.69 GiB of which 3.75 MiB is free. Process 1117887 has 22.70 GiB memory in use. Including non-PyTorch memory, this process has 1000.00 MiB memory in use. Of the allocated memory 693.13 MiB is allocated by PyTorch, and 893.50 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
from sklearn.metrics import classification_report

# Inference on the test split
model.eval()
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
all_labels = []
all_preds = []

with torch.no_grad():
    for batch in test_loader:
        features = batch["features"].to(device)
        labels = batch["labels"].to(device)
        outputs = model(features)
        preds = outputs.argmax(dim=-1)

        # Drop padded positions (label -1). Boolean-mask indexing walks the
        # batch in row-major order, i.e. sample by sample, position by position.
        keep = labels != -1
        all_labels.extend(labels[keep].tolist())
        all_preds.extend(preds[keep].tolist())

# Evaluation report
print(classification_report(all_labels, all_preds))