In [18]:
import os
import random

# ディレクトリ内のCSVファイルを取得
data_dir = "basic5000_features_with_accent"
csv_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith(".csv")]

# 学習データと評価データに分割（80%学習、20%評価）
random.shuffle(csv_files)
train_files = csv_files[:int(0.8 * len(csv_files))]
eval_files = csv_files[int(0.8 * len(csv_files)):]

print(f"Training files: {len(train_files)}")
print(f"Evaluation files: {len(eval_files)}")

Training files: 3980
Evaluation files: 996


In [20]:
import csv

def process_csv(file_path):
    """
    CSVファイルを読み込み、音素、特徴量、アクセントに分割
    """
    moras = []  # 音素リスト
    features = []  # 特徴量リスト
    accents = []  # アクセントリスト

    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader)  # ヘッダー行をスキップ
        for row in reader:
            # 音素は1列目
            mora = row[0]
            # 特徴量は2列目から最後の1つ手前まで
            feature = list(map(float, row[1:-1]))
            # アクセントは最後の列
            accent = int(row[-1])

            # データを格納
            moras.append(mora)
            features.append(feature)
            accents.append(accent)
    
    return moras, features, accents

In [21]:
def load_data(file_list):
    """
    ファイルリストから特徴量とアクセントをまとめてロード
    """
    all_features = []
    all_accents = []

    for file_path in file_list:
        _, features, accents = process_csv(file_path)
        all_features.extend(features)
        all_accents.extend(accents)
    
    return all_features, all_accents

# データロード
train_features, train_accents = load_data(train_files)
eval_features, eval_accents = load_data(eval_files)

print(f"Training data: {len(train_features)} samples")
print(f"Evaluation data: {len(eval_features)} samples")

Training data: 141414 samples
Evaluation data: 35696 samples


In [22]:
import torch
from torch.utils.data import Dataset, DataLoader

class AccentDataset(Dataset):
    def __init__(self, features, accents):
        self.features = features
        self.accents = accents
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return torch.tensor(self.features[idx], dtype=torch.float32), torch.tensor(self.accents[idx], dtype=torch.long)

# データセット作成
train_dataset = AccentDataset(train_features, train_accents)
eval_dataset = AccentDataset(eval_features, eval_accents)

# データローダー作成
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)

In [23]:
import torch.nn as nn

class AccentTransformer(nn.Module):
    def __init__(self, feature_dim, num_classes=2, num_heads=4, num_layers=2, hidden_dim=128):
        super(AccentTransformer, self).__init__()
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=feature_dim, nhead=num_heads, dim_feedforward=hidden_dim),
            num_layers=num_layers
        )
        self.fc = nn.Linear(feature_dim, num_classes)  # 出力層
    
    def forward(self, x):
        # Transformerにデータを通す
        x = self.encoder(x.unsqueeze(1))  # バッチサイズ x 1 x 特徴量次元
        x = x.squeeze(1)  # バッチサイズ x 特徴量次元
        out = self.fc(x)
        return out

In [25]:
import torch.optim as optim
from tqdm import tqdm

# モデル定義
feature_dim = len(train_features[0])  # 特徴量の次元数
model = AccentTransformer(feature_dim)

# 最適化器と損失関数
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 学習ループ
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    # tqdm を使用して進捗バーを表示
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
        for features, labels in pbar:
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
            # tqdm の表示を更新
            pbar.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

Epoch 1/10:   0%|          | 0/4420 [00:00<?, ?batch/s]

Epoch 1/10: 100%|██████████| 4420/4420 [01:52<00:00, 39.44batch/s, loss=0.617]


Epoch 1, Loss: 0.7007573174405421


Epoch 2/10: 100%|██████████| 4420/4420 [01:49<00:00, 40.34batch/s, loss=0.688]


Epoch 2, Loss: 0.6944000630626851


Epoch 3/10: 100%|██████████| 4420/4420 [01:39<00:00, 44.63batch/s, loss=0.681]


Epoch 3, Loss: 0.6936208206334266


Epoch 4/10: 100%|██████████| 4420/4420 [01:29<00:00, 49.14batch/s, loss=0.714]


Epoch 4, Loss: 0.6935250591233845


Epoch 5/10: 100%|██████████| 4420/4420 [01:28<00:00, 49.67batch/s, loss=0.691]


Epoch 5, Loss: 0.6932381208126361


Epoch 6/10: 100%|██████████| 4420/4420 [01:26<00:00, 51.30batch/s, loss=0.667]


Epoch 6, Loss: 0.6931737520171506


Epoch 7/10: 100%|██████████| 4420/4420 [01:25<00:00, 51.50batch/s, loss=0.698]


Epoch 7, Loss: 0.693161761086451


Epoch 8/10: 100%|██████████| 4420/4420 [01:24<00:00, 52.54batch/s, loss=0.697]


Epoch 8, Loss: 0.6931368832135093


Epoch 9/10: 100%|██████████| 4420/4420 [01:25<00:00, 51.93batch/s, loss=0.675]


Epoch 9, Loss: 0.693089071873626


Epoch 10/10: 100%|██████████| 4420/4420 [01:23<00:00, 52.97batch/s, loss=0.704]

Epoch 10, Loss: 0.6930611639136103





In [27]:
# ここから！！
import os
import random
import csv
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# ディレクトリ内のCSVファイルを取得
data_dir = "basic5000_features_with_accent"
csv_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith(".csv")]

# 学習データと評価データに分割（80%学習、20%評価）
random.shuffle(csv_files)
train_files = csv_files[:int(0.8 * len(csv_files))]
eval_files = csv_files[int(0.8 * len(csv_files)):]

print(f"Training files: {len(train_files)}")
print(f"Evaluation files: {len(eval_files)}")

Training files: 3980
Evaluation files: 996


In [31]:
def process_csv(file_path):
    """
    CSVファイルを読み込み、音素、特徴量、アクセントに分割
    """
    moras = []  # 音素リスト
    features = []  # 特徴量リスト
    accents = []  # アクセントリスト

    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader)  # ヘッダー行をスキップ
        for row in reader:
            # 音素は1列目
            mora = row[0]
            # 特徴量は2列目から最後の1つ手前まで
            feature = list(map(float, row[1:-1]))
            # アクセントは最後の列
            accent = int(row[-1])

            # データを格納
            moras.append(mora)
            features.append(feature)
            accents.append(accent)
    
    return moras, features, accents

def load_data(file_list):
    """
    ファイルリストから特徴量とアクセントをまとめてロード
    """
    all_features = []
    all_accents = []

    for file_path in file_list:
        _, features, accents = process_csv(file_path)
        all_features.append(features)
        all_accents.append(accents)
    
    return all_features, all_accents

# データロード
train_features, train_accents = load_data(train_files)
eval_features, eval_accents = load_data(eval_files)

print(f"Training data: {len(train_features)} samples")
print(f"Evaluation data: {len(eval_features)} samples")

Training data: 3980 samples
Evaluation data: 996 samples


In [39]:
from torch.nn.utils.rnn import pad_sequence
import torch

# カスタムcollate_fn
def collate_fn(batch):
    features, accents = zip(*batch)
    
    # pad_sequenceでパディング
    features_padded = pad_sequence(features, batch_first=True, padding_value=0.0)  # バッチサイズ x 最大シーケンス長 x 特徴量次元
    
    # アクセントラベル
    accents = torch.stack(accents, dim=0)
    
    return features_padded, accents

# AccentDatasetクラス
class AccentDataset(Dataset):
    def __init__(self, features, accents):
        self.features = features
        self.accents = accents
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        feature = torch.tensor(self.features[idx], dtype=torch.float32)
        accent = torch.tensor(self.accents[idx], dtype=torch.long)
        return feature, accent

# データローダー作成（collate_fnを使用）
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

class AccentTransformer(nn.Module):
    def __init__(self, feature_dim, num_classes=2, num_heads=4, num_layers=2, hidden_dim=128):
        super(AccentTransformer, self).__init__()
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=feature_dim, nhead=num_heads, dim_feedforward=hidden_dim),
            num_layers=num_layers
        )
        self.fc = nn.Linear(feature_dim, num_classes)  # 出力層
    
    def forward(self, x):
        # x: バッチサイズ x 最大シーケンス長 x 特徴量次元
        src_key_padding_mask = (x == 0).all(dim=-1)  # パディング部分をTrueにする
        x = self.encoder(x, src_key_padding_mask=src_key_padding_mask)
        x = x.mean(dim=1)  # シーケンス全体の平均を取る
        out = self.fc(x)
        return out

# モデル定義
feature_dim = len(train_features[0][0])  # 特徴量の次元数
model = AccentTransformer(feature_dim)

# 最適化器と損失関数
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [40]:
# 学習ループ
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    # tqdm を使用して進捗バーを表示
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
        for features, labels in pbar:
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
            # tqdm の表示を更新
            pbar.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

    # 評価データでの評価
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in eval_loader:
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}, Evaluation Accuracy: {accuracy:.2f}%")

# モデルの保存
torch.save(model.state_dict(), "accent_transformer_model.pth")
print("Model saved as 'accent_transformer_model.pth'")

# 学習が完了した後に、最終的な評価結果を出力
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in eval_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

final_accuracy = 100 * correct / total
print(f"Final Evaluation Accuracy: {final_accuracy:.2f}%")

Epoch 1/100:   0%|          | 0/125 [00:00<?, ?batch/s]


RuntimeError: stack expects each tensor to be equal size, but got [32] at entry 0 and [40] at entry 1

In [None]:
# 学習データの次元数が揃わないと学習できないっぽい！
# 次元を揃える必要がある
