In [1]:
import pandas as pd
import numpy as np

In [None]:
mst = pd.read_csv('mst_data/mst_game.csv')
df = pd.DataFrame()
for i, row in mst.iterrows():
    _df = pd.read_csv(row['csv_file'])
    _df['file_id'] = i
    df = pd.concat([df, _df])
# round当たりのcountが500以上と200以下のものを削除
df = df.groupby(['file_id', 'round']).filter(lambda x: x['round'].count() > 200 and x['round'].count() < 500)

# countを350に統一
df = df.groupby(['file_id', 'round']).tail(200)

# np.arrayに変換
grouped_df = df.groupby(['file_id', 'round'])
grouped_arrays = {name: group.drop(columns=['round', 'timestamp', 'file_id']).values for name, group in grouped_df}
grouped_arrays

In [3]:
X = np.array(list(grouped_arrays.values()))
Y = np.array([mst.loc[file]['player'] for file, round in list(grouped_arrays.keys())])
X.shape, Y.shape

((105, 200, 99), (105,))

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

n_samples = 105
time_steps = 200
n_features = 99

# Tensorに変換
X_tensor = torch.tensor(X).to(torch.float32)
y_tensor = torch.tensor(Y).to(torch.float32)
dataset = TensorDataset(X_tensor, y_tensor)

# データセットの分割
train_size = int(0.7 * n_samples)
val_size = int(0.15 * n_samples)
test_size = n_samples - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# LSTMモデルの定義
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        h_lstm, _ = self.lstm(x)
        h_lstm_last = h_lstm[:, -1, :]  # 最後のタイムステップの出力を使用
        out = self.fc(h_lstm_last)
        return torch.sigmoid(out)

# ハイパーパラメータの設定
input_dim = n_features
hidden_dim = 50
output_dim = 1
num_epochs = 20

# モデル、損失関数、最適化手法の定義
model = LSTMClassifier(input_dim, hidden_dim, output_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_losses = []
val_losses = []

# モデルの学習
for epoch in range(num_epochs):
    # 訓練モード
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        # 順伝播
        outputs = model(X_batch)
        outputs = outputs.squeeze(1)  # バッチ次元を維持しながら1次元を削除
        loss = criterion(outputs, y_batch)
        
        # 逆伝播と最適化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * X_batch.size(0)
    
    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)
    
    # 検証モード
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            outputs = outputs.squeeze(1)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item() * X_batch.size(0)
            predicted = (outputs > 0.5).float()
            accuracy = (predicted == y_batch).float().mean()
    
    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}')

# モデルの評価（テスト）
model.eval()
test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        outputs = outputs.squeeze(1)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item() * X_batch.size(0)
        predicted = (outputs > 0.5).float()
        accuracy = (predicted == y_batch).float().mean()

test_loss /= len(test_loader.dataset)
print(f'Test Loss: {test_loss:.4f} Accuracy: {accuracy:.4f}')


Epoch [1/20], Train Loss: 0.6741, Val Loss: 0.6091, Accuracy: 0.7143
Epoch [2/20], Train Loss: 0.6522, Val Loss: 0.6191, Accuracy: 0.7143
Epoch [3/20], Train Loss: 0.6443, Val Loss: 0.6184, Accuracy: 0.7143
Epoch [4/20], Train Loss: 0.6302, Val Loss: 0.5967, Accuracy: 0.7143
Epoch [5/20], Train Loss: 0.5981, Val Loss: 0.5689, Accuracy: 0.7143
Epoch [6/20], Train Loss: 0.5217, Val Loss: 0.4696, Accuracy: 1.0000
Epoch [7/20], Train Loss: 0.3420, Val Loss: 0.6226, Accuracy: 0.7143
Epoch [8/20], Train Loss: 0.3653, Val Loss: 0.5770, Accuracy: 0.7143
Epoch [9/20], Train Loss: 0.2995, Val Loss: 0.2896, Accuracy: 0.7143
Epoch [10/20], Train Loss: 0.2077, Val Loss: 0.2188, Accuracy: 1.0000
Epoch [11/20], Train Loss: 0.1465, Val Loss: 0.8239, Accuracy: 0.7143
Epoch [12/20], Train Loss: 0.3664, Val Loss: 0.3953, Accuracy: 0.8571
Epoch [13/20], Train Loss: 0.2171, Val Loss: 0.2287, Accuracy: 1.0000
Epoch [14/20], Train Loss: 0.1674, Val Loss: 0.1559, Accuracy: 1.0000
Epoch [15/20], Train Loss: 0.

In [8]:
import plotly.graph_objects as go

# 学習曲線のプロット
fig = go.Figure()

# トレーニング損失の追加
fig.add_trace(go.Scatter(
    x=list(range(1, num_epochs + 1)),
    y=train_losses,
    mode='lines+markers',
    name='Train Loss'
))

# 検証損失の追加
fig.add_trace(go.Scatter(
    x=list(range(1, num_epochs + 1)),
    y=val_losses,
    mode='lines+markers',
    name='Validation Loss'
))

# レイアウトの設定
fig.update_layout(
    title='Training and Validation Loss',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    template='plotly_dark',
    width=600,
    height=500
)

# プロットの表示
fig.show()