In [29]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, confusion_matrix

from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

In [30]:
# Mount Google Drive at /content/gdrive (requires the google.colab package);
# the dataset and submission paths used in main() live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [31]:
# Data loading helper
def load_data(train_csv, val_csv, test_csv):
    """Read the three CSV splits and return standardized features plus labels.

    For the train and validation files, every column except the last three is
    treated as a feature and the ``order0`` column is the label.  The test
    file is used in full as features (per the original author's note it has no
    ``order0`` column, only the 360 coordinate/velocity features -- this is
    not verifiable from the code alone).

    Standardization statistics are fitted on the training features only and
    then applied to the validation and test features.

    Args:
        train_csv: Path to the training CSV file.
        val_csv: Path to the validation CSV file.
        test_csv: Path to the test CSV file.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test)`` where the ``X_*``
        entries are the scaled feature arrays and the ``y_*`` entries are the
        raw ``order0`` label arrays.
    """
    # All three files are read up front, in train/val/test order.
    train_frame, val_frame, test_frame = (
        pd.read_csv(path) for path in (train_csv, val_csv, test_csv)
    )

    def split_features_labels(frame):
        # Features: all but the trailing three columns; label: the 'order0' column.
        return frame.iloc[:, :-3].values, frame['order0'].values

    train_features, train_labels = split_features_labels(train_frame)
    val_features, val_labels = split_features_labels(val_frame)
    test_features = test_frame.values

    # Fit on the training split only so validation/test statistics never leak in.
    scaler = StandardScaler().fit(train_features)

    return (
        scaler.transform(train_features),
        train_labels,
        scaler.transform(val_features),
        val_labels,
        scaler.transform(test_features),
    )

In [32]:
# Multilayer perceptron definition
class MLP(nn.Module):
    """Perceptron with one hidden layer: Linear -> ReLU -> Dropout(0.5) -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units (one raw score per class).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)  # regularization on the hidden activations
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the raw (un-normalized) output scores for the batch ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

In [33]:
# Build the model together with its loss criterion and optimizer
def init_model(input_size=360, hidden_size=256, output_size=3, lr=0.001):
    """Create an ``MLP`` with a cross-entropy loss and an Adam optimizer.

    Args:
        input_size: Number of input features passed to ``MLP``.
        hidden_size: Hidden layer width passed to ``MLP``.
        output_size: Number of output units passed to ``MLP``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        Tuple ``(model, criterion, optimizer)``; the optimizer is bound to the
        parameters of the returned model.
    """
    net = MLP(input_size, hidden_size, output_size)
    return net, nn.CrossEntropyLoss(), optim.Adam(net.parameters(), lr=lr)

In [34]:
# Model evaluation helper
def evaluate(model, X, y=None):
    """Predict class indices for ``X`` and, when labels are given, score them.

    The model is switched to eval mode (and left in it) and the forward pass
    runs without gradient tracking.  The input is placed on the same device as
    the model's first parameter, so a model that has been moved off the CPU can
    be evaluated too; predictions are always returned on the CPU.

    Args:
        model: ``torch.nn.Module`` producing one score per class along dim 1.
        X: Feature data as an array-like or tensor; converted to float32.
        y: Optional true labels.  When omitted, no metrics are computed.

    Returns:
        Tuple ``(predictions, accuracy, conf_matrix)``.  ``predictions`` is a
        CPU tensor holding the index of the largest score in each row.
        ``accuracy`` and ``conf_matrix`` come from ``accuracy_score`` and
        ``confusion_matrix`` and are both ``None`` when ``y`` is ``None``.
    """
    model.eval()

    # Follow the model's device; a parameter-less module falls back to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        # as_tensor accepts arrays and existing tensors alike, without the
        # copy-construct warning torch.tensor() emits for tensor inputs.
        inputs = torch.as_tensor(X, dtype=torch.float32, device=device)
        outputs = model(inputs)
        _, predictions = torch.max(outputs, 1)

    # .numpy() below (and in callers) requires a CPU tensor.
    predictions = predictions.cpu()

    if y is None:
        return predictions, None, None

    predicted_labels = predictions.numpy()
    accuracy = accuracy_score(y, predicted_labels)
    conf_matrix = confusion_matrix(y, predicted_labels)
    return predictions, accuracy, conf_matrix

In [35]:
# Model training routine
def train(model, criterion, optimizer, X_train, y_train, X_val, y_val, epochs=10, batch_size=64):
    """Train ``model`` with mini-batches and report validation accuracy per epoch.

    Each epoch iterates once over the shuffled training data, then calls
    ``evaluate(model, X_val, y_val)`` and prints the mean of the per-batch
    training losses together with the validation accuracy.

    Args:
        model: ``torch.nn.Module`` to optimize; updated in place.
        criterion: Loss callable applied as ``criterion(outputs, targets)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        X_train: Training features; converted to a float32 tensor.
        y_train: Training labels; converted to an int64 tensor.
        X_val: Validation features, passed to ``evaluate`` unchanged.
        y_val: Validation labels, passed to ``evaluate`` unchanged.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the training loader.

    Returns:
        The same ``model`` object after training.
    """
    train_dataset = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # No validation DataLoader is built: evaluate() consumes X_val / y_val
    # directly, so a separate tensor copy of the validation set would go unused.

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode, so re-enable training
        # behavior (dropout etc.) at the start of every epoch.
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation-set evaluation
        _, val_accuracy, _ = evaluate(model, X_val, y_val)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Val Accuracy: {val_accuracy:.4f}")

    return model

In [37]:
def main(seed=42):
    """Run the full pipeline: load data, train the MLP, write test predictions.

    Reads the train/validation/test files from the mounted Google Drive folder,
    trains the model for a fixed number of epochs, predicts classes for the
    test set and saves them as a single ``order0`` column to ``submission.csv``.

    Args:
        seed: Value used to seed PyTorch's random number generator before the
            model is created, so weight initialization, batch shuffling and
            dropout repeat from run to run.
    """
    train_csv = '/content/gdrive/MyDrive/ColabNotebooks/2024/train.txt'
    val_csv = '/content/gdrive/MyDrive/ColabNotebooks/2024/val.txt'
    test_csv = '/content/gdrive/MyDrive/ColabNotebooks/2024/test.txt'
    out_csv = '/content/gdrive/MyDrive/ColabNotebooks/2024/submission.csv'
    lr = 0.001
    batch_size = 64
    num_epochs = 10

    # Seed before any model or loader is created; otherwise every run starts
    # from different weights and sees batches in a different order.
    torch.manual_seed(seed)

    # Load data
    X_train, y_train, X_val, y_val, X_test = load_data(train_csv, val_csv, test_csv)

    # Initialize the model
    model, criterion, optimizer = init_model(lr=lr)

    # Train the model
    trained_model = train(model, criterion, optimizer, X_train, y_train, X_val, y_val, epochs=num_epochs, batch_size=batch_size)

    # Predict on the test set (no labels, so no metrics are returned)
    predictions, _, _ = evaluate(trained_model, X_test, None)

    # Save the predictions to submission.csv
    pd.DataFrame(predictions.numpy(), columns=['order0']).to_csv(out_csv, index=False)

main()

Epoch 1/10, Loss: 0.8292, Val Accuracy: 0.7116
Epoch 2/10, Loss: 0.7751, Val Accuracy: 0.7356
Epoch 3/10, Loss: 0.7623, Val Accuracy: 0.7478
Epoch 4/10, Loss: 0.7549, Val Accuracy: 0.7311
Epoch 5/10, Loss: 0.7523, Val Accuracy: 0.7525
Epoch 6/10, Loss: 0.7485, Val Accuracy: 0.7418
Epoch 7/10, Loss: 0.7455, Val Accuracy: 0.7613
Epoch 8/10, Loss: 0.7427, Val Accuracy: 0.7511
Epoch 9/10, Loss: 0.7405, Val Accuracy: 0.7429
Epoch 10/10, Loss: 0.7395, Val Accuracy: 0.7488
