In [49]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error


In [50]:
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")
features_data = pd.read_csv("features.csv")
# Выделение координат из train и test данных
train_coordinates = train_data[['lat', 'lon']].values
test_coordinates = test_data[['lat', 'lon']].values
# Обучение модели ближайших соседей на основе координат
nbrs = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(features_data[['lat', 'lon']])
# Поиск ближайших соседей для train и test данных
_, train_indices = nbrs.kneighbors(train_coordinates)
_, test_indices = nbrs.kneighbors(test_coordinates)
# Соединение признаков из features с train и test данными
train_features = features_data.iloc[train_indices.flatten()].reset_index(drop=True)
test_features = features_data.iloc[test_indices.flatten()].reset_index(drop=True)
# Добавление признаков к исходным данным
train_data = pd.concat([train_data, train_features.iloc[:, 2:]], axis=1)
test_data = pd.concat([test_data, test_features.iloc[:, 2:]], axis=1)  # Исключаем координаты из признаков тестового набора
# Сохранение новых данных в CSV файлы
train_data.to_csv("train_with_features.csv", index=False)
test_data.to_csv("test_with_features.csv", index=False)



In [51]:
# Загрузка данных
train_data = pd.read_csv("train_with_features.csv")

# Предобработка данных
scaler = StandardScaler()
X = scaler.fit_transform(train_data.drop(columns=['id', 'score']))
y = train_data['score'].values

# Разделение данных на тренировочную и валидационную выборки с сидированием
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Преобразование данных в тензоры PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

# Установка сида для воспроизводимости результатов
torch.manual_seed(42)

# Определение архитектуры нейронной сети
class Net(nn.Module):
    def __init__(self, input_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 1)
        self.dropou=nn.Dropout(0.25)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x= self.dropou(x)
        x = torch.relu(self.fc2(x))
        x= self.dropou(x)
        x = torch.relu(self.fc3(x))
        x= self.dropou(x)
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        return x

# Инициализация модели
input_size = X_train.shape[1]
model = Net(input_size)

# Определение функции потерь и оптимизатора
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

# Обучение модели с early stopping
num_epochs = 100
batch_size = 64
early_stopping_epochs = 10  # количество эпох для early stopping
best_val_loss = float('inf')
patience = 0

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for epoch in range(num_epochs):
    train_loss = 0.0
    val_loss = 0.0
    val_mae = 0.0
    train_mae = 0.0

    # Обучение
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * len(inputs)
        train_mae += mean_absolute_error(targets.numpy(), outputs.squeeze().detach().numpy()) * len(inputs)
    train_loss /= len(train_loader.dataset)
    train_mae /= len(train_loader.dataset)

    # Валидация
    model.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets)
            val_loss += loss.item() * len(inputs)
            val_mae += mean_absolute_error(targets.numpy(), outputs.squeeze().detach().numpy()) * len(inputs)
        val_loss /= len(val_loader.dataset)
        val_mae /= len(val_loader.dataset)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train MAE: {train_mae:.4f}, Val Loss: {val_loss:.4f}, Val MAE: {val_mae:.4f}")
    
    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience = 0
    else:
        patience += 1
        if patience >= early_stopping_epochs:
            print(f"Early stopping on epoch {epoch+1}")
            break

# Сохранение обученной модели
torch.save(model, "trained_model.pth")
print("Обученная модель сохранена в файл trained_model.pth")


Epoch 1/100, Train Loss: 0.0157, Train MAE: 0.0777, Val Loss: 0.0111, Val MAE: 0.0588
Epoch 2/100, Train Loss: 0.0131, Train MAE: 0.0710, Val Loss: 0.0103, Val MAE: 0.0667
Epoch 3/100, Train Loss: 0.0126, Train MAE: 0.0697, Val Loss: 0.0101, Val MAE: 0.0589
Epoch 4/100, Train Loss: 0.0119, Train MAE: 0.0668, Val Loss: 0.0105, Val MAE: 0.0572
Epoch 5/100, Train Loss: 0.0117, Train MAE: 0.0657, Val Loss: 0.0112, Val MAE: 0.0548
Epoch 6/100, Train Loss: 0.0114, Train MAE: 0.0644, Val Loss: 0.0104, Val MAE: 0.0678
Epoch 7/100, Train Loss: 0.0107, Train MAE: 0.0657, Val Loss: 0.0097, Val MAE: 0.0641
Epoch 8/100, Train Loss: 0.0098, Train MAE: 0.0607, Val Loss: 0.0097, Val MAE: 0.0616
Epoch 9/100, Train Loss: 0.0107, Train MAE: 0.0638, Val Loss: 0.0098, Val MAE: 0.0572
Epoch 10/100, Train Loss: 0.0099, Train MAE: 0.0620, Val Loss: 0.0091, Val MAE: 0.0577
Epoch 11/100, Train Loss: 0.0089, Train MAE: 0.0592, Val Loss: 0.0100, Val MAE: 0.0609
Epoch 12/100, Train Loss: 0.0092, Train MAE: 0.0581,

In [52]:
import torch
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

# Загрузка тестовых данных
test_data = pd.read_csv("test_with_features.csv")

# Предобработка данных
scaler = StandardScaler()
X_test = scaler.fit_transform(test_data.drop(columns=['id']))

# Преобразование данных в тензоры PyTorch
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Загрузка обученной модели
model = torch.load("trained_model.pth")

# Оценка модели
with torch.no_grad():
    model.eval()
    predictions = model(X_test_tensor).squeeze().numpy()

# Сохранение предсказаний в файл
test_data['score'] = predictions
test_data[['id', 'score']].to_csv("submission.csv", index=False)

print("Предсказания сохранены в файл predictions.csv")

Предсказания сохранены в файл predictions.csv
