# Настройка обучения

In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from networks import Net
from data_functions import TitanicDataset, get_loaders
from metrics_functions import compute_binary_accuracy, validation_loss

if torch.cuda.is_available():
    dev = 'cuda:0'
else:
    dev = 'cpu'
device = torch.device(dev)

model = Net()
model.to(device)
model.type(torch.cuda.FloatTensor)

# Hyper Params
num_epochs = 3000
batch_size = 32
learning_rate = 1e-2
weight_decay = 1e-2
validation_split = .1

# Загрузка данных
data_train = TitanicDataset(normalize=True)
train_loader, val_loader = get_loaders(batch_size=batch_size, data_train=data_train, validation_split=validation_split)

# Loss Function, Optimizer
loss = nn.CrossEntropyLoss().type(torch.cuda.FloatTensor)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# LR Annealing
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=.6, patience=100)


# Обучение

In [2]:
from tqdm import tqdm
import neptune
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

run = neptune.init_run(
    project="pas-zhukov/Titanic-Kaggle",
    api_token=config['Config']['api_token'],
    source_files=['networks.py', 'learning.ipynb', 'metrics_functions.py', 'data_functions.py']
)
params = {
    'num_epochs': num_epochs,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'weight_decay': weight_decay,
    'validation_split': validation_split,
    'optimizer': 'Adam',
    'annealing_factor': .8
}
run["parameters"] = params

loss_history = []
val_loss_history = []
train_history = []
val_history = []
lr_history = []

for epoch in tqdm(range(num_epochs)):
    model.train()

    loss_accum = 0
    correct_samples = 0
    total_samples = 0
    for i_step, (x, y) in enumerate(train_loader):

        run['train/batch/lr'].append(scheduler.optimizer.param_groups[0]['lr'])
        x = x.to(device)
        y = y.to(device)

        prediction = model(x)
        loss_value = loss(prediction, y)
        run['train/batch/loss'].append(loss)
        optimizer.zero_grad()
        loss_value.backward()
        # Обновляем веса
        optimizer.step()

        # Определяем индексы, соответствующие выбранным моделью лейблам
        _, indices = torch.max(prediction, dim=1)
        # Сравниваем с ground truth, сохраняем количество правильных ответов
        correct_samples += torch.sum(indices == y)
        # Сохраняем количество всех предсказаний
        total_samples += y.shape[0]
        run['train/batch/acc'].append(correct_samples/total_samples)

        loss_accum += loss_value

    # Среднее значение функции потерь за эпоху
    ave_loss = loss_accum / (i_step + 1)
    # Рассчитываем точность тренировочных данных на эпохе
    train_accuracy = float(correct_samples) / total_samples
    # Рассчитываем точность на валидационной выборке (вообще после этого надо бы гиперпараметры поподбирать...)
    val_accuracy = compute_binary_accuracy(model, val_loader)

    # Сохраняем значения ф-ии потерь и точности для последующего анализа и построения графиков
    loss_history.append(float(ave_loss))
    train_history.append(train_accuracy)
    val_history.append(val_accuracy)

    #Посчитаем лосс на валидационной выборке
    val_loss = validation_loss(model, val_loader, loss)
    val_loss_history.append(val_loss)

    run['train/epoch/loss'].append(ave_loss)
    run['valid/epoch/loss'].append(val_loss)
    run['train/epoch/acc'].append(train_accuracy)
    run['valid/epoch/acc'].append(val_accuracy)
    run['train/epoch/lr'].append(scheduler.optimizer.param_groups[0]['lr'])

    lr_history.append(scheduler.optimizer.param_groups[0]['lr'])
    # Уменьшаем лернинг рейт (annealing)
    scheduler.step(val_loss)

print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))

  run = neptune.init_run(


https://new-ui.neptune.ai/pas-zhukov/Titanic-Kaggle/e/TIT-13


  x = F.softmax(x)
/tmp/ipykernel_4168848/3515371177.py:44: NeptuneUnsupportedType: You're attempting to log a type that is not directly supported by Neptune (<class 'list'>).
        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type
  run['train/batch/loss'].append(loss)
 66%|██████▋   | 1995/3000 [12:59<06:32,  2.56it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 7.92 GiB total capacity; 2.11 GiB already allocated; 44.44 MiB free; 2.16 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Генерация предсказаний для тестовой выборки

In [3]:
import torch
import os
import datetime
import pandas as pd
from torch.utils.data import DataLoader

test_set = TitanicDataset(test=True, normalize=True)
test_loader = DataLoader(test_set, batch_size=1)

predictions = []
labels = {}

for i_step, (x, y) in enumerate(test_loader):
    x = x.to(device)
    model.eval()
    prediction = model(x)
    predictions.append(torch.argmax(prediction, dim=1))
    labels[i_step+892] = int(torch.argmax(prediction, dim=1))


output = pd.DataFrame(labels.items(), columns = ['PassengerId', 'Survived'])
output.to_csv(os.path.join('outputs/output'+datetime.datetime.now().strftime('%d%m%y%H%M')+'.csv'), index=False)
output

  x = F.softmax(x)


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0
