In [1]:
import torch

# --- Run configuration (shared by every cell below) ---

# Training device: the third CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

num_epochs = 50    # training epochs per cross-validation fold
k_folds = 4        # NOTE(review): not referenced by the cells visible here
n_trials = 500     # number of Optuna trials passed to study.optimize

img_dir = "hard_mode_time:60"    # directory holding the frame images
img_size = 600                   # image side in pixels; also used to normalise coordinates

workspace = "exp-4-try"     # Comet workspace
project_name = "60-1fr"     # Comet project name
filename = "6000"           # text file the best trials are appended to

In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from tqdm import tqdm  # Импортируем tqdm для отображения прогресс-бара

import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from tqdm import tqdm


import torch
from torch.utils.data import Dataset

class CirclesSequenceDataset(Dataset):
    """Sliding-window dataset of grayscale frames with coordinate targets.

    Frame file names are parsed as ``<index>_<c1>_<c2>_...<ext>``: the integer
    before the first underscore orders the frames, and the remaining
    underscore-separated numbers are that frame's coordinates.

    Sample ``i`` is made of ``sequence_length`` consecutive (sub-sampled)
    frames starting at position ``i`` and, as target, the coordinates of the
    ``prediction_length`` frames that follow, concatenated into one flat
    float32 vector and divided by ``img_size``.

    Args:
        img_dir: Directory containing the frame images.
        img_size: Divisor used to normalise the coordinates.
        transform: Callable applied to each PIL image; defaults to
            ``transforms.ToTensor()``.
        num_images: Exclusive upper bound of the slice taken from the sorted
            file list; a falsy value means "no upper bound".
        discret: Sub-sampling step; every ``discret``-th file is kept,
            starting from sorted position 6.
        sequence_length: Number of input frames per sample.
        prediction_length: Number of future frames whose coordinates form the
            target.

    Raises:
        ValueError: If fewer than ``sequence_length + prediction_length``
            frames remain after sub-sampling.
    """

    def __init__(self, img_dir, img_size, transform=None, num_images=4375, discret=7, sequence_length=1, prediction_length=35):
        self.img_dir = img_dir
        self.transform = transform or transforms.ToTensor()
        self.img_size = img_size
        self.discret = discret
        self.sequence_length = sequence_length
        self.prediction_length = prediction_length

        # Order files by the integer prefix before the first underscore.
        all_imgs = sorted(os.listdir(img_dir), key=lambda x: int(x.split('_')[0]))
        if num_images:
            self.imgs = all_imgs[6:num_images:self.discret]
        else:
            self.imgs = all_imgs[6::self.discret]

        if len(self.imgs) < self.sequence_length + self.prediction_length:
            raise ValueError(f"Недостаточно изображений для создания последовательностей (минимум {self.sequence_length + self.prediction_length} изображений).")

        self.cached_images = [None] * len(self.imgs)

        # Load and transform every selected frame up front so that
        # __getitem__ never has to read from disk.
        print("Кэширование изображений:")
        for idx in tqdm(range(len(self.imgs)), desc="Прогресс кэширования"):
            self._load_image(idx)

    def _load_image(self, idx):
        """Return the transformed frame at position ``idx``, caching it on first use."""
        if self.cached_images[idx] is None:
            img_path = os.path.join(self.img_dir, self.imgs[idx])
            image = Image.open(img_path).convert("L")  # single-channel grayscale
            self.cached_images[idx] = self.transform(image)
        return self.cached_images[idx]

    def __len__(self):
        """Number of complete (input window, target window) pairs."""
        return len(self.imgs) - self.sequence_length - self.prediction_length + 1

    def __getitem__(self, idx):
        """Return ``(frames, targets)`` for sample ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        # Without this normalisation a negative index would silently pair
        # frames taken from the end of the list with labels from its start.
        if idx < 0:
            idx += n_samples
        if idx < 0 or idx >= n_samples:
            raise IndexError("Индекс выходит за пределы списка изображений.")

        # Input window: `sequence_length` consecutive frames stacked on a new first axis.
        imgs_sequence = torch.stack(
            [self._load_image(idx + i) for i in range(self.sequence_length)]
        )

        # Target: coordinates of the following frames, flattened into one vector.
        coords = []
        first_label = idx + self.sequence_length
        for label_img_name in self.imgs[first_label:first_label + self.prediction_length]:
            # splitext strips only the extension, so fractional coordinates
            # such as "3_70.5_80.png" are not cut at their decimal point.
            stem = os.path.splitext(label_img_name)[0]
            coords.extend(float(c) / self.img_size for c in stem.split('_')[1:])

        coords_tensor = torch.tensor(coords, dtype=torch.float32)

        return imgs_sequence, coords_tensor

In [3]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import timm
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
from torch.cuda.amp import GradScaler, autocast

# Preprocessing applied to every frame: convert the PIL image to a tensor,
# then normalise the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])]
)

In [4]:
from torch.utils.data import DataLoader, random_split

def get_data_loaders(dataset, batch_size, train_size=0.7, num_workers=40, seed=None):
    """Randomly split ``dataset`` and wrap both parts in DataLoaders.

    Args:
        dataset: Any sized, indexable dataset.
        batch_size: Batch size used by both loaders.
        train_size: Fraction of samples assigned to the training split; the
            training length is ``int(len(dataset) * train_size)`` and the
            remainder goes to validation.
        num_workers: Worker processes per loader (default keeps the previous
            hard-coded value of 40).
        seed: Optional integer seed for the split. ``None`` keeps the previous
            behaviour of drawing from the global torch RNG.

    Returns:
        ``(train_loader, val_loader)``.
    """
    train_len = int(len(dataset) * train_size)
    val_len = len(dataset) - train_len

    # A dedicated generator makes the split reproducible when a seed is given.
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    # Split into training and validation subsets.
    train_dataset, val_dataset = random_split(dataset, [train_len, val_len], **split_kwargs)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers, pin_memory=True)
    # Validation order does not affect the metrics, so keep it deterministic.
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers, pin_memory=True)

    print(f"Размер тренировочной выборки: {len(train_dataset)}")
    print(f"Размер валидационной выборки: {len(val_dataset)}")

    return train_loader, val_loader

# Build the dataset once; it is reused as a global by the training cell below.
dataset = CirclesSequenceDataset(
    img_dir=img_dir,
    img_size=img_size,
    transform=transform,
)

# Sanity check: show the (frames, targets) pair of one sample.
print(dataset[1])

Кэширование изображений:


Прогресс кэширования: 100%|███████████████████| 625/625 [01:22<00:00,  7.60it/s]

(tensor([[[[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]]]]), tensor([0.2950, 0.2900, 0.5983, 0.3333, 0.2417, 0.5750, 0.3000, 0.3000, 0.6100,
        0.3317, 0.2367, 0.5650, 0.3067, 0.3100, 0.6217, 0.3300, 0.2333, 0.5550,
        0.3150, 0.3183, 0.6333, 0.3283, 0.2283, 0.5433, 0.3250, 0.3250, 0.6433,
        0.3300, 0.2267, 0.5317, 0.3333, 0.3317, 0.6533, 0.3367, 0.2283, 0.5217,
        0.3433, 0.3400, 0.6600, 0.3450, 0.2333, 0.5100, 0.3483, 0.3483, 0.6683,
        0.3550, 0.2433, 0.5067, 0.3467, 0.3600, 0.6750, 0.3633, 0.2517, 0.4983,
        0.3433, 0.3717, 0.6850, 0.3683, 0.2583, 0.4883, 0.3450, 0.3833, 0.6967,
        0.3717, 0.2650, 0.4783, 0.3467, 0.3950, 0.7067, 0.3767, 0.2717, 0.4700,
        0.3533, 0.4000, 0.7183, 0.3800, 0.2683, 0.46




In [None]:
import comet_ml  # Подключаем Comet.ml для отслеживания экспериментов
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
import optuna
from optuna import Trial
from tqdm import tqdm
from thop import profile  # Для подсчета FLOPs
import numpy as np
from sklearn.model_selection import KFold
import comet_ml
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
import numpy as np
import optuna
from sklearn.model_selection import KFold
import gc
from sklearn.model_selection import ShuffleSplit
import torch
from torch.cuda.amp import autocast, GradScaler
import torch.profiler
import torch
import torch.profiler
from tqdm import tqdm
import time
import torch
from torch.cuda.amp import autocast, GradScaler
import gc
from tqdm import tqdm


# Helper that releases cached GPU memory.
def free_memory():
    """Release unused cached CUDA memory; do nothing when CUDA is unavailable.

    ``torch.cuda.ipc_collect()`` initialises CUDA and therefore raises on a
    CPU-only machine, which the notebook's CPU fallback for ``device`` would
    otherwise hit.
    """
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()  # return unused cached blocks to the driver
    torch.cuda.ipc_collect()  # reclaim memory released by CUDA IPC


# Log file that results are written to.
log_file_path = "600_normal_4.txt"

def log_to_file(message):
    """Append ``message`` followed by a newline to ``log_file_path``.

    The file is opened in append mode, so earlier entries are kept. UTF-8 is
    set explicitly so non-ASCII (e.g. Cyrillic) messages are written the same
    way regardless of the platform's default encoding.
    """
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.write(message + "\n")

class EncDec(nn.Module):
    """Per-frame CNN encoder followed by an LSTM and a sigmoid regression head.

    Every frame of the input sequence goes through one or two
    Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d stages. The flattened feature
    maps form the LSTM input sequence, and the LSTM output at the last time
    step is projected to ``output_size`` values squashed into (0, 1).

    Args:
        filters_1, filters_2: Output channels of the first / second conv stage.
        hidden_size: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability. Applied between stacked LSTM layers
            (only when ``num_layers > 1``) and to the last LSTM output before
            the linear head. Previously this argument was accepted but ignored.
        kernel_size_1, kernel_size_2: Conv kernel sizes.
        stride_1, stride_2: Conv strides.
        padding_1, padding_2: Conv paddings.
        use_second_conv: Whether the second conv stage is used.
        image_size: Side of the square input frames. ``None`` (default) falls
            back to the notebook-level ``img_size``, as before.
        output_size: Number of predicted values (default 210, the previously
            hard-coded width).
    """

    def __init__(self, filters_1, filters_2, hidden_size, num_layers, dropout_rate, kernel_size_1, kernel_size_2,
                 stride_1, stride_2, padding_1, padding_2, use_second_conv, image_size=None, output_size=210):
        super().__init__()

        side = img_size if image_size is None else image_size

        self.Conv2d_1 = nn.Conv2d(in_channels=1, out_channels=filters_1, kernel_size=kernel_size_1, stride=stride_1, padding=padding_1)
        self.BatchNorm_1 = nn.BatchNorm2d(filters_1)
        self.MaxPool_1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Track the spatial side of the feature map to size the LSTM input.
        self.High = (side - kernel_size_1 + 2 * padding_1) // stride_1 + 1
        self.High = (self.High - 2) // 2 + 1  # after MaxPool

        self.use_second_conv = use_second_conv
        if use_second_conv:
            self.Conv2d_2 = nn.Conv2d(in_channels=filters_1, out_channels=filters_2, kernel_size=kernel_size_2, stride=stride_2, padding=padding_2)
            self.BatchNorm_2 = nn.BatchNorm2d(filters_2)
            self.MaxPool_2 = nn.MaxPool2d(kernel_size=2, stride=2)

            self.High = (self.High - kernel_size_2 + 2 * padding_2) // stride_2 + 1
            self.High = (self.High - 2) // 2 + 1  # after MaxPool

        conv_output_size = filters_2 if use_second_conv else filters_1
        self.LSTM = nn.LSTM(
            conv_output_size * self.High * self.High,
            hidden_size,
            num_layers,
            batch_first=True,
            # nn.LSTM only applies dropout between layers, so it needs more than one.
            dropout=dropout_rate if num_layers > 1 else 0.0,
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, 1, H, W) to (batch, output_size)."""
        seq_len = x.size(1)

        frame_features = []
        for i in range(seq_len):
            # Frames are encoded one time step at a time, so BatchNorm
            # statistics are computed per step exactly as before.
            feat = torch.relu(self.BatchNorm_1(self.Conv2d_1(x[:, i])))
            feat = self.MaxPool_1(feat)

            if self.use_second_conv:
                feat = torch.relu(self.BatchNorm_2(self.Conv2d_2(feat)))
                feat = self.MaxPool_2(feat)

            frame_features.append(feat.flatten(start_dim=1))

        z = torch.stack(frame_features, dim=1)  # (batch, seq_len, features)

        output, _ = self.LSTM(z)

        # Only the last time step feeds the regression head.
        pred = self.fc(self.dropout(output[:, -1, :]))

        return self.sigmoid(pred)


import torch
from torch.cuda.amp import autocast, GradScaler
import gc
from tqdm import tqdm

import torch
from torch.cuda.amp import autocast, GradScaler
import gc
from tqdm import tqdm

def train_or_evaluate(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``, training if an optimizer is given.

    Args:
        model: Network to run; it is moved to the notebook-level ``device``.
        loader: DataLoader yielding ``(imgs_sequence, coords_tensor)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: When provided, the model is put in train mode and updated
            after every batch; when ``None`` the model is evaluated without
            gradients.

    Returns:
        ``(mean_loss, predictions, ground_truths)`` where ``mean_loss`` is the
        sample-weighted mean batch loss (``nan`` if the loader yielded no
        sample) and the two lists hold one float32 numpy row per sample.

    Raises:
        ValueError: If the model output and the target batch differ in shape.
    """
    model = model.to(device)
    is_train = optimizer is not None
    model.train(is_train)

    # Mixed precision is only meaningful on CUDA; on the CPU fallback both
    # autocast and the scaler are switched off instead of emitting warnings.
    use_amp = torch.device(device).type == "cuda"
    scaler = GradScaler(enabled=use_amp)

    total_loss = 0.0
    n_seen = 0
    predictions, ground_truths = [], []

    for imgs_sequence, coords_tensor in tqdm(loader, leave=False):
        imgs_sequence = imgs_sequence.to(device, non_blocking=True)
        coords_tensor = coords_tensor.to(device, non_blocking=True)

        with autocast(enabled=use_amp), torch.set_grad_enabled(is_train):
            outputs = model(imgs_sequence)

            # The loss below assumes outputs and targets have identical shape.
            if outputs.size() != coords_tensor.size():
                raise ValueError(f"Размеры outputs {outputs.size()} и coords {coords_tensor.size()} не совпадают.")

            loss = criterion(outputs, coords_tensor)

        if is_train:
            optimizer.zero_grad()
            scaler.scale(loss).backward()  # scaled backward pass
            scaler.step(optimizer)         # unscale gradients and update parameters
            scaler.update()                # adjust the scale for the next step

        batch_len = imgs_sequence.size(0)
        total_loss += loss.item() * batch_len
        n_seen += batch_len
        # Cast to float32 so downstream metrics are not computed in half precision.
        predictions.extend(outputs.detach().float().cpu().numpy())
        ground_truths.extend(coords_tensor.detach().cpu().numpy())

    # Batch tensors are locals and are released on return; explicitly deleting
    # them raised NameError whenever the loader yielded no batch.
    torch.cuda.empty_cache()
    gc.collect()

    # Normalise by the samples actually processed (equals len(loader.dataset)
    # unless the loader drops incomplete batches).
    mean_loss = total_loss / n_seen if n_seen else float("nan")
    return mean_loss, predictions, ground_truths


def calculate_flops(model, input_size):
    """Return the operation count that thop's ``profile`` reports for ``model``.

    A random tensor of shape ``input_size`` is created on the notebook-level
    ``device`` and used as the single positional input. The parameter count
    that ``profile`` also returns is discarded.
    """
    dummy_input = torch.randn(input_size).to(device)
    flops, _ = profile(model, inputs=(dummy_input,), verbose=False)
    return flops

def objective(trial: optuna.Trial, n_splits=2, img_size=600):
    """Optuna objective: K-fold cross-validate an ``EncDec`` and log to Comet.

    For every fold a freshly initialised model is trained for ``num_epochs``
    epochs and validated after the last one. Metrics are averaged over folds
    and logged to a Comet experiment together with the trial parameters.

    Args:
        trial: Optuna trial used to sample hyperparameters.
        n_splits: Number of cross-validation folds.
        img_size: Side of the square input used for the FLOPs measurement.
            NOTE(review): the model itself is sized from the notebook-level
            ``img_size``; the two must agree.

    Returns:
        ``(mean_mape, flops)`` - both are minimised by the study.

    Raises:
        optuna.exceptions.TrialPruned: If CUDA runs out of memory.
    """
    import os  # local import so this cell does not depend on an earlier cell's imports

    # Hyperparameters for the current trial.
    use_second_conv = trial.suggest_categorical('use_second_conv', [True, False])
    batch_size = trial.suggest_categorical('batch_size', [4, 6, 8, 10, 16])
    filters_1 = trial.suggest_int('filters_1', 2, 16)
    filters_2 = trial.suggest_int('filters_2', 2, 32) if use_second_conv else filters_1
    hidden_size = trial.suggest_int('hidden_size', 4, 256)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    kernel_size_1 = trial.suggest_int('kernel_size_1', 3, 5)
    kernel_size_2 = trial.suggest_int('kernel_size_2', 3, 5)
    stride_1 = trial.suggest_int('stride_1', 1, 2)
    stride_2 = trial.suggest_int('stride_2', 1, 2)
    padding_1 = trial.suggest_int('padding_1', 1, 2)
    padding_2 = trial.suggest_int('padding_2', 1, 2)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    def build_model():
        """Create a freshly initialised model for the sampled hyperparameters."""
        return EncDec(
            filters_1=filters_1, filters_2=filters_2, hidden_size=hidden_size,
            num_layers=num_layers, dropout_rate=dropout_rate,
            kernel_size_1=kernel_size_1, kernel_size_2=kernel_size_2,
            stride_1=stride_1, stride_2=stride_2,
            padding_1=padding_1, padding_2=padding_2,
            use_second_conv=use_second_conv
        ).to(device)

    # K-fold cross-validation splitter.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    criterion = nn.MSELoss()

    # Per-fold metrics.
    all_train_losses, all_val_losses = [], []
    all_r2_scores, all_mae_scores, all_mape_scores = [], [], []

    for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
        # Every fold starts from new weights. Reusing one model across folds
        # would validate on samples it had already been trained on.
        model = build_model()

        train_subset = torch.utils.data.Subset(dataset, train_idx)
        val_subset = torch.utils.data.Subset(dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=20, pin_memory=True, persistent_workers=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, num_workers=20, pin_memory=True, persistent_workers=True)

        optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

        try:
            for epoch in range(num_epochs):
                train_loss, train_preds, train_gt = train_or_evaluate(model, train_loader, criterion, optimizer)
                del train_preds, train_gt

                # Validation runs once, after the last training epoch.
                if epoch == num_epochs - 1:
                    val_loss, val_preds, val_gt = train_or_evaluate(model, val_loader, criterion)

                    all_train_losses.append(train_loss)
                    all_val_losses.append(val_loss)
                    all_r2_scores.append(r2_score(val_gt, val_preds))
                    all_mae_scores.append(mean_absolute_error(val_gt, val_preds))
                    all_mape_scores.append(mean_absolute_percentage_error(val_gt, val_preds))

                    del val_preds, val_gt

                free_memory()
                gc.collect()
                torch.cuda.empty_cache()

        except torch.cuda.OutOfMemoryError as e:
            print(f"Ошибка переполнения памяти: {str(e)}")
            torch.cuda.empty_cache()
            gc.collect()
            raise optuna.exceptions.TrialPruned() from e
        finally:
            # Drop the loaders so their persistent worker processes can exit
            # instead of piling up across folds and trials.
            del train_loader, val_loader

    # Average the metrics over all folds.
    mean_train_loss = np.mean(all_train_losses)
    mean_val_loss = np.mean(all_val_losses)
    mean_r2 = np.mean(all_r2_scores)
    mean_mae = np.mean(all_mae_scores)
    mean_mape = np.mean(all_mape_scores)

    gc.collect()
    torch.cuda.empty_cache()
    free_memory()

    # Measure cost on a single sample so the FLOPs objective reflects the
    # architecture and is not confounded by the sampled batch size.
    flops = calculate_flops(model, (1, 1, 1, img_size, img_size))

    # The API key is read from the environment rather than stored in the
    # notebook; when it is unset, None lets Comet use its own configuration.
    experiment = comet_ml.Experiment(
        api_key=os.environ.get("COMET_API_KEY"),
        project_name=project_name,
        workspace=workspace,
    )
    try:
        experiment.log_parameters(trial.params)
        experiment.log_metric("mean_train_loss", mean_train_loss)
        experiment.log_metric("mean_val_loss", mean_val_loss)
        experiment.log_metric("mean_r2", mean_r2)
        experiment.log_metric("mean_mae", mean_mae)
        experiment.log_metric("mean_mape", mean_mape)
        experiment.log_metric("flops", flops, epoch=fold)
    finally:
        experiment.end()

    return mean_mape, flops


# Cap this process's share of GPU memory before any trial allocates. Setting
# it after the study has finished (as before) has no effect on the run, and
# the call is skipped on the CPU fallback where it would raise.
if device.type == "cuda":
    torch.cuda.set_per_process_memory_fraction(0.98, device=device)

# Two objectives, both minimised: mean validation MAPE and model FLOPs.
study = optuna.create_study(directions=['minimize', 'minimize'], pruner=optuna.pruners.NopPruner())
study.optimize(objective, n_trials=n_trials)

# With several objectives, best_trials holds the Pareto-optimal trials.
best_trials = study.best_trials

with open(filename, 'a', encoding='utf-8') as f:
    for trial in best_trials:
        f.write(f"Лучшие параметры для trial {trial.number}:\n")
        for key, value in trial.params.items():
            f.write(f"{key}: {value}\n")

        f.write(f"\nЛучшие метрики для trial {trial.number}:\n")
        # objective returns (mean_mape, flops); the first value was
        # previously mislabelled as R² in this file.
        f.write(f"MAPE на валидации: {trial.values[0]}\n")
        f.write(f"FLOPs: {trial.values[1]}\n")

[I 2024-11-17 22:03:26,538] A new study created in memory with name: no-name-af6d3a30-bf16-4a42-9c31-479e85db0ac2
  scaler = GradScaler()  # Инициализация GradScaler для масштабирования градиентов
  with autocast():  # Включение смешанной точности
  scaler = GradScaler()  # Инициализация GradScaler для масштабирования градиентов
  with autocast():  # Включение смешанной точности
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/exp-4-try/60-1fr/19c76dee765042f69f24cca47228ea51

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/home/maindev/new_life/for_server_gen' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1