In [1]:
import os
import cv2
import json
import torch
from math import inf
from posixpath import defpath
import wandb
import optuna
import shutil
import logging
import numpy as np
import torchvision
import torch.optim as optim
from torchvision.ops import nms
import torchvision.ops as ops
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm
from PIL import Image
from collections import Counter
from torchsummary import summary
from sklearn.metrics import f1_score,accuracy_score,precision_score,recall_score,average_precision_score
from typing import Dict, Any, Optional
from torchvision import transforms , models
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

src = "/content/drive/MyDrive/kitti2012"
dst = "/content"
os.makedirs(dst, exist_ok=True)

!cp -r "$src" "$dst"

#src = "/content/drive/MyDrive/vkitti_sample (1)"
#dst = "/content"
#os.makedirs(dst, exist_ok=True)

#!cp -r "$src" "$dst"

wandb.login()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
class KITTI_Dataset(Dataset):
    def __init__(self, data_path, transform=None, mode='train'):
        self.data_path = data_path
        self.transform = transform
        self.mode = mode  # 'train', 'val', veya 'test'
        self.classes = ['Car', 'Van', 'Truck', 'Pedestrian', 'Cyclist', 'Tram', 'Misc', 'DontCare']
        self.class_map = {cls: idx for idx, cls in enumerate(self.classes)}
        self.data = []

        # KITTI görsel boyutları - normalizasyon için
        self.img_width = 1242
        self.img_height = 375
        self.max_depth = 80.0  # KITTI max derinlik

        image_dir = os.path.join(data_path, 'training', 'colored_0')
        label_dir = os.path.join(data_path, 'training', 'label_2')
        disp_dir = os.path.join(data_path, 'training', 'disp_noc')

        file_names = os.listdir(image_dir)
        for fname in file_names:
            if fname.endswith('.png'):
                scene_id = fname.split('_')[0]
                img_path = os.path.join(image_dir, fname)
                label_path = os.path.join(label_dir, f'{scene_id}.txt')
                disp_path = os.path.join(disp_dir, f'{scene_id}_10.png')
                if os.path.exists(label_path) and os.path.exists(disp_path):
                    self.data.append((img_path, label_path, disp_path))

    def __len__(self):
        return len(self.data)

    def convert_labels(self, label_file):

        labels = []
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                category = parts[0]
                if category in self.class_map :
                    x1, y1, x2, y2 = map(float, parts[4:8]) # 4-7.etiket değerleri x1,y1,x2,y2 ye denk geliyor

                    # [0,1] aralığına normalize et
                    x1_norm = x1 / self.img_width
                    y1_norm = y1 / self.img_height
                    x2_norm = x2 / self.img_width
                    y2_norm = y2 / self.img_height

                    category_num = self.class_map[category]
                    labels.append([category_num, x1_norm, y1_norm, x2_norm, y2_norm])
        return labels

    def load_disparity(self, disp_path):#konum haritasının olduğu image yüklenir
        disp_map = cv2.imread(disp_path, cv2.IMREAD_UNCHANGED) / 256.0
        return disp_map

    def calculate_depth(self, disp_map): # konum haritasından derinlik haritası çıkartır
        baseline = 0.54 # metre cinsinden kameralar arası mesafe
        focal_length = 721.5377 # piksel cinsinden kamera odak uzaklığı
        depth = (baseline * focal_length) / (disp_map + 1e-6)
        depth = np.clip(depth, 0, self.max_depth)

        # [0,1] aralığına normalizasyon
        normalized_depth = depth / self.max_depth
        return normalized_depth

    def get_depth_at_box(self, depth_map, x, y, w, h):#her nesnenin ortalama mesafesi için box içerisindeki merkez piksel depthi alınır

        x_pixel = int(x * self.img_width)
        y_pixel = int(y * self.img_height)

        # Sınır kontrolü
        x_pixel = np.clip(x_pixel, 0, self.img_width - 1)
        y_pixel = np.clip(y_pixel, 0, self.img_height - 1)

        if depth_map[y_pixel, x_pixel] == 0:
            return 0.0
        return depth_map[y_pixel, x_pixel]

    def get_disparity_at_box(self, disp_map, x, y, w, h):#her nesnenin box içerisindeki konum değerini hesaplar


        x_pixel = int(x * self.img_width)
        y_pixel = int(y * self.img_height)

        # Sınır kontrolü
        x_pixel = np.clip(x_pixel, 0, self.img_width - 1)
        y_pixel = np.clip(y_pixel, 0, self.img_height - 1)

        if disp_map[y_pixel, x_pixel] == 0:
            return 0.0

        # Disparite değerini de normalize et (max disparite ~300 civarı)
        max_disparity = 300.0
        normalized_disparity = disp_map[y_pixel, x_pixel] / max_disparity
        return np.clip(normalized_disparity, 0, 1)

    def __getitem__(self, idx):
        img_path, label_path, disp_path = self.data[idx]
        image = Image.open(img_path).convert('RGB')
        labels = self.convert_labels(label_path)
        disp_map = self.load_disparity(disp_path)
        #eğitimde doğrudan konum değerleri ile kayıp hesaplanırken test aşamasında direkt mesafe hesaplanabilir

        if self.mode != 'test':
            # Eğitim , Doğrulama: Normalize edilmiş disparite ile etiket döndür
            labels_with_disparity = []
            for label in labels:
                category_num, x, y, w, h = label
                disparity = self.get_disparity_at_box(disp_map, x, y, w, h)
                labels_with_disparity.append([category_num, x, y, w, h, disparity])
            output_labels = labels_with_disparity
        else:
            # Test: Normalize edilmiş derinlik ile etiket döndür
            depth_map = self.calculate_depth(disp_map)
            labels_with_depth = []
            for label in labels:
                category_num, x, y, w, h = label
                depth = self.get_depth_at_box(depth_map, x, y, w, h)
                labels_with_depth.append([category_num, x, y, w, h, depth])
            output_labels = labels_with_depth

        if self.transform:
            image = self.transform(image)
        output_labels = torch.tensor(output_labels, dtype=torch.float32)
        return image, output_labels

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        padding = (kernel_size - 1) // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        x_out = self.conv1(x_cat)
        attention_map = self.sigmoid(x_out)
        return x * attention_map

class EncoderBackBone(nn.Module):
    def __init__(self,İsPretreained=True):
        super(EncoderBackBone,self).__init__()
        efficient = models.efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)
        self.features = efficient.features
        self.SAttention = SpatialAttention()

    def forward(self, x):         # B,C,H,W

        outs = []

        # Her iki frame için özellikler
        for i, block in enumerate(self.features):
            x = block(x)
            if i > 2:  # C3'ten sonrası için Spatial Attention
                x = x * self.SAttention(x)
            if i in [3,5,7]:
                out = F.interpolate(x, size=256, mode='bilinear', align_corners=False)
                outs.append(out)
        return outs

class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size,
                                 stride, padding, groups=in_channels, bias=False)
        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.swish = nn.SiLU()

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        x = self.bn(x)
        return self.swish(x)

class BiFPNBlock(nn.Module):
    def __init__(self, channels, epsilon=1e-4):
        super(BiFPNBlock, self).__init__()
        self.epsilon = epsilon
        self.channels = channels

        # Convolution layers for each level
        self.conv_p3 = DepthwiseSeparableConv(channels, channels)
        self.conv_p4 = DepthwiseSeparableConv(channels, channels)
        self.conv_p5 = DepthwiseSeparableConv(channels, channels)
        self.conv_p6 = DepthwiseSeparableConv(channels, channels)
        self.conv_p7 = DepthwiseSeparableConv(channels, channels)

        # Weight parameters for feature fusion
        self.w1 = nn.Parameter(torch.ones(2))
        self.w2 = nn.Parameter(torch.ones(2))
        self.w3 = nn.Parameter(torch.ones(2))
        self.w4 = nn.Parameter(torch.ones(2))
        self.w5 = nn.Parameter(torch.ones(3))
        self.w6 = nn.Parameter(torch.ones(3))
        self.w7 = nn.Parameter(torch.ones(3))
        self.w8 = nn.Parameter(torch.ones(2))

    def forward(self, inputs):
        P3, P4, P5, P6, P7 = inputs

        # Bottom-up pathway
        w1 = F.relu(self.w1)
        P6_td = (w1[0] * P6 + w1[1] * self.up_sampling(P7, P6.shape[-2:])) / (w1.sum() + self.epsilon)
        P6_td = self.conv_p6(P6_td)

        w2 = F.relu(self.w2)
        P5_td = (w2[0] * P5 + w2[1] * self.up_sampling(P6_td, P5.shape[-2:])) / (w2.sum() + self.epsilon)
        P5_td = self.conv_p5(P5_td)

        w3 = F.relu(self.w3)
        P4_td = (w3[0] * P4 + w3[1] * self.up_sampling(P5_td, P4.shape[-2:])) / (w3.sum() + self.epsilon)
        P4_td = self.conv_p4(P4_td)

        # Top-down pathway
        w4 = F.relu(self.w4)
        P3_out = (w4[0] * P3 + w4[1] * self.up_sampling(P4_td, P3.shape[-2:])) / (w4.sum() + self.epsilon)
        P3_out = self.conv_p3(P3_out)

        w5 = F.relu(self.w5)
        P4_out = (w5[0] * P4 + w5[1] * P4_td + w5[2] * self.down_sampling(P3_out, P4.shape[-2:])) / (w5.sum() + self.epsilon)
        P4_out = self.conv_p4(P4_out)

        w6 = F.relu(self.w6)
        P5_out = (w6[0] * P5 + w6[1] * P5_td + w6[2] * self.down_sampling(P4_out, P5.shape[-2:])) / (w6.sum() + self.epsilon)
        P5_out = self.conv_p5(P5_out)

        w7 = F.relu(self.w7)
        P6_out = (w7[0] * P6 + w7[1] * P6_td + w7[2] * self.down_sampling(P5_out, P6.shape[-2:])) / (w7.sum() + self.epsilon)
        P6_out = self.conv_p6(P6_out)

        w8 = F.relu(self.w8)
        P7_out = (w8[0] * P7 + w8[1] * self.down_sampling(P6_out, P7.shape[-2:])) / (w8.sum() + self.epsilon)
        P7_out = self.conv_p7(P7_out)

        return [P3_out, P4_out, P5_out, P6_out, P7_out]

    def up_sampling(self, x, target_size):
        return F.interpolate(x, size=target_size, mode='nearest')

    def down_sampling(self, x, target_size):
        if x.shape[-2:] == target_size:
            return x
        stride = x.shape[-1] // target_size[-1]
        kernel_size = stride
        return F.max_pool2d(x, kernel_size=kernel_size, stride=stride)

class BiFPN(nn.Module):
    def __init__(self, in_channels_list, out_channels=256, num_blocks=3):
        super(BiFPN, self).__init__()
        self.out_channels = out_channels
        self.num_blocks = num_blocks

        # Input projection layers
        self.input_convs = nn.ModuleList([
            nn.Conv2d(in_ch, out_channels, 1, bias=False)
            for in_ch in in_channels_list
        ])

        # Additional P6 and P7 layers
        self.p6_conv = nn.Conv2d(in_channels_list[-1], out_channels, 3, stride=2, padding=1)
        self.p7_conv = nn.Conv2d(out_channels, out_channels, 3, stride=2, padding=1)

        # BiFPN blocks
        self.bifpn_blocks = nn.ModuleList([
            BiFPNBlock(out_channels) for _ in range(num_blocks)
        ])

    def forward(self, inputs):
        # Project input features
        features = []
        for i, feat in enumerate(inputs):
            features.append(self.input_convs[i](feat))

        # Create P6 and P7
        P6 = self.p6_conv(inputs[-1])
        P7 = self.p7_conv(P6)

        # Initial feature list
        pyramid_features = features + [P6, P7]

        # Apply BiFPN blocks
        for block in self.bifpn_blocks:
            pyramid_features = block(pyramid_features)

        return pyramid_features

class NNConv3UpBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)

    def forward(self, x):
        x = self.conv(x)
        x = F.interpolate(x, scale_factor=2, mode='nearest')
        return x

class FusionBlock(nn.Module):
    def __init__(self, fusion_type='add'):
        super().__init__()
        self.fusion_type = fusion_type

    def forward(self, high_level, low_level):
        if self.fusion_type == 'add':
            return high_level + low_level
        elif self.fusion_type == 'concat':
            return torch.cat([high_level, low_level], dim=1)

class PredictionDecoder(nn.Module):
    def __init__(self, in_channels, out_channels=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels//2, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels//2, out_channels, kernel_size=3, padding=1)
        self.leaky_relu = nn.LeakyReLU(0.1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.leaky_relu(self.conv1(x))
        x = self.sigmoid(self.conv2(x))
        return x

class RTMonoDepthDecoder(nn.Module):

    def __init__(self, encoder_channels=[48, 136, 384], decoder_channels=[256, 128, 64, 32]):
        super().__init__()

        # Upsampling blocks
        self.upconv2 = NNConv3UpBlock(encoder_channels[2], decoder_channels[0])  # F3 -> D2
        self.upconv1 = NNConv3UpBlock(decoder_channels[0], decoder_channels[1])  # After fusion -> D1
        self.upconv0 = NNConv3UpBlock(decoder_channels[1], decoder_channels[2])  # After fusion -> D0

        # Projection layers to match dimensions for fusion
        self.proj2 = nn.Conv2d(encoder_channels[1], decoder_channels[0], 1)  # F2 -> D2 channels
        self.proj1 = nn.Conv2d(encoder_channels[0], decoder_channels[1], 1)  # F1 -> D1 channels

        # Fusion blocks
        self.fusion1 = FusionBlock('add')
        self.fusion0 = FusionBlock('concat')

        # Prediction decoders at each scale
        self.decoder2 = PredictionDecoder(decoder_channels[0])
        self.decoder1 = PredictionDecoder(decoder_channels[1])
        # After concat: up1_resized (128) + f1_proj (128) = 256 channels
        self.decoder0 = PredictionDecoder(decoder_channels[1] + decoder_channels[1])

    def forward(self, features, inference_mode=False):
        f1, f2, f3 = features  # [low_res -> high_res]
        depth_maps = {}

        # Level 2: Start from highest level feature
        up2 = self.upconv2(f3)
        if not inference_mode:
            depth_maps['depth_2'] = self.decoder2(up2)

        # Level 1: Project F2 to match up2 channels and fuse
        f2_proj = self.proj2(f2)
        # Resize up2 to match f2 spatial dimensions
        up2_resized = F.interpolate(up2, size=f2_proj.shape[-2:], mode='bilinear', align_corners=False)
        fused1 = self.fusion1(up2_resized, f2_proj)
        up1 = self.upconv1(fused1)
        if not inference_mode:
            depth_maps['depth_1'] = self.decoder1(up1)

        # Level 0: Project F1 to match up1 channels and fuse
        f1_proj = self.proj1(f1)
        # Resize up1 to match f1 spatial dimensions
        up1_resized = F.interpolate(up1, size=f1_proj.shape[-2:], mode='bilinear', align_corners=False)
        fused0 = self.fusion0(up1_resized, f1_proj)
        depth_maps['depth_0'] = self.decoder0(fused0)

        return depth_maps

class DepthHead2(nn.Module):
    def __init__(self, in_channels=256, out_channels=1):
        super(DepthHead2, self).__init__()
        # small refinement conv stack
        self.refine = nn.Sequential(
            nn.Conv2d(in_channels * 3, in_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, in_channels // 2, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // 2, out_channels, 1),
            nn.Sigmoid()
        )

    def forward(self, depth_features):
        # Accept either dict from RTMonoDepthDecoder or list/tuple
        if isinstance(depth_features, dict):
            # prefer 'depth_0' as highest res; upsample others to its size
            keys = ['depth_0', 'depth_1', 'depth_2']
            maps = []
            # find first existing key for target size
            target = None
            for k in keys:
                if k in depth_features:
                    target = depth_features[k].shape[2:]
                    break
            if target is None:
                raise ValueError("depth_features dict empty or unexpected keys")
            for k in keys:
                if k in depth_features:
                    m = depth_features[k]
                    if m.shape[2:] != target:
                        m = F.interpolate(m, size=target, mode='bilinear', align_corners=False)
                    maps.append(m)
            # if less than 3 maps, duplicate last to keep consistent channels
            while len(maps) < 3:
                maps.append(maps[-1])
        else:
            # assume iterable: take first 3 or duplicate if fewer
            maps = list(depth_features)
            while len(maps) < 3:
                maps.append(maps[-1])
            # upsample to first map's size
            target = maps[0].shape[2:]
            maps = [m if m.shape[2:] == target else F.interpolate(m, size=target, mode='bilinear', align_corners=False) for m in maps[:3]]

        # concat along channels and refine
        concat = torch.cat(maps, dim=1)  # C= sum of channels
        out = self.refine(concat)
        return out  # [B,1,H,W] sigmoid-normalized

class DepthHead(nn.Module):
    def __init__(self, in_channels=1):  # Change to 1 to match input depth maps
        super(DepthHead, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 128, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, 1),
            nn.Sigmoid()
        )
        self.weights = nn.Parameter(torch.ones(3))  # Adjust to 3 for the number of features (depth_2, depth_1, depth_0); previously 5, which may cause issues in zip()

    def weighted_fusion(self, features, weights, target_size):
        weights = F.softmax(weights, dim=0)
        fused = None
        for feat, weight in zip(features, weights): # features liste halinde : depthmap2 B,1,512,512 depthmap1 B,1,512,512 depthmap0 B,1,256,256
            if feat.shape[2:] != target_size:
                feat = F.interpolate(feat, size=target_size, mode='bilinear', align_corners=False)
            if fused is None:
                fused = weight * feat
            else:
                fused += weight * feat
        return fused

    def forward(self, features):
        processed = [self.conv(feat) for feat in features]
        target_size = processed[0].shape[2:]  # Or use processed[-1].shape[2:] for higher resolution (e.g., 512x512) if preferred
        return self.weighted_fusion(processed, self.weights, target_size)

class DetectionHead(nn.Module):
    def __init__(self, in_channels=256, num_anchors=3, num_classes=8):
        super(DetectionHead, self).__init__()
        self.num_anchors = num_anchors
        self.num_classes = num_classes

        # Classification head: her anchor için sınıf olasılıkları (sigmoid / softmax)
        self.cls_conv = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, padding=1)

        # Regression head: her anchor için bbox 4 koordinatı
        self.reg_conv = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, padding=1)

        # istersen headlerde BatchNorm + Activation koyabilirsin

    def forward(self, features):
        """
        features: list of tensor, her biri [B, C, H, W]
        returns:
            cls_preds: [B, total_anchors, num_classes]
            reg_preds: [B, total_anchors, 4]
        """
        cls_outputs = []
        reg_outputs = []

        for feat in features:
            # feat: [B, C, H, W]
            cls_out = self.cls_conv(feat)  # [B, A*C, H, W]
            reg_out = self.reg_conv(feat)  # [B, A*4, H, W]

            B, _, H, W = cls_out.shape

            # reshape: (B, A, C, H, W) → (B, H*W*A, C)
            cls_out = cls_out.view(B, self.num_anchors, self.num_classes, H, W)
            cls_out = cls_out.permute(0, 3, 4, 1, 2).contiguous()  # B, H, W, A, C
            cls_out = cls_out.view(B, -1, self.num_classes)         # B, (H*W*A), C

            # regression benzer şekilde (B, A, 4, H, W) → (B, H*W*A, 4)
            reg_out = reg_out.view(B, self.num_anchors, 4, H, W)
            reg_out = reg_out.permute(0, 3, 4, 1, 2).contiguous()  # B, H, W, A, 4
            reg_out = reg_out.view(B, -1, 4)                       # B, (H*W*A), 4

            cls_outputs.append(cls_out)
            reg_outputs.append(reg_out)

        # Tüm seviyeleri birleştir
        cls_preds = torch.cat(cls_outputs, dim=1)  # B, total_anchors, num_classes
        reg_preds = torch.cat(reg_outputs, dim=1)  # B, total_anchors, 4

        return cls_preds, reg_preds

class MultiTaskHeads(nn.Module):
    def __init__(self, num_classes=10, in_channels=256, num_anchors=3):
        super(MultiTaskHeads, self).__init__()
        # Depth heads unchanged (they expect depth_maps dict/list)
        self.depth_head1 = DepthHead(in_channels=1)   # train fusion from depth_decoder outputs
        self.depth_head2 = DepthHead2(in_channels=1)  # inference refinement
        self.detection_head = DetectionHead(num_anchors=num_anchors)

    def forward(self, bifpn_features, depth_features, inference_mode=False):
        """
        bifpn_features: list [P3,P4,P5,P6,P7] each [B, C, H, W]
        depth_features: dict from RTMonoDepthDecoder (depth_0,1,2)
        returns:
           classification: list(len=5) of [B, num_anchors*num_classes, H, W]
           regression:     list(len=5) of [B, num_anchors*4, H, W]
           depth: [B,1,Hd,Wd]
        """
        # Depth: use decoder outputs (pixel-wise). Use depth_head1 in train, depth_head2 in inference
        depth_list = [depth_features['depth_2'], depth_features['depth_1'], depth_features['depth_0']]
        if inference_mode:
            depth = self.depth_head2(depth_list)
        else:
            depth = self.depth_head1(depth_list)

        cls_preds, reg_preds = self.detection_head(bifpn_features)

        return {
            'depth': depth,
            'classification': cls_preds,
            'regression': reg_preds
        }

class PostProcessor:
    """Model çıktısını loss fonksiyonu için hazırlar"""

    def __init__(self, num_classes=8, num_anchors=3, strides=[8, 16, 32, 64, 128]):
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.strides = strides

    def generate_anchors_for_level(self, feat, stride, sizes=None, ratios=None):
        """
        feat: [B, C, H, W] -> generate anchors at each cell center (in input image coords)
        returns: anchors tensor [H*W*num_anchors, 4] in (x1,y1,x2,y2) pixel coords
        """
        device = feat.device
        H, W = feat.shape[2], feat.shape[3]
        if sizes is None:
            sizes = [stride, stride * 1.6, stride * 2.6]  # example scales for each anchor
        if ratios is None:
            ratios = [0.5, 1.0, 2.0]

        # Build center grid in input-image coordinates
        shift_x = (torch.arange(0, W, device=device) + 0.5) * stride
        shift_y = (torch.arange(0, H, device=device) + 0.5) * stride
        shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij')
        centers = torch.stack((shift_x.reshape(-1), shift_y.reshape(-1)), dim=1)  # [H*W, 2]

        anchors = []
        for size in sizes:
            for r in ratios:
                w = size * (r ** 0.5)
                h = size / (r ** 0.5)
                # x1,y1,x2,y2
                xy1 = centers - 0.5 * torch.tensor([w, h], device=device)
                xy2 = centers + 0.5 * torch.tensor([w, h], device=device)
                anchors.append(torch.cat([xy1, xy2], dim=1))  # [H*W,4]
        anchors = torch.cat(anchors, dim=0)  # [H*W*num_anchors, 4]
        return anchors

    def process_predictions(self, raw_preds, bifpn_features):
        """
        Raw predictions'ı format'a göre işler
        """
        cls_preds = raw_preds['classification']
        reg_preds = raw_preds['regression']

        # Check format and process accordingly
        if isinstance(cls_preds, list):
            # List of feature maps - normal case
            cls_flat, reg_flat = self.flatten_predictions(cls_preds, reg_preds)
        elif isinstance(cls_preds, torch.Tensor):
            # Already flattened
            cls_flat = cls_preds
            reg_flat = reg_preds
        else:
            raise ValueError(f"Unexpected prediction format: {type(cls_preds)}")

        # Generate anchors
        anchors_all = self.generate_all_anchors(bifpn_features)

        return cls_flat, reg_flat, anchors_all
        """
        cls_list: list of [B, num_anchors*num_classes, H, W]
        reg_list: list of [B, num_anchors*4, H, W]
        return: flattened tensors
        """
        # Debug: Check shapes first

        B = cls_list[0].shape[0]
        cls_all = []
        reg_all = []

        for i, (cls_map, reg_map) in enumerate(zip(cls_list, reg_list)):

            # Handle different input formats
            if cls_map.dim() == 4:
                # Expected format: [B, num_anchors*num_classes, H, W]
                _, C, H, W = cls_map.shape
                cls_map = cls_map.view(B, self.num_anchors, self.num_classes, H, W)
                cls_map = cls_map.permute(0, 3, 4, 1, 2).reshape(B, -1, self.num_classes)

                reg_map = reg_map.view(B, self.num_anchors, 4, H, W)
                reg_map = reg_map.permute(0, 3, 4, 1, 2).reshape(B, -1, 4)

            elif cls_map.dim() == 2:
                # Already flattened format: [B, features]
                # Try to reshape if possible
                if cls_map.shape[1] % (self.num_anchors * self.num_classes) == 0:
                    n_positions = cls_map.shape[1] // (self.num_anchors * self.num_classes)
                    cls_map = cls_map.view(B, n_positions, self.num_anchors, self.num_classes)
                    cls_map = cls_map.view(B, -1, self.num_classes)
                else:
                    raise ValueError(f"Cannot reshape cls_map with shape {cls_map.shape}")

                if reg_map.shape[1] % (self.num_anchors * 4) == 0:
                    n_positions = reg_map.shape[1] // (self.num_anchors * 4)
                    reg_map = reg_map.view(B, n_positions, self.num_anchors, 4)
                    reg_map = reg_map.view(B, -1, 4)
                else:
                    raise ValueError(f"Cannot reshape reg_map with shape {reg_map.shape}")

            else:
                raise ValueError(f"Unexpected cls_map dimensions: {cls_map.dim()}")

            cls_all.append(cls_map)
            reg_all.append(reg_map)

        cls_flat = torch.cat(cls_all, dim=1)  # [B, N_total, num_classes]
        reg_flat = torch.cat(reg_all, dim=1)  # [B, N_total, 4]

        return cls_flat, reg_flat

    def generate_all_anchors(self, bifpn_features):
        """Tüm feature map'ler için anchor'ları generate eder"""
        anchors_per_level = []
        for feat, stride in zip(bifpn_features, self.strides):
            anchors = self.generate_anchors_for_level(feat, stride)
            anchors_per_level.append(anchors)

        all_anchors = torch.cat(anchors_per_level, dim=0)  # [N_total, 4]
        return all_anchors

class CompleteMultiTaskModel(nn.Module):
    def __init__(self, İsPretreained=True, num_classes=8, bifpn_channels=256, bifpn_blocks=3,
                 confidence_threshold=0.5, max_detections=100, num_anchors=3):
        super(CompleteMultiTaskModel, self).__init__()

        # Model components
        self.encoder = EncoderBackBone(İsPretreained)
        in_channels_list = [48, 136, 384]
        self.bifpn = BiFPN(in_channels_list, bifpn_channels, bifpn_blocks)
        self.depth_decoder = RTMonoDepthDecoder()
        self.multi_head = MultiTaskHeads(num_classes, bifpn_channels, num_anchors)

        # Model parameters
        self.num_classes = num_classes
        self.conf_thresh = confidence_threshold
        self.max_detections = max_detections
        self.num_anchors = num_anchors
        self.strides = [8, 16, 32, 64, 128]

        # Post processor
        self.post_processor = PostProcessor(num_classes, num_anchors, self.strides)
        self._set_bn_momentum(0.01)

    def _set_bn_momentum(self, momentum):
        for module in self.modules():
            if isinstance(module, torch.nn.BatchNorm2d):
                module.momentum = momentum
    def forward(self, images, targets=None, mode="train"):
        """
        mode: "train" -> return loss-ready format
              "inference" -> return postprocessed results
        """
        B = images.shape[0]

        # Forward pass through model
        backbone_features = self.encoder(images)
        bifpn_features = self.bifpn(backbone_features)
        inference_mode = (mode != "train")
        depth_maps = self.depth_decoder(backbone_features, inference_mode=inference_mode)
        raw_preds = self.multi_head(bifpn_features, depth_maps, inference_mode)

        # Extract predictions with debug
        cls_list = raw_preds['classification']
        reg_list = raw_preds['regression']
        depth_pred = raw_preds['depth']



        if mode == "train":

            # Direct use - no flattening needed
            cls_flat = cls_list  # [B, N_total, num_classes]
            reg_flat = reg_list  # [B, N_total, 4]
            anchors_all = self.post_processor.generate_all_anchors(bifpn_features)

            return {
                "cls_preds": cls_flat,      # [B, N_total, num_classes]
                "reg_preds": reg_flat,      # [B, N_total, 4]
                "anchors": anchors_all,     # [N_total, 4]
                "depth_pred": depth_pred,   # [B, 1, H, W]
                "targets": targets
            }

        elif mode == "inference":
            # Inference mode - direct use of flattened predictions
            cls_flat = cls_list  # [B, N_total, num_classes]
            reg_flat = reg_list  # [B, N_total, 4]
            anchors_all = self.post_processor.generate_all_anchors(bifpn_features)

            return self._inference_postprocess(cls_flat, reg_flat, depth_pred, anchors_all, images)

    def _inference_postprocess(self, cls_flat, reg_flat, depth_pred, anchors_all, images):
        """Inference için post-processing"""
        B = images.shape[0]

        # Decode boxes
        boxes = self.decode_boxes(anchors_all, reg_flat)  # [B, N, 4]
        scores = torch.softmax(cls_flat, dim=-1)
        max_scores, labels = torch.max(scores, dim=-1)  # [B, N]

        results = []
        for b in range(B):
            score_b = max_scores[b]
            label_b = labels[b]
            box_b = boxes[b]

            keep_mask = score_b > self.conf_thresh
            if keep_mask.sum() == 0:
                results.append({
                    "boxes": torch.zeros((0, 4), device=box_b.device),
                    "scores": torch.zeros((0,), device=box_b.device),
                    "labels": torch.zeros((0,), device=box_b.device),
                    "depth": F.interpolate(depth_pred[b:b+1], size=images.shape[2:],
                                         mode='bilinear', align_corners=False)
                })
                continue

            boxes_k = box_b[keep_mask]
            scores_k = score_b[keep_mask]
            labels_k = label_b[keep_mask]

            keep_idx = nms(boxes_k, scores_k, iou_threshold=0.5)
            keep_idx = keep_idx[:self.max_detections]

            results.append({
                "boxes": boxes_k[keep_idx],
                "scores": scores_k[keep_idx],
                "labels": labels_k[keep_idx],
                "depth": F.interpolate(depth_pred[b:b+1], size=images.shape[2:],
                                     mode='bilinear', align_corners=False)
            })

        return results

    def decode_boxes(self, anchors, deltas):
        """Decode bounding boxes from anchors and deltas"""
        widths = anchors[:, 2] - anchors[:, 0]
        heights = anchors[:, 3] - anchors[:, 1]
        ctr_x = anchors[:, 0] + 0.5 * widths
        ctr_y = anchors[:, 1] + 0.5 * heights

        dx = deltas[..., 0]
        dy = deltas[..., 1]
        dw = deltas[..., 2]
        dh = deltas[..., 3]

        pred_ctr_x = dx * widths.unsqueeze(0) + ctr_x.unsqueeze(0)
        pred_ctr_y = dy * heights.unsqueeze(0) + ctr_y.unsqueeze(0)
        pred_w = torch.exp(dw) * widths.unsqueeze(0)
        pred_h = torch.exp(dh) * heights.unsqueeze(0)

        x1 = pred_ctr_x - 0.5 * pred_w
        y1 = pred_ctr_y - 0.5 * pred_h
        x2 = pred_ctr_x + 0.5 * pred_w
        y2 = pred_ctr_y + 0.5 * pred_h
        return torch.stack((x1, y1, x2, y2), dim=-1)

def calculate_iou(boxes1, boxes2):
    """
    boxes1: [N, 4]  (x1, y1, x2, y2)
    boxes2: [M, 4]  (x1, y1, x2, y2)

    Returns:
        iou: [N, M] IoU matrisi
    """

    # boxes1 ve boxes2 için alan hesapla
    area1 = (boxes1[:, 2] - boxes1[:, 0]).clamp(min=0) * (boxes1[:, 3] - boxes1[:, 1]).clamp(min=0)  # [N]
    area2 = (boxes2[:, 2] - boxes2[:, 0]).clamp(min=0) * (boxes2[:, 3] - boxes2[:, 1]).clamp(min=0)  # [M]

    # intersection (kesişim) köşe koordinatları
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N, M, 2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N, M, 2]

    wh = (rb - lt).clamp(min=0)  # [N, M, 2], genişlik ve yükseklik
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N, M] alan kesişimi

    union = area1[:, None] + area2 - inter  # [N, M]

    iou = inter / union.clamp(min=1e-6)  # bölme sıfır olmasın diye clamp

    return iou

def normalize_anchors(anchors, img_h, img_w):
    anchors_norm = anchors.clone()
    anchors_norm[:, [0, 2]] /= img_w  # x1, x2
    anchors_norm[:, [1, 3]] /= img_h  # y1, y2
    return anchors_norm

def decode_boxes(anchors, deltas):
      widths = anchors[:, 2] - anchors[:, 0]
      heights = anchors[:, 3] - anchors[:, 1]
      ctr_x = anchors[:, 0] + 0.5 * widths
      ctr_y = anchors[:, 1] + 0.5 * heights

      dx = deltas[:, 0]
      dy = deltas[:, 1]
      dw = deltas[:, 2]
      dh = deltas[:, 3]

      pred_ctr_x = dx * widths + ctr_x
      pred_ctr_y = dy * heights + ctr_y
      pred_w = torch.exp(dw) * widths
      pred_h = torch.exp(dh) * heights

      pred_boxes = torch.zeros_like(deltas)
      pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w
      pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h
      pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w
      pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h

      return pred_boxes

class MultiTaskCriterion(nn.Module):
    """Multi-task learning loss function - GPU Memory Fixed"""

    def __init__(self, num_classes=8, loss_weights=None, device='cuda',
                 pos_iou_threshold=0.5, neg_iou_threshold=0.4,img_size=256):
        super(MultiTaskCriterion, self).__init__()
        self.device = device
        self.img_size = img_size
        self.num_classes = num_classes

        # Loss weights
        self.loss_weights = loss_weights if loss_weights else {
            'classification': 1.0,
            'regression': 1.0,
            'depth': 1.0,
            'depth_map': 0.1
        }

        # Loss functions
        self.cls_criterion = nn.CrossEntropyLoss(reduction='none')
        self.reg_criterion = nn.SmoothL1Loss(reduction='none')
        self.depth_criterion = nn.MSELoss(reduction='none')

        # IoU thresholds
        self.pos_iou_threshold = pos_iou_threshold
        self.neg_iou_threshold = neg_iou_threshold

    def forward(self, model_output):
        """
        GPU Memory Optimized Forward Pass
        """
        cls_preds = model_output['cls_preds']
        reg_preds = model_output['reg_preds']
        anchors = model_output['anchors']
        depth_pred = model_output['depth_pred']
        targets = model_output['targets']
        batch_size = cls_preds.size(0)

        # Initialize losses - sadece scalar accumulation
        total_loss = 0.0
        cls_loss_sum = 0.0
        reg_loss_sum = 0.0
        depth_loss_sum = 0.0
        depth_map_loss_val = 0.0

        # Initialize metrics - sadece scalar accumulation
        accuracy_sum = 0.0
        f1_sum = 0.0
        mse_sum = 0.0
        rmse_sum = 0.0
        map_sum = 0.0
        valid_samples = 0

        for batch_idx in range(batch_size):
          # Training'de targets list, validation'da dict
          if isinstance(targets, dict):
              target_key = batch_idx
              if target_key not in targets or len(targets[target_key]) == 0:
                  continue
              target = targets[target_key].to(self.device)
          else:
              # Normal list case (training)
              if len(targets) <= batch_idx or len(targets[batch_idx]) == 0:
                  continue
              target = targets[batch_idx].to(self.device)

          target = targets[batch_idx].to(self.device)
          gt_classes = target[:, 0].long()
          gt_boxes = target[:, 1:5]
          gt_depths = target[:, 5] if target.size(1) > 5 else None
          # Get predictions for this batch item
          batch_cls_preds = cls_preds[batch_idx]
          batch_reg_preds = reg_preds[batch_idx]
          batch_depth_preds = depth_pred[batch_idx] if depth_pred is not None else None
          anchors = normalize_anchors(anchors, self.img_size, self.img_size)
          anchors_norm = anchors.clone()
          pred_boxes = decode_boxes(anchors, reg_preds[batch_idx])
          # Assign targets to anchors
          pos_indices, neg_indices, matched_gt_indices = self._assign_targets_to_anchors(
              anchors, gt_boxes, gt_classes
          )
          # Classification Loss + Metrics
          if len(pos_indices) > 0:
              cls_loss, cls_acc, cls_f1 = self._compute_classification_loss(
                  batch_cls_preds, gt_classes, pos_indices, neg_indices, matched_gt_indices
              )
              if cls_loss is not None:
                  total_loss += cls_loss * self.loss_weights['classification']
                  cls_loss_sum += cls_loss.detach().item()  # DÜZELTME 1: detach().item()
                  accuracy_sum += cls_acc  # DÜZELTME 7: append yerine sum
                  f1_sum += cls_f1  # DÜZELTME 7: append yerine sum

          # Regression Loss
          if len(pos_indices) > 0:
              reg_loss = self._compute_regression_loss(
                  batch_reg_preds, gt_boxes, anchors, pos_indices, matched_gt_indices
              )
              if reg_loss is not None:
                  total_loss += reg_loss * self.loss_weights['regression']
                  reg_loss_sum += reg_loss.detach().item()  # DÜZELTME 1: detach().item()

          # Depth Loss + Metrics
          if gt_depths is not None and batch_depth_preds is not None:
              depth_loss, depth_mse, depth_rmse = self._compute_depth_loss(
                  batch_depth_preds, gt_depths, gt_boxes
              )
              if depth_loss is not None:
                  total_loss += depth_loss * self.loss_weights['depth']
                  depth_loss_sum += depth_loss.detach().item()  # DÜZELTME 1: detach().item()
                  mse_sum += depth_mse  # DÜZELTME 7: append yerine sum
                  rmse_sum += depth_rmse  # DÜZELTME 7: append yerine sum

          # mAP calculation
          try:
              map_score = self._compute_simple_map(
                  batch_cls_preds.detach(), gt_classes, pos_indices, matched_gt_indices  # DÜZELTME 6: detach
              )
              map_sum += map_score  # DÜZELTME 7: append yerine sum
          except Exception:
              map_sum += 0.0

          valid_samples += 1

        # Depth map smoothness loss
        if depth_pred is not None:
            depth_map_loss = self._compute_depth_smoothness_loss(depth_pred)
            total_loss += depth_map_loss * self.loss_weights['depth_map']
            depth_map_loss_val = depth_map_loss.detach().item()

        # DÜZELTME 2: torch.stack kullanmak yerine ortalama hesapla
        losses = {
            'total': total_loss,
            'classification': cls_loss_sum / valid_samples if valid_samples > 0 else 0.0,
            'regression': reg_loss_sum / valid_samples if valid_samples > 0 else 0.0,
            'depth': depth_loss_sum / valid_samples if valid_samples > 0 else 0.0,
            'depth_map': depth_map_loss_val
        }

        metrics = {
            'Accuracy': accuracy_sum / valid_samples if valid_samples > 0 else 0.0,
            'F1_score': f1_sum / valid_samples if valid_samples > 0 else 0.0,
            'MSE': mse_sum / valid_samples if valid_samples > 0 else 0.0,
            'RMSE': rmse_sum / valid_samples if valid_samples > 0 else 0.0,
            'mAP': map_sum / valid_samples if valid_samples > 0 else 0.0,
            'TotalLoss': total_loss.detach().item() if isinstance(total_loss, torch.Tensor) else total_loss,
            'ClsLoss': cls_loss_sum / valid_samples if valid_samples > 0 else 0.0
        }

        return losses, metrics

    # Diğer metodlar aynı kalıyor...
    def _assign_targets_to_anchors(self, anchors, gt_boxes, gt_classes):
        """Assign ground truth to anchors based on IoU"""
        if len(gt_boxes) == 0:
            return torch.tensor([], dtype=torch.long, device=self.device), \
                   torch.tensor([], dtype=torch.long, device=self.device), \
                   torch.tensor([], dtype=torch.long, device=self.device)

        # Compute IoU matrix
        ious = self.bbox_iou(anchors, gt_boxes)  # [N_total, N_gt]

        # Find best GT for each anchor
        max_ious, matched_gt_indices = torch.max(ious, dim=1)  # [N_total]

        # Positive and negative indices
        pos_mask = max_ious >= self.pos_iou_threshold
        neg_mask = max_ious < self.neg_iou_threshold

        pos_indices = torch.where(pos_mask)[0]
        neg_indices = torch.where(neg_mask)[0]

        matched_gt_indices = matched_gt_indices[pos_indices]

        return pos_indices, neg_indices, matched_gt_indices

    def _compute_classification_loss(self, cls_preds, gt_classes, pos_indices, neg_indices, matched_gt_indices):
        """Compute focal loss for classification + return metrics"""
        if len(pos_indices) == 0:
            return None, 0.0, 0.0

        # Positive loss
        pos_cls_preds = cls_preds[pos_indices]
        pos_gt_classes = gt_classes[matched_gt_indices]
        pos_loss = self.focal_loss(pos_cls_preds, pos_gt_classes)

        # Negative loss (sınırlı negatif örnek al)
        if len(neg_indices) > 0:
            # Negatif örnek sayısını pozitif örnek sayısının 3 katı ile sınırla
            max_neg_samples = min(len(neg_indices), len(pos_indices) * 3)

            # Confidence skoruna göre en zor negatif örnekleri seç (hard negative mining)
            neg_cls_preds = cls_preds[neg_indices]
            neg_scores = torch.max(torch.softmax(neg_cls_preds, dim=1), dim=1)[0]
            _, hard_neg_indices = torch.topk(neg_scores, max_neg_samples, largest=True)

            selected_neg_indices = neg_indices[hard_neg_indices]
            selected_neg_cls_preds = cls_preds[selected_neg_indices]
            neg_gt_classes = torch.zeros(len(selected_neg_indices), dtype=torch.long, device=self.device)
            neg_loss = self.focal_loss(selected_neg_cls_preds, neg_gt_classes)

            total_cls_loss = pos_loss + neg_loss
        else:
            total_cls_loss = pos_loss

        # Compute metrics
        with torch.no_grad():
            pred_classes = torch.argmax(pos_cls_preds, dim=1)
            accuracy = accuracy_score(pos_gt_classes.cpu().numpy(), pred_classes.cpu().numpy())
            f1 = f1_score(pos_gt_classes.cpu().numpy(), pred_classes.cpu().numpy(),
                         average='weighted', zero_division=0)

        return total_cls_loss, accuracy, f1

    def _compute_regression_loss(self, reg_preds, gt_boxes, anchors, pos_indices, matched_gt_indices):
        """Compute smooth L1 loss for bounding box regression"""
        if len(pos_indices) == 0:
            return None

        pos_anchors = anchors[pos_indices]
        pos_reg_preds = reg_preds[pos_indices]
        pos_gt_boxes = gt_boxes[matched_gt_indices]

        # Encode ground truth boxes relative to anchors
        pos_gt_encoded = self._encode_boxes(pos_anchors, pos_gt_boxes)

        # Smooth L1 loss
        reg_loss = self.reg_criterion(pos_reg_preds, pos_gt_encoded).mean()

        return reg_loss

    def _compute_depth_loss(self, depth_pred, gt_depths, gt_boxes):
        """Compute depth estimation loss + return MSE, RMSE metrics"""
        if len(gt_depths) == 0:
            return None, 0.0, 0.0

        if depth_pred.dim() == 3:
            depth_map = depth_pred[0]  # [H, W]
        else:
            depth_map = depth_pred

        H, W = depth_map.shape

        # Sample depth values from GT boxes
        sampled_depths = []
        target_depths = []

        for gt_box, gt_depth in zip(gt_boxes, gt_depths):
            x1, y1, x2, y2 = gt_box

            # Convert to pixel coordinates
            x1_pix = int(x1 * W)
            y1_pix = int(y1 * H)
            x2_pix = int(x2 * W)
            y2_pix = int(y2 * H)

            # Clamp to bounds
            x1_pix = max(0, min(x1_pix, W-1))
            y1_pix = max(0, min(y1_pix, H-1))
            x2_pix = max(0, min(x2_pix, W-1))
            y2_pix = max(0, min(y2_pix, H-1))

            # Sample from center
            center_y = (y1_pix + y2_pix) // 2
            center_x = (x1_pix + x2_pix) // 2

            sampled_depth = depth_map[center_y, center_x]
            sampled_depths.append(sampled_depth)
            target_depths.append(gt_depth)

        if len(sampled_depths) == 0:
            return None, 0.0, 0.0

        sampled_depths = torch.stack(sampled_depths)
        target_depths = torch.stack(target_depths)

        depth_loss = self.depth_criterion(sampled_depths, target_depths).mean()

        # Compute metrics
        with torch.no_grad():
            mse = F.mse_loss(sampled_depths, target_depths).item()
            rmse = np.sqrt(mse)

        return depth_loss, mse, rmse

    def _compute_depth_smoothness_loss(self, depth_pred):
        """Compute depth map smoothness loss"""
        grad_x = torch.abs(depth_pred[:, :, :, :-1] - depth_pred[:, :, :, 1:])
        grad_y = torch.abs(depth_pred[:, :, :-1, :] - depth_pred[:, :, 1:, :])

        smoothness_loss = grad_x.mean() + grad_y.mean()
        return smoothness_loss

    def _encode_boxes(self, anchors, gt_boxes):
        """Encode ground truth boxes relative to anchors"""
        # Anchor properties
        anchor_widths = anchors[:, 2] - anchors[:, 0]
        anchor_heights = anchors[:, 3] - anchors[:, 1]
        anchor_ctr_x = anchors[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchors[:, 1] + 0.5 * anchor_heights

        # GT box properties
        gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0]
        gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1]
        gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths
        gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights

        # Compute deltas
        dx = (gt_ctr_x - anchor_ctr_x) / anchor_widths
        dy = (gt_ctr_y - anchor_ctr_y) / anchor_heights
        dw = torch.log(gt_widths / anchor_widths)
        dh = torch.log(gt_heights / anchor_heights)

        return torch.stack((dx, dy, dw, dh), dim=1)

    def bbox_iou(self, box1, box2):
        """Compute IoU between two sets of boxes"""
        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

        lt = torch.max(box1[:, None, :2], box2[:, :2])
        rb = torch.min(box1[:, None, 2:], box2[:, 2:])

        wh = (rb - lt).clamp(min=0)
        inter = wh[:, :, 0] * wh[:, :, 1]

        union = area1[:, None] + area2 - inter
        iou = inter / union
        return iou

    def focal_loss(self, inputs, targets, alpha=0.25, gamma=2.0):
        """Focal Loss implementation"""
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = alpha * (1 - pt) ** gamma * ce_loss
        return focal_loss.mean()

    def _compute_simple_map(self, cls_preds, gt_classes, pos_indices, matched_gt_indices):
        """
        Basitleştirilmiş mAP hesaplama - tensor boyut uyumsuzluğu problemini çözer
        """
        if len(pos_indices) == 0 or len(gt_classes) == 0:
            return 0.0

        with torch.no_grad():
            # Pozitif örnekler için sınıf tahminleri
            pos_cls_preds = cls_preds[pos_indices]  # [N_pos, num_classes]
            pos_gt_classes = gt_classes[matched_gt_indices]  # [N_pos]

            # Softmax ile sınıf olasılıkları
            pos_probs = F.softmax(pos_cls_preds, dim=1)  # [N_pos, num_classes]

            # Her pozitif örnek için doğru sınıfın olasılığını al
            correct_class_probs = pos_probs[torch.arange(len(pos_gt_classes)), pos_gt_classes]

            # Ortalama doğruluk oranını mAP approximation olarak kullan
            map_approx = correct_class_probs.mean().item()

            return max(0.0, min(1.0, map_approx))  # 0-1 aralığında clamp


def train_model(model, train_loader, val_loader, num_epochs=100,
                learning_rate=1e-4, device='cuda', save_path='model_checkpoint.pth',
                early_stop_patience=3,scheduler_patience=10, scheduler_factor=0.5, class_weights=None, task_weights=None,
                p_iou_threshold=0.5, n_iou_threshold=0.4):

    # WandB config'e weights ekle
    config = {
        "learning_rate": learning_rate,
        "architecture": "EfficientBasedMultiTask",
        "dataset": "KITTI-2012",
        "epochs": num_epochs,
    }

    # Task weights'i config'e ekle
    if task_weights:
        config.update({
            "task_weights": task_weights,
            "classification_weight": task_weights.get("classification", 1.0),
            "regression_weight": task_weights.get("regression", 1.0),
            "detection_depth_weight": task_weights.get("detection_depth", 1.0),
            "depth_map_weight": task_weights.get("depth_map", 1.0)
        })

    wandb.init(
        entity="mehmeteminuludag-kirikkale-university",
        project="StajProjesi",
        config=config
    )

    # Move model to device
    model = model.to(device)
    criterion = MultiTaskCriterion(loss_weights=task_weights, pos_iou_threshold=p_iou_threshold, neg_iou_threshold=n_iou_threshold).to(device)

    optimizer = optim.AdamW(
        list(model.parameters()) + list(criterion.parameters()),
        lr=learning_rate, weight_decay=1e-5, eps=1e-8
    )

    best_val_loss = float('inf')
    counter=0
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_losses = {
            'total': 0,
            'classification': 0,
            'regression': 0,
            'depth': 0,
            'depth_map': 0
        }
        train_metrics_accum = {
            'Accuracy': 0,
            'F1_score': 0,
            'MSE': 0,
            'RMSE': 0,
            'mAP': 0,
            'TotalLoss': 0,
            'ClsLoss': 0
        }

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} Train')

        for batch_idx, (images, targets) in enumerate(pbar):
            images = images.to(device)
            batch_size = len(targets)

            optimizer.zero_grad()
            model_outputs = model(images, mode="train")
            values = {}
            values['cls_preds'] = model_outputs['cls_preds']
            values['reg_preds'] = model_outputs['reg_preds']
            values['anchors'] = model_outputs['anchors']
            values['depth_pred'] = model_outputs['depth_pred']
            values['targets'] = targets

            losses, metrics = criterion(values)
            losses['total'].backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Hataları biriktirir
            for key in train_losses:
                if key in losses:
                    if isinstance(losses[key], torch.Tensor):
                        train_losses[key] += losses[key].item() * batch_size
                    else:
                        train_losses[key] += losses[key] * batch_size

            # Metrikleri biriktir
            for key in train_metrics_accum:
                if key in metrics:
                    if isinstance(metrics[key], torch.Tensor):
                        train_metrics_accum[key] += metrics[key].item() * batch_size
                    else:
                        train_metrics_accum[key] += metrics[key] * batch_size

            # DÜZELTME 4: Memory cleanup
            if batch_idx % 10 == 0:
                torch.cuda.empty_cache()

            pbar.set_postfix({
                'Acc': f"{train_metrics_accum['Accuracy']/((batch_idx+1)*batch_size): .3f}",
                'ClsLoss': f"{train_losses['classification']/((batch_idx+1)*batch_size):.3f}",
                'F1': f"{train_metrics_accum['F1_score']/((batch_idx+1)*batch_size):.3f}",
                'RMSE': f"{train_metrics_accum['RMSE']/((batch_idx+1)*batch_size):.3f}",
                'mAP': f"{train_metrics_accum['mAP']/((batch_idx+1)*batch_size):.3f}",
                'TotalLoss': f"{train_losses['total']/((batch_idx+1)*batch_size):.3f}"
            })

        # DÜZELTME 3: Loop dışına çıkarıldı
        for key in train_losses:
            train_losses[key] /= len(train_loader.dataset)

        for key in train_metrics_accum:
            train_metrics_accum[key] /= len(train_loader.dataset)

        # Validation

        val_losses = {
            'total': 0,
            'classification': 0,
            'regression': 0,
            'depth': 0,
            'depth_map': 0
        }
        val_metrics_accum = {
            'Accuracy': 0,
            'F1_score': 0,
            'MSE': 0,
            'RMSE': 0,
            'mAP': 0,
            'TotalLoss': 0,
            'ClsLoss': 0
        }

        with torch.no_grad():
            #model.train()
            pbar2 = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} Validation')

            for batch_idx, (images, targets) in enumerate(pbar2):
                images = images.to(device)
                batch_size = len(targets)


                model_outputs = model(images, mode="train")
                values = {}
                values['cls_preds'] = model_outputs['cls_preds']
                values['reg_preds'] = model_outputs['reg_preds']
                values['anchors'] = model_outputs['anchors']
                values['depth_pred'] = model_outputs['depth_pred']
                values['targets'] = targets
                losses, metrics = criterion(values)

                # Hataları biriktirir
                for key in val_losses:
                    if key in losses:
                        if isinstance(losses[key], torch.Tensor):
                            val_losses[key] += losses[key].item() * batch_size
                        else:
                            val_losses[key] += losses[key] * batch_size

                # Metrikleri biriktir
                for key in val_metrics_accum:
                    if key in metrics:
                        if isinstance(metrics[key], torch.Tensor):
                            val_metrics_accum[key] += metrics[key].item() * batch_size
                        else:
                            val_metrics_accum[key] += metrics[key] * batch_size

                pbar2.set_postfix({
                    'Acc': f"{val_metrics_accum['Accuracy']/((batch_idx+1)*batch_size): .3f}",
                    'ClsLoss': f"{val_losses['classification']/((batch_idx+1)*batch_size):.3f}",
                    'F1': f"{val_metrics_accum['F1_score']/((batch_idx+1)*batch_size):.3f}",
                    'RMSE': f"{val_metrics_accum['RMSE']/((batch_idx+1)*batch_size):.3f}",
                    'mAP': f"{val_metrics_accum['mAP']/((batch_idx+1)*batch_size):.3f}",
                    'TotalLoss': f"{val_losses['total']/((batch_idx+1)*batch_size):.3f}"
                })

        # DÜZELTME 5: Validation loop dışına çıkarıldı
        for key in val_losses:
            val_losses[key] /= len(val_loader.dataset)

        for key in val_metrics_accum:
            val_metrics_accum[key] /= len(val_loader.dataset)

        # en iyi val loss un olduğu ağırlıkları kaydeder
        if val_losses['total'] < best_val_loss:
            best_val_loss = val_losses['total']
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'criterion_state_dict': criterion.state_dict()
            }, save_path)

        # wandb logları alınıyor
        wandb.log({
            "val/accuracy": val_metrics_accum['Accuracy'],
            "val/classification_loss": val_losses['classification'],
            "val/f1_score": val_metrics_accum['F1_score'],
            "val/rmse": val_metrics_accum['RMSE'],
            "val/map": val_metrics_accum['mAP']
        }, step=epoch+1)

        wandb.log({
            "train/accuracy": train_metrics_accum['Accuracy'],
            "train/classification_loss": train_losses['classification'],
            "train/f1_score": train_metrics_accum['F1_score'],
            "train/rmse": train_metrics_accum['RMSE'],
            "train/map": train_metrics_accum['mAP']
        }, step=epoch+1)

        if val_losses['total'] +0.15 < best_val_loss:
          best_val_loss = val_losses['total']
          counter = 0  # İyileşme oldu, sıfırla
          torch.save({
              'epoch': epoch + 1,
              'model_state_dict': model.state_dict(),
              'optimizer_state_dict': optimizer.state_dict(),
              'criterion_state_dict': criterion.state_dict()
          }, save_path)
        else:
            counter += 1
            print(f"No improvement in val loss for {counter} epochs.")

        if counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break
    wandb.finish()
    return best_val_loss


def kitti_collate_fn(batch):
    images = [item[0] for item in batch]
    labels = [item[1] for item in batch]
    images = torch.stack(images, dim=0)  # [B, C, H, W]
    return images, labels

def analyze_dataset(train_dataset, device):
    """
    Veri seti analizi ve görselleştirme fonksiyonu - Optimize edilmiş versiyon
    """
    import numpy as np
    import torch
    from collections import Counter

    # === 0. Veri Setinden Özellikleri Çıkarma - TEK DÖNGÜ İLE ===
    print(f"Veri seti boyutu: {len(train_dataset)} örnek")

    # Tüm listeleri tek seferde oluştur
    cls_labels = []
    depth_maps = []
    bboxes = []
    objects_per_image = []


    for i in range(len(train_dataset)):
        image_data = train_dataset[i][1]  # Bir kez al
        objects_per_image.append(len(image_data))

        # Bu görüntüdeki tüm objeleri işle
        for obj in image_data:
            cls_labels.append(obj[0])           # sınıf
            depth_maps.append(obj[5])           # depth
            bboxes.append(obj[1:5])             # [x1,y1,x2,y2]
            print(obj)
    total_objects = len(cls_labels)
    print(f"Toplam obje sayısı: {total_objects}")
    print(f"Toplam bbox sayısı: {len(bboxes)}")
    print(f"Toplam depth değeri: {len(depth_maps)}")

    # === 1. Sınıf Dağılımı Analizi ===
    cls_distribution = Counter(cls_labels)
    sorted_cls_distribution = sorted(cls_distribution.items(), key=lambda x: x[0])

    # Class weights hesaplama
    class_weights = torch.tensor([count for _, count in sorted_cls_distribution],
                                dtype=torch.float32).to(device)
    return class_weights
    # === 2. BBox Analizi - VEKTÖRLEŞTİRİLMİŞ ===
    # NumPy array'e çevir hızlı işlem için
    bboxes_array = np.array(bboxes)

    # Format: [x1, y1, x2, y2]
    bbox_widths = bboxes_array[:, 2] - bboxes_array[:, 0]   # x2 - x1
    bbox_heights = bboxes_array[:, 3] - bboxes_array[:, 1]  # y2 - y1
    bbox_areas = bbox_widths * bbox_heights

    # Sıfıra bölme kontrolü ile aspect ratio
    bbox_aspect_ratios = np.divide(bbox_widths, bbox_heights,
                                  out=np.zeros_like(bbox_widths),
                                  where=bbox_heights!=0)

    # === 3. Görsel Başına Obje Sayısı Analizi - ZATEN HAZIR ===
    # objects_per_image yukarıda hesaplandı

    # === 4. Depth Analizi - VEKTÖRLEŞTİRİLMİŞ ===
    depth_array = np.array(depth_maps)
    valid_mask = ~np.isnan(depth_array)
    valid_depths = depth_array[valid_mask]
    invalid_depths = np.sum(~valid_mask)




    # === 5. Görselleştirme ===
    fig = plt.figure(figsize=(18, 12))

    # Sınıf dağılımı pasta grafiği
    plt.subplot(3, 4, 1)
    labels, values = zip(*sorted_cls_distribution)
    plt.pie(values, labels=labels, autopct='%1.1f%%', startangle=140)
    plt.title('Sınıf Dağılımı', fontsize=12, fontweight='bold')
    plt.axis('equal')

    # Sınıf dağılımı bar grafiği
    plt.subplot(3, 4, 2)
    plt.bar(labels, values, alpha=0.7, edgecolor='black')
    plt.title('Sınıf Dağılımı (Bar Chart)', fontsize=12, fontweight='bold')
    plt.xlabel('Sınıf')
    plt.ylabel('Örnek Sayısı')
    plt.xticks(rotation=45)

    # Görsel başına obje sayısı histogramı
    plt.subplot(3, 4, 3)
    plt.hist(objects_per_image, bins=range(1, max(objects_per_image)+2),
             alpha=0.7, edgecolor='black', color='skyblue')
    plt.title('Görsel Başına Obje Sayısı', fontsize=12, fontweight='bold')
    plt.xlabel('Obje Sayısı')
    plt.ylabel('Görsel Sayısı')
    plt.grid(True, alpha=0.3)

    # BBox genişlik histogramı
    plt.subplot(3, 4, 4)
    plt.hist(bbox_widths, bins=50, alpha=0.7, edgecolor='black', color='orange')
    plt.title('BBox Genişlik Dağılımı', fontsize=12, fontweight='bold')
    plt.xlabel('Genişlik')
    plt.ylabel('Frekans')
    plt.grid(True, alpha=0.3)

    # BBox yükseklik histogramı
    plt.subplot(3, 4, 5)
    plt.hist(bbox_heights, bins=50, alpha=0.7, edgecolor='black', color='red')
    plt.title('BBox Yükseklik Dağılımı', fontsize=12, fontweight='bold')
    plt.xlabel('Yükseklik')
    plt.ylabel('Frekans')
    plt.grid(True, alpha=0.3)

    # BBox alan histogramı
    plt.subplot(3, 4, 6)
    plt.hist(bbox_areas, bins=50, alpha=0.7, edgecolor='black', color='purple')
    plt.title('BBox Alan Dağılımı', fontsize=12, fontweight='bold')
    plt.xlabel('Alan')
    plt.ylabel('Frekans')
    plt.grid(True, alpha=0.3)

    # BBox aspect ratio histogramı
    plt.subplot(3, 4, 7)
    plt.hist(bbox_aspect_ratios, bins=50, alpha=0.7, edgecolor='black', color='brown')
    plt.title('BBox En/Boy Oranı', fontsize=12, fontweight='bold')
    plt.xlabel('En/Boy Oranı')
    plt.ylabel('Frekans')
    plt.grid(True, alpha=0.3)

    # Depth değerleri histogramı
    plt.subplot(3, 4, 8)
    if valid_depths:
        plt.hist(valid_depths, bins=50, alpha=0.7, edgecolor='black', color='lightgreen')
    plt.title('Depth Değerleri Dağılımı', fontsize=12, fontweight='bold')
    plt.xlabel('Normalize Edilmiş Depth')
    plt.ylabel('Frekans')
    plt.grid(True, alpha=0.3)

    # Depth box plot
    plt.subplot(3, 4, 9)
    if valid_depths:
        plt.boxplot(valid_depths, vert=True)
    plt.title('Depth Box Plot', fontsize=12, fontweight='bold')
    plt.ylabel('Normalize Edilmiş Depth')
    plt.grid(True, alpha=0.3)

    # Obje sayısı box plot
    plt.subplot(3, 4, 10)
    plt.boxplot(objects_per_image, vert=True)
    plt.title('Obje Sayısı Box Plot', fontsize=12, fontweight='bold')
    plt.ylabel('Obje Sayısı')
    plt.grid(True, alpha=0.3)

    # BBox alan box plot
    plt.subplot(3, 4, 11)
    plt.boxplot(bbox_areas, vert=True)
    plt.title('BBox Alan Box Plot', fontsize=12, fontweight='bold')
    plt.ylabel('Alan')
    plt.grid(True, alpha=0.3)

    # BBox aspect ratio box plot
    plt.subplot(3, 4, 12)
    plt.boxplot(bbox_aspect_ratios, vert=True)
    plt.title('En/Boy Oranı Box Plot', fontsize=12, fontweight='bold')
    plt.ylabel('En/Boy Oranı')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # === 6. İstatistik Raporları ===
    print("=" * 80)
    print("DATASET ANALİZ RAPORU")
    print("=" * 80)

    # Genel bilgiler
    print(f"\n📋 GENEL BİLGİLER:")
    print(f"Toplam görsel sayısı:    {len(train_dataset):6d}")
    print(f"Toplam obje sayısı:      {len(cls_labels):6d}")
    print(f"Toplam bbox sayısı:      {len(bboxes):6d}")

    # Sınıf dağılımı raporu
    print(f"\n SINIF DAĞILIMI:")
    print(f"Toplam sınıf sayısı: {len(cls_distribution)}")
    for label, count in sorted_cls_distribution:
        percentage = (count / sum(cls_distribution.values())) * 100
        print(f"  Sınıf {label}: {count:4d} örnek ({percentage:5.1f}%)")

    # BBox analiz raporu
    print(f"\n BBOX ANALİZ RAPORU:")
    print(f"Ortalama genişlik:       {np.mean(bbox_widths):8.2f}")
    print(f"Ortalama yükseklik:      {np.mean(bbox_heights):8.2f}")
    print(f"Ortalama alan:           {np.mean(bbox_areas):8.2f}")
    print(f"Ortalama en/boy oranı:   {np.mean(bbox_aspect_ratios):8.2f}")
    print(f"Min genişlik:            {np.min(bbox_widths):8.2f}")
    print(f"Max genişlik:            {np.max(bbox_widths):8.2f}")
    print(f"Min yükseklik:           {np.min(bbox_heights):8.2f}")
    print(f"Max yükseklik:           {np.max(bbox_heights):8.2f}")
    print(f"Min alan:                {np.min(bbox_areas):8.2f}")
    print(f"Max alan:                {np.max(bbox_areas):8.2f}")

    # Obje sayısı raporu
    print(f"\n OBJE SAYISI İSTATİSTİKLERİ:")
    print(f"Ortalama obje sayısı: {np.mean(objects_per_image):6.2f}")
    print(f"Medyan obje sayısı:   {np.median(objects_per_image):6.2f}")
    print(f"Maksimum obje sayısı: {max(objects_per_image):6d}")
    print(f"Minimum obje sayısı:  {min(objects_per_image):6d}")
    print(f"Standart sapma:       {np.std(objects_per_image):6.2f}")

    # Depth raporu
    print(f"\n DEPTH ANALİZ RAPORU:")
    print(f"Toplam depth değeri:     {len(depth_maps):6d}")
    print(f"Geçerli depth değeri:    {len(valid_depths):6d}")
    print(f"Geçersiz depth (NaN):    {invalid_depths:6d}")
    print(f"Depth geçerlilik oranı:  {len(valid_depths)/len(depth_maps)*100:6.1f}%")

    if valid_depths:
        print(f"\n DEPTH İSTATİSTİKLERİ:")
        for key, value in depth_stats.items():
            print(f"{key:>8}: {value:8.4f}")

    print("=" * 80)

    return {
        'class_weights': class_weights,
        'cls_distribution': sorted_cls_distribution,
        'bbox_stats': {
            'mean_width': np.mean(bbox_widths),
            'mean_height': np.mean(bbox_heights),
            'mean_area': np.mean(bbox_areas),
            'mean_aspect_ratio': np.mean(bbox_aspect_ratios),
            'width_std': np.std(bbox_widths),
            'height_std': np.std(bbox_heights),
            'area_std': np.std(bbox_areas)
        },
        'objects_per_image_stats': {
            'mean': np.mean(objects_per_image),
            'median': np.median(objects_per_image),
            'max': max(objects_per_image),
            'min': min(objects_per_image),
            'std': np.std(objects_per_image)
        },
        'depth_stats': depth_stats,
        'depth_validity_ratio': len(valid_depths)/len(depth_maps)*100 if depth_maps else 0,
        'total_images': len(train_dataset),
        'total_objects': len(cls_labels)
    }

def get_labels(targets,device='cuda'):
  target_dict = {}
  max_objects = max(len(target_list) for target_list in targets) if targets else 1 # her batchdeki maksimum nesne sayısı belirlenir

  #sınıf,box ve depth etiketleri için tensörler oluşturuluyor
  cls_labels = torch.full((batch_size, max_objects), -1, dtype=torch.long)
  bbox_labels = torch.zeros((batch_size, max_objects, 4), dtype=torch.float32)
  depth_labels = torch.full((batch_size, max_objects), float('nan'), dtype=torch.float32)


  #tensörler gerçek değerler ile dolduruldu
  for b, target_list in enumerate(targets):
      for obj_idx, obj_data in enumerate(target_list):
          if obj_idx < max_objects:
              category_num, x, y, w, h, depth_or_disp = obj_data
              cls_labels[b, obj_idx] = int(category_num)
              bbox_labels[b, obj_idx] = torch.tensor([x, y, w, h], dtype=torch.float32)

              # etiket değeri nan değilse doldur
              if not np.isnan(depth_or_disp):
                  depth_labels[b, obj_idx] = depth_or_disp


  # tensörler kullanılabilir olması için 'target_dict' kütüphanesine gömüldü ve gpu belleğe taşındı
  target_dict['classification'] = cls_labels.to(device)
  target_dict['regression'] = bbox_labels.to(device)
  target_dict['depth'] = depth_labels.to(device)
  target_dict['depth_map'] = torch.zeros(batch_size, 1, 512, 512).to(device)

  return target_dict



In [None]:
data_path = "/content/kitti2012" #colab
#vkitti_path = "/content/vkitti_sample (1)" #vkitti path
#data_path = "C:/Users/Mehmet/Desktop/kitti2012" #lcoal
test_path = data_path+"/testing"

image_size=256
batch_size=1

transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor()
])
#train_val_2_dataset = VKITTI_Dataset(root_dir=vkitti_path,transform=transform)
#train_2_size = int(0.8 * len(train_val_2_dataset))  # ~155 sahne
#val_2_size = len(train_val_2_dataset) - train_2_size  # ~39 sahne
#train_2_dataset, val_2_dataset = random_split(train_val_2_dataset, [train_2_size, val_2_size])


#train_2_loader = DataLoader(train_2_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)
#test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)
#val_2_loader = DataLoader(val_2_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)


train_val_dataset = KITTI_Dataset(data_path=data_path ,transform=transform, mode='train')
train_size = int(0.8 * len(train_val_dataset))  # ~155 sahne
val_size = len(train_val_dataset) - train_size  # ~39 sahne
train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])

#test_dataset  = KITTI_Dataset(test_path,transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)
#test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=0,collate_fn=kitti_collate_fn)

In [27]:
results = analyze_dataset(train_dataset,device)

Veri seti boyutu: 1169 örnek
Toplam obje sayısı: 0
Toplam bbox sayısı: 0
Toplam depth değeri: 0


In [None]:
class_weights = torch.tensor([w / sum([1 / w for w in [1201,113,39,161,76,36,35,657]]) for w in [1 / w for w in [1201,113,39,161,76,36,35,657]]], dtype=torch.float32).to(device)

In [None]:
def objective(trial):
    # Hiperparametre önerileri
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    # Task weights - dengeli bir şekilde ayarla
    classification_weight = trial.suggest_float('classification_weight', 0.5, 2.0)
    regression_weight = trial.suggest_float('regression_weight', 0.5, 2.0)
    detection_depth_weight = trial.suggest_float('detection_depth_weight', 0.1, 0.5)
    depth_map_weight = trial.suggest_float('depth_map_weight', 0.1, 0.5)

    task_weights = {
        'classification': classification_weight,
        'regression': regression_weight,
        'depth': detection_depth_weight,
        'depth_map': depth_map_weight
    }

    # IoU thresholds
    p_iou_threshold = trial.suggest_float('p_iou_threshold', 0.3, 0.7)
    n_iou_threshold = trial.suggest_float('n_iou_threshold', 0.1, 0.4)

    # Model oluştur ve BatchNorm ayarla
    model = CompleteMultiTaskModel(num_classes=8, num_anchors=9).to(device)

    best_val_loss = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        num_epochs=10,  # Optuna için kısa epoch
        learning_rate=learning_rate,
        device=device,
        save_path=f'optuna_trial_{trial.number}.pth',
        class_weights=classification_weight,
        task_weights=task_weights,
        scheduler_patience=3,
        scheduler_factor=0.2,
        p_iou_threshold=p_iou_threshold,
        n_iou_threshold=n_iou_threshold
    )

    return best_val_loss
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

print("En iyi parametreler:", study.best_params)

[I 2025-08-10 10:56:47,348] A new study created in memory with name: no-name-c4ef8536-aeef-4847-be9e-67b82886e01e
[34m[1mwandb[0m: Currently logged in as: [33mmehmeteminuludag[0m ([33mmehmeteminuludag-kirikkale-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.565, ClsLoss=0.245, F1=0.557, RMSE=0.041, mAP=0.382, TotalLoss=0.538]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.612, ClsLoss=0.159, F1=0.559, RMSE=0.035, mAP=0.382, TotalLoss=0.312]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:22<00:00,  1.53it/s, Acc=0.585, ClsLoss=0.157, F1=0.572, RMSE=0.038, mAP=0.401, TotalLoss=0.365]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.485, ClsLoss=0.180, F1=0.516, RMSE=0.035, mAP=0.298, TotalLoss=0.366]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.607, ClsLoss=0.140, F1=0.592, RMSE=0.038, mAP=0.408, TotalLoss=0.285]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.608, ClsLoss=0.156, F1=0.553, RMSE=0.035, mAP=0.416, TotalLoss=0.308]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.619, ClsLoss=0.137, F1=0.598, RMSE=0.038, mAP=0.411, 

0,1
train/accuracy,▁▃▅▇▆▆▇▇██
train/classification_loss,█▃▂▂▃▂▁▁▁▁
train/f1_score,▁▃▅▆▆▆▇▆██
train/map,▁▄▅▆▇▇▇▇██
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,█▁███▇▇▇██
val/classification_loss,▄█▄▅▄▃▇▁▄▄
val/f1_score,▅▁▅▆▅▇▆▇█▇
val/map,▅▁▇▇▆▄▇▇▇█
val/rmse,▁█████████

0,1
train/accuracy,0.63186
train/classification_loss,0.12418
train/f1_score,0.6132
train/map,0.42625
train/rmse,0.03766
val/accuracy,0.60529
val/classification_loss,0.15868
val/f1_score,0.57142
val/map,0.43235
val/rmse,0.03549


[I 2025-08-10 11:33:43,812] Trial 0 finished with value: 0.2842940606208623 and parameters: {'learning_rate': 0.00443528430144658, 'classification_weight': 1.8030624769288597, 'regression_weight': 1.0711140562854187, 'detection_depth_weight': 0.35664103874392583, 'depth_map_weight': 0.24893458557825682, 'p_iou_threshold': 0.4850267189356135, 'n_iou_threshold': 0.37124729356020936}. Best is trial 0 with value: 0.2842940606208623.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:24<00:00,  1.52it/s, Acc=0.590, ClsLoss=0.214, F1=0.572, RMSE=0.052, mAP=0.371, TotalLoss=0.284]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.602, ClsLoss=0.280, F1=0.549, RMSE=0.034, mAP=0.424, TotalLoss=0.408]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:24<00:00,  1.52it/s, Acc=0.629, ClsLoss=0.148, F1=0.582, RMSE=0.035, mAP=0.396, TotalLoss=0.164]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.607, ClsLoss=0.157, F1=0.556, RMSE=0.034, mAP=0.396, TotalLoss=0.165]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.640, ClsLoss=0.141, F1=0.605, RMSE=0.035, mAP=0.428, TotalLoss=0.154]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.612, ClsLoss=0.151, F1=0.567, RMSE=0.033, mAP=0.394, TotalLoss=0.158]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.668, ClsLoss=0.116, F1=0.648, RMSE=0.035, mAP=0.449, 

0,1
train/accuracy,▁▂▂▃▄▅▆▇▇█
train/classification_loss,█▅▅▄▄▃▂▂▁▁
train/f1_score,▁▁▂▃▄▅▆▇▇█
train/map,▁▂▂▃▄▅▆▇▇█
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▃▄▄▄▄▅▄▂▁█
val/classification_loss,█▂▂▂▂▁▂▁▂▂
val/f1_score,▁▂▂▃▃▅▅▅▅█
val/map,▃▁▁▂▂▃▅▅▅█
val/rmse,▁▁▁▂▁▂▃▄▃█

0,1
train/accuracy,0.83368
train/classification_loss,0.03473
train/f1_score,0.84006
train/map,0.6673
train/rmse,0.03535
val/accuracy,0.6338
val/classification_loss,0.15147
val/f1_score,0.64881
val/map,0.4997
val/rmse,0.0382


[I 2025-08-10 12:10:47,367] Trial 1 finished with value: 0.14495912502155614 and parameters: {'learning_rate': 0.0007471321366266431, 'classification_weight': 0.894964860659696, 'regression_weight': 0.9581203843332198, 'detection_depth_weight': 0.3681244097839771, 'depth_map_weight': 0.16955074782499602, 'p_iou_threshold': 0.4636327974735715, 'n_iou_threshold': 0.3055644209541635}. Best is trial 1 with value: 0.14495912502155614.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:24<00:00,  1.52it/s, Acc=0.619, ClsLoss=0.208, F1=0.568, RMSE=0.053, mAP=0.357, TotalLoss=0.285]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.592, ClsLoss=0.158, F1=0.544, RMSE=0.033, mAP=0.333, TotalLoss=0.204]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:24<00:00,  1.51it/s, Acc=0.640, ClsLoss=0.135, F1=0.600, RMSE=0.035, mAP=0.406, TotalLoss=0.178]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.611, ClsLoss=0.148, F1=0.563, RMSE=0.036, mAP=0.383, TotalLoss=0.189]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:24<00:00,  1.52it/s, Acc=0.676, ClsLoss=0.112, F1=0.660, RMSE=0.035, mAP=0.454, TotalLoss=0.153]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.614, ClsLoss=0.149, F1=0.575, RMSE=0.034, mAP=0.428, TotalLoss=0.191]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:23<00:00,  1.52it/s, Acc=0.728, ClsLoss=0.086, F1=0.726, RMSE=0.035, mAP=0.514, 

0,1
train/accuracy,▁▂▂▄▅▆▇▇██
train/classification_loss,█▅▅▄▃▂▂▁▁▁
train/f1_score,▁▂▃▄▅▆▇▇██
train/map,▁▂▃▄▅▆▆▇██
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▄▄▄▃▆▄▅▆█
val/classification_loss,█▅▅▄▁▅▅▅▆▇
val/f1_score,▁▂▃▄▅▆▆▇▇█
val/map,▁▃▄▄▅▆▆▇██
val/rmse,▁▆▂▃▅█▅▃▃▂

0,1
train/accuracy,0.90031
train/classification_loss,0.01315
train/f1_score,0.90552
train/map,0.76721
train/rmse,0.03504
val/accuracy,0.64058
val/classification_loss,0.15541
val/f1_score,0.65725
val/map,0.52769
val/rmse,0.03363


[I 2025-08-10 12:47:54,009] Trial 2 finished with value: 0.1768098289363619 and parameters: {'learning_rate': 0.0004383436386931138, 'classification_weight': 0.9778428158846735, 'regression_weight': 1.5646503945581236, 'detection_depth_weight': 0.1316630576881649, 'depth_map_weight': 0.45271247157656114, 'p_iou_threshold': 0.4246966500363821, 'n_iou_threshold': 0.20442134435009027}. Best is trial 1 with value: 0.14495912502155614.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.350, ClsLoss=0.509, F1=0.417, RMSE=0.297, mAP=0.144, TotalLoss=0.734]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.34it/s, Acc=0.470, ClsLoss=0.335, F1=0.488, RMSE=0.163, mAP=0.157, TotalLoss=0.502]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.579, ClsLoss=0.308, F1=0.556, RMSE=0.109, mAP=0.175, TotalLoss=0.458]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.579, ClsLoss=0.294, F1=0.531, RMSE=0.078, mAP=0.186, TotalLoss=0.439]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.626, ClsLoss=0.252, F1=0.578, RMSE=0.060, mAP=0.221, TotalLoss=0.382]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.593, ClsLoss=0.244, F1=0.535, RMSE=0.052, mAP=0.237, TotalLoss=0.375]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.632, ClsLoss=0.204, F1=0.579, RMSE=0.045, mAP=0.282, 

0,1
train/accuracy,▁▆▇▇▇▇████
train/classification_loss,█▄▃▂▂▁▁▁▁▁
train/f1_score,▁▆▇▇▇▇▇▇▇█
train/map,▁▂▃▄▆▇▇███
train/rmse,█▃▂▁▁▁▁▁▁▁
val/accuracy,▁▇████████
val/classification_loss,█▆▃▂▁▁▁▁▁▁
val/f1_score,▁▇█▇▇▇▇▇▇█
val/map,▁▂▃▅▆▇▇███
val/rmse,█▃▂▁▁▁▁▁▁▁

0,1
train/accuracy,0.66206
train/classification_loss,0.14438
train/f1_score,0.61562
train/map,0.43161
train/rmse,0.03522
val/accuracy,0.6012
val/classification_loss,0.19904
val/f1_score,0.53866
val/map,0.39592
val/rmse,0.03509


[I 2025-08-10 13:25:23,928] Trial 3 finished with value: 0.299515854150321 and parameters: {'learning_rate': 1.3092236702298259e-05, 'classification_weight': 1.1200717972144119, 'regression_weight': 1.8764722801744818, 'detection_depth_weight': 0.16208250320848913, 'depth_map_weight': 0.44876776775996907, 'p_iou_threshold': 0.3811806409161767, 'n_iou_threshold': 0.18506326211030605}. Best is trial 1 with value: 0.14495912502155614.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:07<00:00,  1.66it/s, Acc=0.351, ClsLoss=0.366, F1=0.369, RMSE=0.171, mAP=0.124, TotalLoss=0.657]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.426, ClsLoss=0.191, F1=0.424, RMSE=0.059, mAP=0.157, TotalLoss=0.341]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:07<00:00,  1.65it/s, Acc=0.468, ClsLoss=0.164, F1=0.473, RMSE=0.043, mAP=0.239, TotalLoss=0.300]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.456, ClsLoss=0.155, F1=0.452, RMSE=0.037, mAP=0.226, TotalLoss=0.279]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:07<00:00,  1.66it/s, Acc=0.489, ClsLoss=0.138, F1=0.490, RMSE=0.035, mAP=0.314, TotalLoss=0.255]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.441, ClsLoss=0.158, F1=0.438, RMSE=0.034, mAP=0.271, TotalLoss=0.289]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:06<00:00,  1.66it/s, Acc=0.531, ClsLoss=0.100, F1=0.534, RMSE=0.035, mAP=0.368, 

0,1
train/accuracy,▁▄▄▆▆▆▇▇▇█
train/classification_loss,█▄▃▂▂▂▂▁▁▁
train/f1_score,▁▄▄▆▆▆▇▇▇█
train/map,▁▃▄▅▆▇▇▇██
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▃▂▂▆██▆▆▇
val/classification_loss,█▅▅▄▃▂▁▇▁▁
val/f1_score,▁▃▂▂▆██▆▆▇
val/map,▁▃▄▅▅▆▇▇▇█
val/rmse,█▂▁▁▁▁▁▁▁▁

0,1
train/accuracy,0.62587
train/classification_loss,0.04021
train/f1_score,0.62173
train/map,0.51676
train/rmse,0.03514
val/accuracy,0.50107
val/classification_loss,0.09928
val/f1_score,0.49343
val/map,0.43566
val/rmse,0.03516


[I 2025-08-10 13:59:40,185] Trial 4 finished with value: 0.17920076194245782 and parameters: {'learning_rate': 5.2683434322392304e-05, 'classification_weight': 1.689085662817106, 'regression_weight': 0.6915997380138458, 'detection_depth_weight': 0.3326812293599225, 'depth_map_weight': 0.2349275359485685, 'p_iou_threshold': 0.6320655811213576, 'n_iou_threshold': 0.12841288023551728}. Best is trial 1 with value: 0.14495912502155614.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.576, ClsLoss=0.264, F1=0.544, RMSE=0.094, mAP=0.304, TotalLoss=0.318]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.595, ClsLoss=0.228, F1=0.529, RMSE=0.034, mAP=0.380, TotalLoss=0.267]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.634, ClsLoss=0.185, F1=0.581, RMSE=0.035, mAP=0.417, TotalLoss=0.218]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.597, ClsLoss=0.246, F1=0.537, RMSE=0.035, mAP=0.397, TotalLoss=0.271]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.643, ClsLoss=0.164, F1=0.597, RMSE=0.035, mAP=0.433, TotalLoss=0.193]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.32it/s, Acc=0.602, ClsLoss=0.241, F1=0.547, RMSE=0.034, mAP=0.407, TotalLoss=0.265]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:26<00:00,  1.50it/s, Acc=0.664, ClsLoss=0.153, F1=0.628, RMSE=0.035, mAP=0.449, 

0,1
train/accuracy,▁▃▃▄▄▅▅▇▇█
train/classification_loss,█▅▅▄▄▃▃▂▁▁
train/f1_score,▁▂▂▃▄▅▅▆▇█
train/map,▁▃▄▄▅▅▆▇▇█
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▂▃▄▇█▇█▄▆
val/classification_loss,▅█▇▆▆▅▃▃▁▁
val/f1_score,▁▂▂▃▅▇▆▇██
val/map,▁▂▃▅▇▅▇█▆█
val/rmse,▃▄▂▅▇▁▂▅█▃

0,1
train/accuracy,0.81526
train/classification_loss,0.05896
train/f1_score,0.825
train/map,0.63701
train/rmse,0.03545
val/accuracy,0.61522
val/classification_loss,0.20414
val/f1_score,0.62028
val/map,0.47148
val/rmse,0.0346


[I 2025-08-10 14:37:08,866] Trial 5 finished with value: 0.2271784991432632 and parameters: {'learning_rate': 0.0001301420535178772, 'classification_weight': 0.8942199077113089, 'regression_weight': 1.1328944941665642, 'detection_depth_weight': 0.4105435206241591, 'depth_map_weight': 0.10198640729345856, 'p_iou_threshold': 0.37320748563139405, 'n_iou_threshold': 0.3823356721527512}. Best is trial 1 with value: 0.14495912502155614.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:11<00:00,  1.62it/s, Acc=0.469, ClsLoss=0.220, F1=0.472, RMSE=0.039, mAP=0.318, TotalLoss=0.208]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.448, ClsLoss=0.131, F1=0.447, RMSE=0.035, mAP=0.320, TotalLoss=0.104]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:10<00:00,  1.63it/s, Acc=0.523, ClsLoss=0.112, F1=0.526, RMSE=0.038, mAP=0.380, TotalLoss=0.090]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.454, ClsLoss=0.133, F1=0.458, RMSE=0.035, mAP=0.342, TotalLoss=0.105]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:10<00:00,  1.63it/s, Acc=0.528, ClsLoss=0.080, F1=0.526, RMSE=0.038, mAP=0.392, TotalLoss=0.066]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.446, ClsLoss=0.119, F1=0.451, RMSE=0.035, mAP=0.339, TotalLoss=0.104]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:09<00:00,  1.63it/s, Acc=0.528, ClsLoss=0.077, F1=0.532, RMSE=0.038, mAP=0.392, 

0,1
train/accuracy,▁▅▆▆▇▇▇███
train/classification_loss,█▃▂▂▁▁▁▁▁▃
train/f1_score,▁▅▅▆▇▇▇█▇█
train/map,▁▅▆▆▇█▇███
train/rmse,█▃▃▃▁▃▃▃▃▃
val/accuracy,▄▄▄▆▄▆▂▄█▁
val/classification_loss,██▆▁▆▆▃▃▁▄
val/f1_score,▃▄▃▅▂▆▂▃█▁
val/map,▁▄▃▅▆█▅▅▇▆
val/rmse,▁█████████

0,1
train/accuracy,0.55722
train/classification_loss,0.11562
train/f1_score,0.55626
train/map,0.41892
train/rmse,0.03766
val/accuracy,0.39067
val/classification_loss,0.1088
val/f1_score,0.41626
val/map,0.35531
val/rmse,0.03549


[I 2025-08-10 15:11:54,222] Trial 6 finished with value: 0.07242675338306986 and parameters: {'learning_rate': 0.003552281394907798, 'classification_weight': 0.6736651453623459, 'regression_weight': 0.9084780480855039, 'detection_depth_weight': 0.35184588085349644, 'depth_map_weight': 0.18501431401164817, 'p_iou_threshold': 0.6164303603103478, 'n_iou_threshold': 0.15941486858962342}. Best is trial 6 with value: 0.07242675338306986.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:45<00:00,  1.87it/s, Acc=0.336, ClsLoss=0.158, F1=0.338, RMSE=0.050, mAP=0.171, TotalLoss=0.160]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.288, ClsLoss=0.075, F1=0.292, RMSE=0.035, mAP=0.186, TotalLoss=0.134]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:45<00:00,  1.87it/s, Acc=0.404, ClsLoss=0.122, F1=0.400, RMSE=0.035, mAP=0.272, TotalLoss=0.144]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.293, ClsLoss=0.061, F1=0.297, RMSE=0.036, mAP=0.183, TotalLoss=0.062]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:45<00:00,  1.87it/s, Acc=0.432, ClsLoss=0.196, F1=0.432, RMSE=0.035, mAP=0.299, TotalLoss=0.192]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.293, ClsLoss=0.030, F1=0.293, RMSE=0.034, mAP=0.204, TotalLoss=0.058]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:45<00:00,  1.87it/s, Acc=0.450, ClsLoss=0.037, F1=0.449, RMSE=0.035, mAP=0.325, 

0,1
train/accuracy,▁▄▆▇▇▇▇███
train/classification_loss,▇▅█▂▁▂▃▁▁▂
train/f1_score,▁▄▆▇▇▇▇███
train/map,▁▄▅▆▇▇▇███
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▂▂▃█▂█▃▇█
val/classification_loss,▇▆▃▄▃█▃▃▃▁
val/f1_score,▁▂▁▃▇▃█▄▇█
val/map,▁▁▃▅█▆█▇█▇
val/rmse,▄▆▁▁▂▂▂▃▄█

0,1
train/accuracy,0.47409
train/classification_loss,0.03889
train/f1_score,0.47341
train/map,0.39129
train/rmse,0.03512
val/accuracy,0.32051
val/classification_loss,0.01425
val/f1_score,0.32265
val/map,0.24112
val/rmse,0.03733


[I 2025-08-10 15:42:34,063] Trial 7 finished with value: 0.014146041291846463 and parameters: {'learning_rate': 0.00040567870430664255, 'classification_weight': 0.6361435407895153, 'regression_weight': 1.7475245789525151, 'detection_depth_weight': 0.38060624731679427, 'depth_map_weight': 0.301875722351189, 'p_iou_threshold': 0.6988018957643491, 'n_iou_threshold': 0.38633771808693684}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.329, ClsLoss=0.148, F1=0.331, RMSE=0.054, mAP=0.138, TotalLoss=0.248]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.277, ClsLoss=0.076, F1=0.282, RMSE=0.034, mAP=0.169, TotalLoss=0.129]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.410, ClsLoss=0.115, F1=0.410, RMSE=0.035, mAP=0.295, TotalLoss=0.200]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.307, ClsLoss=0.052, F1=0.312, RMSE=0.035, mAP=0.225, TotalLoss=0.092]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.429, ClsLoss=0.028, F1=0.426, RMSE=0.035, mAP=0.332, TotalLoss=0.079]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.330, ClsLoss=0.025, F1=0.333, RMSE=0.035, mAP=0.251, TotalLoss=0.047]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.455, ClsLoss=0.048, F1=0.454, RMSE=0.035, mAP=0.370, 

0,1
train/accuracy,▁▅▆▇█▇████
train/classification_loss,█▆▂▃▁▄▁▂▁▃
train/f1_score,▁▅▅▇█▇████
train/map,▁▅▆▇▇▇▇███
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▃▅▆▄▄▇▆▅█
val/classification_loss,▃▃▂█▂▂▁▁▁▁
val/f1_score,▁▄▆▆▄▄▇▅▅█
val/map,▁▄▅▆▆▇█▇▇█
val/rmse,▁▃▃▁▃▁▃█▂▂

0,1
train/accuracy,0.48323
train/classification_loss,0.03859
train/f1_score,0.48294
train/map,0.42543
train/rmse,0.03505
val/accuracy,0.36218
val/classification_loss,0.00838
val/f1_score,0.36081
val/map,0.29359
val/rmse,0.03416


[I 2025-08-10 16:13:23,297] Trial 8 finished with value: 0.02072382223032503 and parameters: {'learning_rate': 0.0001790941521542287, 'classification_weight': 1.2611033130840446, 'regression_weight': 1.820545395104868, 'detection_depth_weight': 0.20188972001851657, 'depth_map_weight': 0.33091699787786677, 'p_iou_threshold': 0.6924289205194154, 'n_iou_threshold': 0.2512017229094623}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:53<00:00,  1.79it/s, Acc=0.298, ClsLoss=0.266, F1=0.312, RMSE=0.124, mAP=0.089, TotalLoss=0.395]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.345, ClsLoss=0.151, F1=0.348, RMSE=0.045, mAP=0.094, TotalLoss=0.218]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:53<00:00,  1.79it/s, Acc=0.428, ClsLoss=0.106, F1=0.432, RMSE=0.037, mAP=0.200, TotalLoss=0.159]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.328, ClsLoss=0.084, F1=0.335, RMSE=0.035, mAP=0.177, TotalLoss=0.128]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:52<00:00,  1.79it/s, Acc=0.482, ClsLoss=0.060, F1=0.480, RMSE=0.035, mAP=0.322, TotalLoss=0.100]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.326, ClsLoss=0.090, F1=0.330, RMSE=0.035, mAP=0.222, TotalLoss=0.141]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:53<00:00,  1.79it/s, Acc=0.500, ClsLoss=0.034, F1=0.499, RMSE=0.035, mAP=0.394, 

0,1
train/accuracy,▁▅▆▇▇▇████
train/classification_loss,█▄▂▂▁▁▁▁▁▁
train/f1_score,▁▅▆▇▇▇████
train/map,▁▃▅▆▇▇████
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▄▁▁▅▄▇▆█▇▅
val/classification_loss,█▃▄▂▂▁▁▃▁▂
val/f1_score,▄▂▁▅▃▇▅█▇▅
val/map,▁▄▅▆▇▇▇███
val/rmse,█▂▁▁▂▂▁▁▂▁

0,1
train/accuracy,0.55022
train/classification_loss,0.00548
train/f1_score,0.55014
train/map,0.48829
train/rmse,0.03515
val/accuracy,0.35064
val/classification_loss,0.05677
val/f1_score,0.35305
val/map,0.29503
val/rmse,0.03405


[I 2025-08-10 16:45:16,888] Trial 9 finished with value: 0.07865401620218994 and parameters: {'learning_rate': 5.5292857018056875e-05, 'classification_weight': 1.3885316498641553, 'regression_weight': 0.9150176817378766, 'detection_depth_weight': 0.3312325962942875, 'depth_map_weight': 0.2194157408802837, 'p_iou_threshold': 0.6771717173841169, 'n_iou_threshold': 0.2629609545194854}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:21<00:00,  1.54it/s, Acc=0.558, ClsLoss=0.457, F1=0.544, RMSE=0.048, mAP=0.327, TotalLoss=0.364]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.555, ClsLoss=0.252, F1=0.518, RMSE=0.034, mAP=0.312, TotalLoss=0.165]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:21<00:00,  1.54it/s, Acc=0.630, ClsLoss=0.130, F1=0.613, RMSE=0.035, mAP=0.406, TotalLoss=0.107]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.34it/s, Acc=0.561, ClsLoss=0.131, F1=0.543, RMSE=0.034, mAP=0.373, TotalLoss=0.096]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:21<00:00,  1.54it/s, Acc=0.674, ClsLoss=0.092, F1=0.655, RMSE=0.035, mAP=0.446, TotalLoss=0.078]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.552, ClsLoss=0.115, F1=0.533, RMSE=0.034, mAP=0.365, TotalLoss=0.081]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.55it/s, Acc=0.715, ClsLoss=0.075, F1=0.709, RMSE=0.035, mAP=0.489, 

0,1
train/accuracy,▁▃▄▅▅▇▇▇██
train/classification_loss,█▃▂▂▂▂▁▁▁▁
train/f1_score,▁▃▄▅▅▇▇▇██
train/map,▁▃▄▅▅▆▇▇██
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▂▁▂▅▅▆▅▇█
val/classification_loss,█▃▂▂▂▂▂▁▁▁
val/f1_score,▁▃▂▃▆▆▆▆▇█
val/map,▁▄▃▅▅▆▇▇██
val/rmse,▁▂▂▂▃▃▃█▁▁

0,1
train/accuracy,0.8158
train/classification_loss,0.02576
train/f1_score,0.81418
train/map,0.64842
train/rmse,0.03522
val/accuracy,0.62908
val/classification_loss,0.09388
val/f1_score,0.62112
val/map,0.47361
val/rmse,0.03343


[I 2025-08-10 17:21:52,016] Trial 10 finished with value: 0.06827960982655686 and parameters: {'learning_rate': 0.000983335064312507, 'classification_weight': 0.5269828108991573, 'regression_weight': 1.5338115058039041, 'detection_depth_weight': 0.49332158117902225, 'depth_map_weight': 0.3753093855715984, 'p_iou_threshold': 0.5586446211858067, 'n_iou_threshold': 0.3169885298146174}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.312, ClsLoss=0.198, F1=0.316, RMSE=0.098, mAP=0.137, TotalLoss=0.339]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.290, ClsLoss=0.125, F1=0.306, RMSE=0.038, mAP=0.180, TotalLoss=0.219]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.424, ClsLoss=0.045, F1=0.423, RMSE=0.036, mAP=0.305, TotalLoss=0.115]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.325, ClsLoss=0.064, F1=0.327, RMSE=0.034, mAP=0.203, TotalLoss=0.149]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.457, ClsLoss=0.057, F1=0.455, RMSE=0.035, mAP=0.347, TotalLoss=0.122]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.319, ClsLoss=0.045, F1=0.323, RMSE=0.034, mAP=0.229, TotalLoss=0.104]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.477, ClsLoss=0.017, F1=0.476, RMSE=0.034, mAP=0.377, 

0,1
train/accuracy,▁▅▆▇██████
train/classification_loss,█▃▃▂▁▃▁▁▁▄
train/f1_score,▁▅▆▇▇█████
train/map,▁▅▆▇▇█████
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▅▄▄▇▇▆▅█▆
val/classification_loss,█▄▂▃▆▅▆▆▃▁
val/f1_score,▁▄▃▃▆▆▅▅█▆
val/map,▁▂▄▆▇▆▇▇██
val/rmse,█▂▁▁▁▁▂▁▂▂

0,1
train/accuracy,0.4929
train/classification_loss,0.09195
train/f1_score,0.4929
train/map,0.43468
train/rmse,0.03493
val/accuracy,0.34103
val/classification_loss,0.02716
val/f1_score,0.34338
val/map,0.29052
val/rmse,0.03464


[I 2025-08-10 17:52:48,594] Trial 11 finished with value: 0.04505068290041564 and parameters: {'learning_rate': 0.00018206157603277632, 'classification_weight': 1.4226166619536798, 'regression_weight': 1.9805247221072184, 'detection_depth_weight': 0.2449000477269571, 'depth_map_weight': 0.3384769976942713, 'p_iou_threshold': 0.687678267287252, 'n_iou_threshold': 0.24351178591245679}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.54it/s, Acc=0.560, ClsLoss=0.400, F1=0.546, RMSE=0.040, mAP=0.365, TotalLoss=0.732]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.478, ClsLoss=0.313, F1=0.466, RMSE=0.035, mAP=0.346, TotalLoss=0.481]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.54it/s, Acc=0.581, ClsLoss=0.238, F1=0.573, RMSE=0.038, mAP=0.414, TotalLoss=0.632]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.516, ClsLoss=2.397, F1=0.511, RMSE=0.035, mAP=0.366, TotalLoss=4.353]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.55it/s, Acc=0.605, ClsLoss=0.310, F1=0.591, RMSE=0.038, mAP=0.425, TotalLoss=0.755]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.35it/s, Acc=0.481, ClsLoss=0.492, F1=0.498, RMSE=0.035, mAP=0.389, TotalLoss=1.195]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.55it/s, Acc=0.610, ClsLoss=0.264, F1=0.603, RMSE=0.038, mAP=0.426, 

0,1
train/accuracy,▁▂▃▃▄▄▆▇▇█
train/classification_loss,█▅▆▅▃▂▂▂▂▁
train/f1_score,▁▂▃▃▄▄▆▇▇█
train/map,▁▃▃▃▄▅▆▆▇█
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▃▁▅▃▇▃█▅▇
val/classification_loss,▂█▂▁▁▁▁▁▁▁
val/f1_score,▁▃▃▄▃▇▅█▆▇
val/map,▁▂▄▂▄▄▃▅▆█
val/rmse,▁█████████

0,1
train/accuracy,0.74947
train/classification_loss,0.06375
train/f1_score,0.74995
train/map,0.56235
train/rmse,0.03766
val/accuracy,0.58717
val/classification_loss,0.11236
val/f1_score,0.57527
val/map,0.45085
val/rmse,0.03549


[I 2025-08-10 18:29:20,122] Trial 12 finished with value: 0.16452689370714965 and parameters: {'learning_rate': 0.001593772220282828, 'classification_weight': 1.2536837443429862, 'regression_weight': 1.6348825684114165, 'detection_depth_weight': 0.23654472226279918, 'depth_map_weight': 0.30267297735018006, 'p_iou_threshold': 0.5562265686146393, 'n_iou_threshold': 0.32746014250329286}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:28<00:00,  1.49it/s, Acc=0.606, ClsLoss=0.233, F1=0.547, RMSE=0.056, mAP=0.349, TotalLoss=0.453]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.570, ClsLoss=0.258, F1=0.503, RMSE=0.035, mAP=0.354, TotalLoss=0.486]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:27<00:00,  1.49it/s, Acc=0.629, ClsLoss=0.202, F1=0.572, RMSE=0.035, mAP=0.399, TotalLoss=0.389]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.33it/s, Acc=0.576, ClsLoss=0.240, F1=0.514, RMSE=0.036, mAP=0.371, TotalLoss=0.458]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:27<00:00,  1.49it/s, Acc=0.641, ClsLoss=0.179, F1=0.597, RMSE=0.035, mAP=0.421, TotalLoss=0.350]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:18<00:00,  4.32it/s, Acc=0.576, ClsLoss=0.233, F1=0.519, RMSE=0.035, mAP=0.379, TotalLoss=0.438]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:27<00:00,  1.49it/s, Acc=0.659, ClsLoss=0.157, F1=0.630, RMSE=0.035, mAP=0.458, 

0,1
train/accuracy,▁▂▂▂▃▄▅▆▇█
train/classification_loss,█▇▆▅▄▄▃▂▁▁
train/f1_score,▁▂▂▃▄▅▆▇██
train/map,▁▂▂▃▄▅▆▆▇█
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▂▂▄▂▁▅▅█▅
val/classification_loss,▅▃▃▁▂▂▅▃▅█
val/f1_score,▁▂▂▅▄▅▅▇█▇
val/map,▁▂▂▃▄▂▆▆██
val/rmse,▃▇▃▂▆▆▃█▁▇

0,1
train/accuracy,0.88872
train/classification_loss,0.03909
train/f1_score,0.89769
train/map,0.72234
train/rmse,0.03512
val/accuracy,0.60024
val/classification_loss,0.29056
val/f1_score,0.58734
val/map,0.5047
val/rmse,0.03581


[I 2025-08-10 19:07:00,062] Trial 13 finished with value: 0.4113715348335413 and parameters: {'learning_rate': 0.00024649919868543086, 'classification_weight': 1.559753352047727, 'regression_weight': 1.3363832830903184, 'detection_depth_weight': 0.2481710274202371, 'depth_map_weight': 0.3859850424106454, 'p_iou_threshold': 0.3065288118848394, 'n_iou_threshold': 0.3984487022943964}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:14<00:00,  1.60it/s, Acc=0.415, ClsLoss=0.337, F1=0.434, RMSE=0.231, mAP=0.146, TotalLoss=0.750]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.442, ClsLoss=0.191, F1=0.442, RMSE=0.098, mAP=0.196, TotalLoss=0.434]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:13<00:00,  1.60it/s, Acc=0.588, ClsLoss=0.126, F1=0.576, RMSE=0.067, mAP=0.330, TotalLoss=0.300]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.37it/s, Acc=0.483, ClsLoss=0.134, F1=0.474, RMSE=0.053, mAP=0.297, TotalLoss=0.316]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:13<00:00,  1.60it/s, Acc=0.667, ClsLoss=0.063, F1=0.652, RMSE=0.044, mAP=0.461, TotalLoss=0.184]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.561, ClsLoss=0.104, F1=0.540, RMSE=0.041, mAP=0.373, TotalLoss=0.253]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:13<00:00,  1.60it/s, Acc=0.701, ClsLoss=0.040, F1=0.695, RMSE=0.038, mAP=0.541, 

0,1
train/accuracy,▁▄▆▆▇▇████
train/classification_loss,█▄▂▂▂▁▂▁▁▁
train/f1_score,▁▄▅▆▇▇████
train/map,▁▃▅▆▇▇▇███
train/rmse,█▂▁▁▁▁▁▁▁▁
val/accuracy,▁▃▇█▆▇▇███
val/classification_loss,█▄▂▁▂▂▂▂▂▂
val/f1_score,▁▃▇▇▆▇▆▇██
val/map,▁▃▅▆▆▇▇███
val/rmse,█▃▂▁▁▁▁▁▁▁

0,1
train/accuracy,0.79246
train/classification_loss,0.00313
train/f1_score,0.79447
train/map,0.70058
train/rmse,0.03506
val/accuracy,0.56886
val/classification_loss,0.10713
val/f1_score,0.55837
val/map,0.48099
val/rmse,0.03448


[I 2025-08-10 19:42:21,947] Trial 14 finished with value: 0.22246332696373336 and parameters: {'learning_rate': 7.298914065895153e-05, 'classification_weight': 1.9335701123081082, 'regression_weight': 1.7547973740994351, 'detection_depth_weight': 0.45674343406445894, 'depth_map_weight': 0.29776098838813947, 'p_iou_threshold': 0.6077011901891718, 'n_iou_threshold': 0.25592213813535725}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.127, ClsLoss=0.476, F1=0.142, RMSE=0.287, mAP=0.063, TotalLoss=0.379]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.41it/s, Acc=0.175, ClsLoss=0.141, F1=0.193, RMSE=0.121, mAP=0.054, TotalLoss=0.116]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.303, ClsLoss=0.161, F1=0.316, RMSE=0.076, mAP=0.080, TotalLoss=0.127]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.210, ClsLoss=0.123, F1=0.223, RMSE=0.057, mAP=0.061, TotalLoss=0.106]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:46<00:00,  1.86it/s, Acc=0.375, ClsLoss=0.132, F1=0.379, RMSE=0.046, mAP=0.101, TotalLoss=0.109]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.41it/s, Acc=0.227, ClsLoss=0.113, F1=0.238, RMSE=0.041, mAP=0.071, TotalLoss=0.103]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:47<00:00,  1.85it/s, Acc=0.415, ClsLoss=0.099, F1=0.413, RMSE=0.038, mAP=0.131, 

0,1
train/accuracy,▁▅▆▇▇▇████
train/classification_loss,█▃▃▂▂▂▁▁▁▁
train/f1_score,▁▅▆▇▇▇████
train/map,▁▁▂▃▄▅▆▇██
train/rmse,█▂▁▁▁▁▁▁▁▁
val/accuracy,▁▃▄▅▆▆▆██▇
val/classification_loss,█▆▅▅▃▂▁▁▁▁
val/f1_score,▁▃▄▅▅▆▆██▇
val/map,▁▁▂▃▅▅▇▇▇█
val/rmse,█▃▂▁▁▁▁▁▁▁

0,1
train/accuracy,0.46301
train/classification_loss,0.01615
train/f1_score,0.46123
train/map,0.33677
train/rmse,0.03455
val/accuracy,0.26774
val/classification_loss,0.07312
val/f1_score,0.27733
val/map,0.1671
val/rmse,0.03417


[I 2025-08-10 20:13:14,438] Trial 15 finished with value: 0.06751714493368173 and parameters: {'learning_rate': 1.8850856978015146e-05, 'classification_weight': 0.6820388395823366, 'regression_weight': 1.3253698995347996, 'detection_depth_weight': 0.1795246324432812, 'depth_map_weight': 0.39200516444060574, 'p_iou_threshold': 0.6951168950053663, 'n_iou_threshold': 0.3452036822324188}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.54it/s, Acc=0.521, ClsLoss=0.398, F1=0.518, RMSE=0.048, mAP=0.307, TotalLoss=0.530]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.525, ClsLoss=0.180, F1=0.496, RMSE=0.033, mAP=0.362, TotalLoss=0.249]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:21<00:00,  1.54it/s, Acc=0.595, ClsLoss=0.176, F1=0.565, RMSE=0.035, mAP=0.425, TotalLoss=0.234]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.470, ClsLoss=0.168, F1=0.469, RMSE=0.034, mAP=0.363, TotalLoss=0.224]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.55it/s, Acc=0.609, ClsLoss=0.145, F1=0.589, RMSE=0.035, mAP=0.435, TotalLoss=0.200]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.529, ClsLoss=0.150, F1=0.489, RMSE=0.034, mAP=0.383, TotalLoss=0.204]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:20<00:00,  1.54it/s, Acc=0.617, ClsLoss=0.121, F1=0.596, RMSE=0.035, mAP=0.453, 

0,1
train/accuracy,▁▅▅▆▇▇▇█▇█
train/classification_loss,█▃▂▂▂▂▁▁▁▁
train/f1_score,▁▃▅▅▆▇▆█▇█
train/map,▁▅▆▆▆▇▇▇██
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▄▁▄▄▅▇▇█▆▆
val/classification_loss,█▇▅▄▃▄▂▁▂▁
val/f1_score,▂▁▂▃▄▄▇█▆▆
val/map,▁▁▃▁▃█▅▆▇▅
val/rmse,▁▂▂█▃▂▃▁▅▄

0,1
train/accuracy,0.66514
train/classification_loss,0.07738
train/f1_score,0.65922
train/map,0.49712
train/rmse,0.03512
val/accuracy,0.56773
val/classification_loss,0.10462
val/f1_score,0.56514
val/map,0.40685
val/rmse,0.03574


[I 2025-08-10 20:49:45,826] Trial 16 finished with value: 0.14346115569666532 and parameters: {'learning_rate': 0.00042577344743492776, 'classification_weight': 1.1419386359061607, 'regression_weight': 1.7540079571057503, 'detection_depth_weight': 0.27551922012019997, 'depth_map_weight': 0.290176225622549, 'p_iou_threshold': 0.5548511474161075, 'n_iou_threshold': 0.2834195446106313}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:06<00:00,  1.66it/s, Acc=0.254, ClsLoss=0.406, F1=0.289, RMSE=0.185, mAP=0.107, TotalLoss=0.630]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.39it/s, Acc=0.348, ClsLoss=0.223, F1=0.369, RMSE=0.068, mAP=0.116, TotalLoss=0.394]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:07<00:00,  1.66it/s, Acc=0.503, ClsLoss=0.194, F1=0.519, RMSE=0.050, mAP=0.155, TotalLoss=0.320]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.38it/s, Acc=0.426, ClsLoss=0.190, F1=0.435, RMSE=0.042, mAP=0.140, TotalLoss=0.328]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:06<00:00,  1.66it/s, Acc=0.545, ClsLoss=0.138, F1=0.548, RMSE=0.038, mAP=0.224, TotalLoss=0.238]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.37it/s, Acc=0.437, ClsLoss=0.159, F1=0.444, RMSE=0.036, mAP=0.184, TotalLoss=0.304]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:06<00:00,  1.67it/s, Acc=0.568, ClsLoss=0.095, F1=0.569, RMSE=0.035, mAP=0.304, 

0,1
train/accuracy,▁▅▆▆▇▇▇███
train/classification_loss,█▄▃▂▂▂▁▁▁▁
train/f1_score,▁▅▆▆▇▇▇███
train/map,▁▂▃▄▅▆▇▇██
train/rmse,█▂▁▁▁▁▁▁▁▁
val/accuracy,▁▅▆▆▅▇▇███
val/classification_loss,█▆▄▄▃▃▂▁▁▂
val/f1_score,▁▅▆▆▅▇▇███
val/map,▁▂▃▄▅▆▆▇▇█
val/rmse,█▃▁▁▁▁▁▁▁▁

0,1
train/accuracy,0.6732
train/classification_loss,0.01954
train/f1_score,0.67338
train/map,0.52571
train/rmse,0.03504
val/accuracy,0.47915
val/classification_loss,0.10705
val/f1_score,0.47878
val/map,0.34821
val/rmse,0.03469


[I 2025-08-10 21:23:54,632] Trial 17 finished with value: 0.18662569232625315 and parameters: {'learning_rate': 2.8940539809640463e-05, 'classification_weight': 1.3253305092294358, 'regression_weight': 1.960415125795786, 'detection_depth_weight': 0.2029198023724233, 'depth_map_weight': 0.4875318820523147, 'p_iou_threshold': 0.6335037222455735, 'n_iou_threshold': 0.10083158741008455}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [03:15<00:00,  1.59it/s, Acc=0.517, ClsLoss=0.718, F1=0.506, RMSE=0.038, mAP=0.336, TotalLoss=1.256]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.37it/s, Acc=0.514, ClsLoss=0.198, F1=0.476, RMSE=0.034, mAP=0.356, TotalLoss=0.733]
Epoch 2/10 Train: 100%|██████████| 310/310 [03:15<00:00,  1.58it/s, Acc=0.567, ClsLoss=0.211, F1=0.555, RMSE=0.035, mAP=0.381, TotalLoss=0.382]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.37it/s, Acc=0.431, ClsLoss=0.164, F1=0.439, RMSE=0.035, mAP=0.324, TotalLoss=0.587]
Epoch 3/10 Train: 100%|██████████| 310/310 [03:14<00:00,  1.59it/s, Acc=0.552, ClsLoss=0.253, F1=0.546, RMSE=0.035, mAP=0.391, TotalLoss=0.596]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.36it/s, Acc=0.438, ClsLoss=0.388, F1=0.446, RMSE=0.035, mAP=0.340, TotalLoss=0.581]
Epoch 4/10 Train: 100%|██████████| 310/310 [03:15<00:00,  1.59it/s, Acc=0.577, ClsLoss=0.099, F1=0.571, RMSE=0.035, mAP=0.405, 

0,1
train/accuracy,▁▄▃▄▅▅▇▅▅█
train/classification_loss,▃▁▂▁▂▂▁█▁▂
train/f1_score,▁▃▃▄▅▅▇▅▆█
train/map,▁▃▄▅▅▆▇▆▇█
train/rmse,█▁▁▁▁▂▁▁▁▂
val/accuracy,▅▁▁▆▅▆▅▅█▇
val/classification_loss,▃▂█▁▁▃▁▁▁▁
val/f1_score,▃▁▁▆▄▆▅▆██
val/map,▄▁▂▄▆▇▅▅██
val/rmse,▂▃▄▂▁▁▃█▃▁

0,1
train/accuracy,0.64516
train/classification_loss,0.45916
train/f1_score,0.64361
train/map,0.47114
train/rmse,0.03548
val/accuracy,0.5496
val/classification_loss,0.10985
val/f1_score,0.54372
val/map,0.40517
val/rmse,0.03361


[I 2025-08-10 21:59:31,391] Trial 18 finished with value: 0.15365224707933614 and parameters: {'learning_rate': 0.0019388437746094128, 'classification_weight': 1.059172047072042, 'regression_weight': 1.4634653141697418, 'detection_depth_weight': 0.40148055997259535, 'depth_map_weight': 0.3405956106700145, 'p_iou_threshold': 0.5873726591464528, 'n_iou_threshold': 0.22502636843501506}. Best is trial 7 with value: 0.014146041291846463.


Epoch 1/10 Train: 100%|██████████| 310/310 [02:56<00:00,  1.75it/s, Acc=0.341, ClsLoss=0.231, F1=0.352, RMSE=0.123, mAP=0.126, TotalLoss=0.214]
Epoch 1/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.328, ClsLoss=0.178, F1=0.336, RMSE=0.047, mAP=0.165, TotalLoss=0.178]
Epoch 2/10 Train: 100%|██████████| 310/310 [02:55<00:00,  1.77it/s, Acc=0.480, ClsLoss=0.077, F1=0.475, RMSE=0.039, mAP=0.308, TotalLoss=0.098]
Epoch 2/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.41it/s, Acc=0.386, ClsLoss=0.140, F1=0.387, RMSE=0.036, mAP=0.248, TotalLoss=0.133]
Epoch 3/10 Train: 100%|██████████| 310/310 [02:56<00:00,  1.76it/s, Acc=0.553, ClsLoss=0.043, F1=0.553, RMSE=0.035, mAP=0.401, TotalLoss=0.063]
Epoch 3/10 Validation: 100%|██████████| 78/78 [00:17<00:00,  4.40it/s, Acc=0.387, ClsLoss=0.121, F1=0.387, RMSE=0.034, mAP=0.289, TotalLoss=0.119]
Epoch 4/10 Train: 100%|██████████| 310/310 [02:56<00:00,  1.76it/s, Acc=0.563, ClsLoss=0.018, F1=0.563, RMSE=0.035, mAP=0.435, 

0,1
train/accuracy,▁▅▇▇▇█████
train/classification_loss,█▃▂▁▂▂▁▂▁▁
train/f1_score,▁▄▇▇▇█████
train/map,▁▄▆▆▇▇████
train/rmse,█▁▁▁▁▁▁▁▁▁
val/accuracy,▁▅▅▇▆▆█▇▇▇
val/classification_loss,█▅▄▄▂▃▃▁▁▁
val/f1_score,▁▅▅▇▆▆█▇██
val/map,▁▄▆▆▇▇▇███
val/rmse,█▂▁▁▁▁▁▁▁▁

0,1
train/accuracy,0.60484
train/classification_loss,0.00209
train/f1_score,0.60521
train/map,0.51877
train/rmse,0.035
val/accuracy,0.42094
val/classification_loss,0.07421
val/f1_score,0.42498
val/map,0.35244
val/rmse,0.03409


[I 2025-08-10 22:31:57,691] Trial 19 finished with value: 0.07431056797026823 and parameters: {'learning_rate': 0.00010251016313975215, 'classification_weight': 0.7329382463081738, 'regression_weight': 1.739701486354742, 'detection_depth_weight': 0.11432038019962487, 'depth_map_weight': 0.2722463047077742, 'p_iou_threshold': 0.6643865834002174, 'n_iou_threshold': 0.16730229069057762}. Best is trial 7 with value: 0.014146041291846463.


En iyi parametreler: {'learning_rate': 0.00040567870430664255, 'classification_weight': 0.6361435407895153, 'regression_weight': 1.7475245789525151, 'detection_depth_weight': 0.38060624731679427, 'depth_map_weight': 0.301875722351189, 'p_iou_threshold': 0.6988018957643491, 'n_iou_threshold': 0.38633771808693684}


In [None]:
# ön eğitim ile belirlediğimiz en iyi hiper parametreler
#'learning_rate': 5.5292857018056875e-05,
#'classification_weight': 1.3885316498641553,
#'regression_weight': 0.9150176817378766,
#'detection_depth_weight': 0.3312325962942875,
#'depth_map_weight': 0.2194157408802837,
#'p_iou_threshold': 0.6771717173841169,
#'n_iou_threshold': 0.2629609545194854


In [6]:
task_weights = {
          'classification': 1.3885316498641553,
          'regression': 0.9150176817378766,
          'depth': 0.3312325962942875,
          'depth_map': 0.2194157408802837,}
model = CompleteMultiTaskModel(num_classes=8, num_anchors=9).to(device)
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=20,
    early_stop_patience=3,
    learning_rate=1e-4,
    device=device,
    save_path='/content/drive/MyDrive/model_best_weights.pth',
    class_weights=class_weights,
    task_weights=task_weights,
    scheduler_patience=2,
    scheduler_factor=0.2,
    p_iou_threshold=0.67,
    n_iou_threshold=0.26
    )

[34m[1mwandb[0m: Currently logged in as: [33mmehmeteminuludag[0m ([33mmehmeteminuludag-kirikkale-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/10 Train:   2%|▏         | 20/1169 [00:11<11:02,  1.73it/s, Acc=0.000, ClsLoss=0.723, F1=0.000, RMSE=0.448, mAP=0.019, TotalLoss=1.117]


KeyboardInterrupt: 