In [1]:
import numpy as np
import pandas as pd
import polars as pl
import os
import gc
import json
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import zarr
import napari

gc.enable()

pd.options.display.max_columns = None
#pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)

#pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)
pl.Config.set_fmt_str_lengths(10000)

polars.config.Config

In [2]:
path = 'I:/Kaggle/czii-cryo-et-object-identification/'

In [3]:
train_data_experiment_folders_path = path + 'train/static/ExperimentRuns/'
train_data_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/'

In [4]:
test_data_experiment_folders_path = path + 'test/static/ExperimentRuns/'
test_data_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/'

In [5]:
train_data_experiments = os.listdir(train_data_experiment_folders_path)
train_data_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3', 'TS_99_9']

In [6]:
test_data_experiments = os.listdir(test_data_experiment_folders_path)
test_data_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4']

In [7]:
data_dict = {}
for experiment in tqdm(train_data_experiments):
    image_types_dict = {}    
    image_types_dict['denoised'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/denoised.zarr', mode='r')
    image_types_dict['iso'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/isonetcorrected.zarr', mode='r')
    image_types_dict['dcon'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/ctfdeconvolved.zarr', mode='r')
    image_types_dict['wbp'] = zarr.open(train_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/wbp.zarr', mode='r')
    data_dict[experiment] = image_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 171.48it/s]


In [8]:
data_dict

{'TS_5_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr>,
  'iso': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/isonetcorrected.zarr>,
  'dcon': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/ctfdeconvolved.zarr>,
  'wbp': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/wbp.zarr>},
 'TS_69_2': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr>,
  'iso': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/isonetcorrected.zarr>,
  'dcon': <Group file://I:/Kaggle/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.00

In [9]:
test_data_dict = {}
for experiment in tqdm(test_data_experiments):
    image_types_dict = {}    
    image_types_dict['denoised'] = zarr.open(test_data_experiment_folders_path + f'{experiment}/VoxelSpacing10.000/denoised.zarr', mode='r')
    test_data_dict[experiment] = image_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 495.47it/s]


In [10]:
test_data_dict

{'TS_5_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr>},
 'TS_69_2': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr>},
 'TS_6_4': {'denoised': <Group file://I:/Kaggle/czii-cryo-et-object-identification/test/static/ExperimentRuns/TS_6_4/VoxelSpacing10.000/denoised.zarr>}}

In [11]:
train_label_experiment_folders_path = path + 'train/overlay/ExperimentRuns/'
train_label_experiment_folders_path

'I:/Kaggle/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/'

In [12]:
train_label_experiments = os.listdir(train_label_experiment_folders_path)
train_label_experiments

['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3', 'TS_99_9']

In [13]:
labels_dict = {}
for experiment in tqdm(train_label_experiments):
    particle_types_dict = {}
    
    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/apo-ferritin.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['apo-ferritin'] = loaded_json

    '''
    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/beta-amylase.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['beta-amylase'] = loaded_json
    '''

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/beta-galactosidase.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['beta-galactosidase'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/ribosome.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['ribosome'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/thyroglobulin.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['thyroglobulin'] = loaded_json

    with open(f'{train_label_experiment_folders_path}{experiment}/Picks/virus-like-particle.json') as f:
        loaded_json = json.loads(f.read())
    particle_types_dict['virus-like-particle'] = loaded_json

    labels_dict[experiment] = particle_types_dict

100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 522.37it/s]


In [14]:
labels_dict

{'TS_5_4': {'apo-ferritin': {'pickable_object_name': 'apo-ferritin',
   'user_id': 'curation',
   'session_id': '0',
   'run_name': 'TS_5_4',
   'voxel_spacing': None,
   'unit': 'angstrom',
   'points': [{'location': {'x': 468.514, 'y': 5915.906, 'z': 604.167},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5674.694, 'y': 1114.354, 'z': 565.068},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5744.509, 'y': 1049.172, 'z': 653.712},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      [0.0, 0.0, 1.0, 0.0],
      [0.0, 0.0, 0.0, 1.0]],
     'instance_id': 0},
    {'location': {'x': 5880.769, 'y': 1125.348, 'z': 579.56},
     'transformation_': [[1.0, 0.0, 0.0, 0.0],
      [0.0, 1.0, 0.0, 0.0],
      

In [22]:
particle_radius = {
    'apo-ferritin': 60,
    'beta-amylase': 65,
    'beta-galactosidase': 90,
    'ribosome': 150,
    'thyroglobulin': 130,
    'virus-like-particle': 135,
}

In [23]:
class_ids = {
    'apo-ferritin': 0,
    'beta-galactosidase': 1,
    'ribosome': 2,
    'thyroglobulin': 3,
    'virus-like-particle': 4,
}

In [28]:
experiment_list = []
particle_type_list = []
x_list = []
y_list = []
z_list = []
w_list = []
h_list = []
d_list = []
class_id_list = []
for experiment in tqdm(train_data_experiments):
    #print(experiment)
    #print(len(labels_dict[experiment]['apo-ferritin']['points']))
    #print(type(labels_dict[experiment]['apo-ferritin']['points']))
    #print(labels_dict[experiment]['apo-ferritin']['points'][0])

    for key in labels_dict[experiment].keys():
        #print(labels_dict[experiment][key])
        #print(labels_dict[experiment][key]['pickable_object_name'])
        for i in range(len(labels_dict[experiment][key]['points'])):
            experiment_list.append(labels_dict[experiment][key]['run_name'])
            particle_type_list.append(labels_dict[experiment][key]['pickable_object_name'])
            x_list.append(labels_dict[experiment][key]['points'][i]['location']['x'])
            y_list.append(labels_dict[experiment][key]['points'][i]['location']['y'])
            z_list.append(labels_dict[experiment][key]['points'][i]['location']['z'])
            w_list.append(particle_radius[key])
            h_list.append(particle_radius[key])
            d_list.append(particle_radius[key])
            class_id_list.append(class_ids[key])

100%|██████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 3500.25it/s]


In [29]:
labels_df = pd.DataFrame({'experiment':experiment_list, 'particle_type':particle_type_list, 'x':x_list, 'y':y_list, 'z':z_list, 'w':w_list, 'h':h_list, 'd':d_list, 'class_id':class_id_list})
print(labels_df.shape)
labels_df

(1182, 9)


Unnamed: 0,experiment,particle_type,x,y,z,w,h,d,class_id
0,TS_5_4,apo-ferritin,468.514,5915.906,604.167,60,60,60,0
1,TS_5_4,apo-ferritin,5674.694,1114.354,565.068,60,60,60,0
2,TS_5_4,apo-ferritin,5744.509,1049.172,653.712,60,60,60,0
3,TS_5_4,apo-ferritin,5880.769,1125.348,579.560,60,60,60,0
4,TS_5_4,apo-ferritin,4661.667,1269.497,810.409,60,60,60,0
...,...,...,...,...,...,...,...,...,...
1177,TS_99_9,virus-like-particle,2010.056,4752.618,1057.078,135,135,135,4
1178,TS_99_9,virus-like-particle,2244.068,4310.063,959.548,135,135,135,4
1179,TS_99_9,virus-like-particle,804.270,5817.135,579.493,135,135,135,4
1180,TS_99_9,virus-like-particle,4198.228,5534.578,858.169,135,135,135,4


In [30]:
labels_df['experiment'].unique()

array(['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3',
       'TS_99_9'], dtype=object)

In [31]:
first_df = labels_df[labels_df['experiment'] == 'TS_5_4']
first_df

Unnamed: 0,experiment,particle_type,x,y,z,w,h,d,class_id
0,TS_5_4,apo-ferritin,468.514,5915.906,604.167,60,60,60,0
1,TS_5_4,apo-ferritin,5674.694,1114.354,565.068,60,60,60,0
2,TS_5_4,apo-ferritin,5744.509,1049.172,653.712,60,60,60,0
3,TS_5_4,apo-ferritin,5880.769,1125.348,579.560,60,60,60,0
4,TS_5_4,apo-ferritin,4661.667,1269.497,810.409,60,60,60,0
...,...,...,...,...,...,...,...,...,...
125,TS_5_4,virus-like-particle,2636.539,4214.980,965.410,135,135,135,4
126,TS_5_4,virus-like-particle,3137.396,3572.460,372.914,135,135,135,4
127,TS_5_4,virus-like-particle,3294.133,3027.464,674.070,135,135,135,4
128,TS_5_4,virus-like-particle,2997.686,4948.218,1169.375,135,135,135,4


In [20]:
image = data_dict['TS_5_4']['denoised']['0']
image.shape

(184, 630, 630)

In [33]:
import torch
import torch.nn as nn

In [36]:
from torch.utils.data import Dataset, DataLoader

In [41]:
import torch.optim as optim

In [34]:
class YOLO3D_NoConfidence(nn.Module):
    def __init__(self, num_classes=5, num_anchors=3):
        super(YOLO3D_NoConfidence, self).__init__()
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv3d(64, 128, kernel_size=3, stride=1, padding=1)
        self.num_classes = num_classes
        self.num_anchors = num_anchors

        # Fully connected layer without confidence prediction
        self.fc = None

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = torch.flatten(x, start_dim=1)

        # Dynamically set FC layer the first time forward() runs
        if self.fc is None:
            num_features = x.shape[1]  # Get dynamic input size
            self.fc = nn.Linear(num_features, self.num_anchors * (6 + self.num_classes))  # Removed confidence
            self.fc = self.fc.to(x.device)

        x = self.fc(x)
        return x

In [35]:
class YOLO3DLoss_NoConfidence(nn.Module):
    def __init__(self):
        super(YOLO3DLoss_NoConfidence, self).__init__()
        self.mse_loss = nn.MSELoss()  # Bounding box regression loss
        self.ce_loss = nn.CrossEntropyLoss()  # Classification loss

    def forward(self, predictions, targets):
        """
        predictions: (batch_size, num_anchors, num_classes + 6)  # Includes x, y, z, w, h, d
        targets: (batch_size, num_anchors, num_classes + 6)
        """
        pred_boxes = predictions[..., :6]  # (x, y, z, w, h, d)
        target_boxes = targets[..., :6]

        box_loss = self.mse_loss(pred_boxes, target_boxes)

        pred_class = predictions[..., 6:]  # Class probabilities
        target_class = targets[..., 6:]

        class_loss = self.ce_loss(pred_class, target_class)

        return box_loss + class_loss

In [50]:
class YOLO3DDataset_NoConfidence(Dataset):
    def __init__(self, image, labels_df):#, transform=None):
        self.data = image  # Load 3D voxel data
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load bounding box + class labels
        #self.transform = transform  # Optional augmentations

        # Ensure data is wrapped in a list if there's only one sample
        if len(self.data.shape) == 3:  # (Depth, Height, Width) without batch dimension
            self.data = np.expand_dims(self.data, axis=0)  # Add batch dimension

        if len(self.labels.shape) == 1:  # If labels are single row
            self.labels = np.expand_dims(self.labels, axis=0)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(0)  # (1, D, H, W)
        y = torch.tensor(self.labels[idx], dtype=torch.float32)  # (x, y, z, w, h, d, class)
        return x, y

In [51]:
dataset = YOLO3DDataset_NoConfidence(image, first_df)

In [57]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [58]:
def train_yolo3d_no_confidence(model, dataloader, epochs=10, lr=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = YOLO3DLoss_NoConfidence()

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            predictions = model(x)
            loss = loss_fn(predictions, y)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}], Loss: {loss.item():.4f}")

        print(f"Epoch [{epoch+1}/{epochs}] Total Loss: {total_loss:.4f}")

    print("Training complete!")

In [59]:
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3)  # Modify as needed
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

OutOfMemoryError: CUDA out of memory. Tried to allocate 235.06 GiB. GPU 0 has a total capacity of 15.99 GiB of which 0 bytes is free. Of the allocated memory 35.38 GiB is allocated by PyTorch, and 17.16 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [56]:
print(torch.version.cuda)

12.4


In [65]:
class YOLO3DDataset_Cropped(Dataset):
    def __init__(self, image, labels_df, crop_size=(64, 128, 128), stride=(32, 64, 64), transform=None):
        self.image = image  # Load the full 3D image
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load full bounding box annotations
        self.crop_size = crop_size
        self.stride = stride
        self.transform = transform  # Optional augmentations

        # Generate patches from the large 3D image
        self.patches, self.patch_bboxes = self.create_crops_with_labels()

    def create_crops_with_labels(self):
        """ Extract overlapping 3D patches and assign bounding boxes to them. """
        d, h, w = self.image.shape
        crops, bboxes = [], []

        for z in range(0, d - self.crop_size[0] + 1, self.stride[0]):
            for y in range(0, h - self.crop_size[1] + 1, self.stride[1]):
                for x in range(0, w - self.crop_size[2] + 1, self.stride[2]):
                    # Extract cropped patch
                    cropped_patch = self.image[z:z+self.crop_size[0], y:y+self.crop_size[1], x:x+self.crop_size[2]]

                    # Find bounding boxes within this patch
                    cropped_bboxes = []
                    for bbox in self.labels:
                        x_center, y_center, z_center, w, h, d, cls = bbox

                        # Check if bbox is inside the cropped region
                        if (x <= x_center < x + self.crop_size[2] and
                            y <= y_center < y + self.crop_size[1] and
                            z <= z_center < z + self.crop_size[0]):

                            # Convert bbox coordinates relative to cropped patch
                            x_new = x_center - x
                            y_new = y_center - y
                            z_new = z_center - z

                            cropped_bboxes.append([x_new, y_new, z_new, w, h, d, cls])

                    if len(cropped_bboxes) > 0:  # Keep only patches that contain objects
                        crops.append(cropped_patch)
                        bboxes.append(cropped_bboxes)

        return torch.tensor(crops, dtype=torch.float32), torch.tensor(bboxes, dtype=torch.float32)

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        x = self.patches[idx].unsqueeze(0)  # Add channel dimension (1, D, H, W)
        y = self.patch_bboxes[idx]  # Bounding boxes for this patch
        return x, y

In [66]:
dataset = YOLO3DDataset_Cropped(image, first_df)

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [86]:
class YOLO3DDataset_Cropped(Dataset):
    def __init__(self, image, labels_df, crop_size=(64, 128, 128), stride=(32, 64, 64), transform=None):
        self.image = image  # Load the full 3D image
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load full bounding box annotations
        self.crop_size = crop_size
        self.stride = stride
        self.transform = transform  # Optional augmentations

        # Generate patches from the large 3D image
        self.patches, self.patch_bboxes = self.create_crops_with_labels()

    def create_crops_with_labels(self):
        """ Extract overlapping 3D patches and assign bounding boxes to them. """
        d, h, w = map(int, self.image.shape)  # ✅ Ensure dimensions are integers
        crops, bboxes = [], []
    
        # ✅ Convert crop_size & stride to integers
        crop_size = tuple(map(int, self.crop_size))
        stride = tuple(map(int, self.stride))
    
        print(f"Crop Size: {crop_size}, Type: {type(crop_size)}")
        print(f"Stride: {stride}, Type: {type(stride)}")
        print(f"Image Shape: {self.image.shape}, Type: {type(self.image.shape)}")
    
        # ✅ Print values before using range()
        z_start = 0
        z_end = int(d - crop_size[0] + 1)  # ✅ Explicit conversion to int
        y_start = 0
        y_end = int(h - crop_size[1] + 1)  # ✅ Explicit conversion to int
        x_start = 0
        x_end = int(w - crop_size[2] + 1)  # ✅ Explicit conversion to int
    
        print(f"z_start: {z_start}, z_end: {z_end}, stride[0]: {stride[0]}")
        print(f"y_start: {y_start}, y_end: {y_end}, stride[1]: {stride[1]}")
        print(f"x_start: {x_start}, x_end: {x_end}, stride[2]: {stride[2]}")
        print(f"z_end type: {type(z_end)}, stride[0] type: {type(stride[0])}")
    
        # ✅ Explicit int conversion inside range()
        for z in range(int(z_start), int(z_end), int(stride[0])):
            for y in range(int(y_start), int(y_end), int(stride[1])):
                for x in range(int(x_start), int(x_end), int(stride[2])):
                    # Extract cropped patch
                    cropped_patch = self.image[z:z+crop_size[0], y:y+crop_size[1], x:x+crop_size[2]]
    
                    # Find bounding boxes within this patch
                    cropped_bboxes = []
                    for bbox in self.labels:
                        x_center, y_center, z_center, w, h, d, cls = bbox
    
                        if (x <= x_center < x + crop_size[2] and
                            y <= y_center < y + crop_size[1] and
                            z <= z_center < z + crop_size[0]):
    
                            x_new = x_center - x
                            y_new = y_center - y
                            z_new = z_center - z
    
                            cropped_bboxes.append([x_new, y_new, z_new, w, h, d, cls])
    
                    if len(cropped_bboxes) > 0:  # Keep only patches that contain objects
                        crops.append(cropped_patch)
                        bboxes.append(cropped_bboxes)
    
        print(f"Total patches created: {len(crops)}")  # 🔥 Debugging total patches
    
        return torch.tensor(crops, dtype=torch.float32), torch.tensor(bboxes, dtype=torch.float32)

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        x = self.patches[idx].unsqueeze(0)  # Add channel dimension (1, D, H, W)
        y = self.patch_bboxes[idx]  # Bounding boxes for this patch
        return x, y

In [87]:
dataset = YOLO3DDataset_Cropped(image, first_df)

Crop Size: (64, 128, 128), Type: <class 'tuple'>
Stride: (32, 64, 64), Type: <class 'tuple'>
Image Shape: (184, 630, 630), Type: <class 'tuple'>
z_start: 0, z_end: 121, stride[0]: 32
y_start: 0, y_end: 503, stride[1]: 64
x_start: 0, x_end: 503, stride[2]: 64
z_end type: <class 'int'>, stride[0] type: <class 'int'>
Total patches created: 0


In [81]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [82]:
print(f"Dataset length: {len(dataset)}")

Dataset length: 0


In [100]:
experiment_list = []
particle_type_list = []
x_list = []
y_list = []
z_list = []
w_list = []
h_list = []
d_list = []
class_id_list = []
for experiment in tqdm(train_data_experiments):
    #print(experiment)
    #print(len(labels_dict[experiment]['apo-ferritin']['points']))
    #print(type(labels_dict[experiment]['apo-ferritin']['points']))
    #print(labels_dict[experiment]['apo-ferritin']['points'][0])

    for key in labels_dict[experiment].keys():
        #print(labels_dict[experiment][key])
        #print(labels_dict[experiment][key]['pickable_object_name'])
        for i in range(len(labels_dict[experiment][key]['points'])):
            experiment_list.append(labels_dict[experiment][key]['run_name'])
            particle_type_list.append(labels_dict[experiment][key]['pickable_object_name'])
            x_list.append(labels_dict[experiment][key]['points'][i]['location']['x']/10)
            y_list.append(labels_dict[experiment][key]['points'][i]['location']['y']/10)
            z_list.append(labels_dict[experiment][key]['points'][i]['location']['z']/10)
            w_list.append(particle_radius[key]/10)
            h_list.append(particle_radius[key]/10)
            d_list.append(particle_radius[key]/10)
            class_id_list.append(class_ids[key])

100%|██████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 3313.78it/s]


In [101]:
labels_df = pd.DataFrame({'experiment':experiment_list, 'particle_type':particle_type_list, 'x':x_list, 'y':y_list, 'z':z_list, 'w':w_list, 'h':h_list, 'd':d_list, 'class_id':class_id_list})
print(labels_df.shape)
labels_df

(1182, 9)


Unnamed: 0,experiment,particle_type,x,y,z,w,h,d,class_id
0,TS_5_4,apo-ferritin,46.8514,591.5906,60.4167,6.0,6.0,6.0,0
1,TS_5_4,apo-ferritin,567.4694,111.4354,56.5068,6.0,6.0,6.0,0
2,TS_5_4,apo-ferritin,574.4509,104.9172,65.3712,6.0,6.0,6.0,0
3,TS_5_4,apo-ferritin,588.0769,112.5348,57.9560,6.0,6.0,6.0,0
4,TS_5_4,apo-ferritin,466.1667,126.9497,81.0409,6.0,6.0,6.0,0
...,...,...,...,...,...,...,...,...,...
1177,TS_99_9,virus-like-particle,201.0056,475.2618,105.7078,13.5,13.5,13.5,4
1178,TS_99_9,virus-like-particle,224.4068,431.0063,95.9548,13.5,13.5,13.5,4
1179,TS_99_9,virus-like-particle,80.4270,581.7135,57.9493,13.5,13.5,13.5,4
1180,TS_99_9,virus-like-particle,419.8228,553.4578,85.8169,13.5,13.5,13.5,4


In [102]:
first_df = labels_df[labels_df['experiment'] == 'TS_5_4']
first_df

Unnamed: 0,experiment,particle_type,x,y,z,w,h,d,class_id
0,TS_5_4,apo-ferritin,46.8514,591.5906,60.4167,6.0,6.0,6.0,0
1,TS_5_4,apo-ferritin,567.4694,111.4354,56.5068,6.0,6.0,6.0,0
2,TS_5_4,apo-ferritin,574.4509,104.9172,65.3712,6.0,6.0,6.0,0
3,TS_5_4,apo-ferritin,588.0769,112.5348,57.9560,6.0,6.0,6.0,0
4,TS_5_4,apo-ferritin,466.1667,126.9497,81.0409,6.0,6.0,6.0,0
...,...,...,...,...,...,...,...,...,...
125,TS_5_4,virus-like-particle,263.6539,421.4980,96.5410,13.5,13.5,13.5,4
126,TS_5_4,virus-like-particle,313.7396,357.2460,37.2914,13.5,13.5,13.5,4
127,TS_5_4,virus-like-particle,329.4133,302.7464,67.4070,13.5,13.5,13.5,4
128,TS_5_4,virus-like-particle,299.7686,494.8218,116.9375,13.5,13.5,13.5,4


In [103]:
dataset = YOLO3DDataset_Cropped(image, first_df)

Crop Size: (64, 128, 128), Type: <class 'tuple'>
Stride: (32, 64, 64), Type: <class 'tuple'>
Image Shape: (184, 630, 630), Type: <class 'tuple'>
z_start: 0, z_end: 121, stride[0]: 32
y_start: 0, y_end: 503, stride[1]: 64
x_start: 0, x_end: 503, stride[2]: 64
z_end type: <class 'int'>, stride[0] type: <class 'int'>
Total patches created: 223


ValueError: expected sequence of length 2 at dim 1 (got 1)

In [104]:
class YOLO3DDataset_Cropped(Dataset):
    def __init__(self, image, labels_df, crop_size=(64, 128, 128), stride=(32, 64, 64), transform=None):
        self.image = image  # Load the full 3D image
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load full bounding box annotations

        print(self.labels.shape[1])
        if self.labels.shape[1] == 7:  # Check if bounding boxes are (x, y, z) instead of (z, y, x)
            print("⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)")
            self.labels[:, [0, 1, 2]] = self.labels[:, [2, 1, 0]]  # Swap x and z
        
        self.crop_size = crop_size
        self.stride = stride
        self.transform = transform  # Optional augmentations

        # Generate patches from the large 3D image
        self.patches, self.patch_bboxes = self.create_crops_with_labels()

    def create_crops_with_labels(self):
        """ Extract overlapping 3D patches and assign bounding boxes to them. """
        d, h, w = map(int, self.image.shape)  # ✅ Ensure dimensions are integers
        crops, bboxes = [], []
    
        # ✅ Convert crop_size & stride to integers
        crop_size = tuple(map(int, self.crop_size))
        stride = tuple(map(int, self.stride))
    
        print(f"Crop Size: {crop_size}, Type: {type(crop_size)}")
        print(f"Stride: {stride}, Type: {type(stride)}")
        print(f"Image Shape: {self.image.shape}, Type: {type(self.image.shape)}")
    
        # ✅ Print values before using range()
        z_start = 0
        z_end = int(d - crop_size[0] + 1)  # ✅ Explicit conversion to int
        y_start = 0
        y_end = int(h - crop_size[1] + 1)  # ✅ Explicit conversion to int
        x_start = 0
        x_end = int(w - crop_size[2] + 1)  # ✅ Explicit conversion to int
    
        print(f"z_start: {z_start}, z_end: {z_end}, stride[0]: {stride[0]}")
        print(f"y_start: {y_start}, y_end: {y_end}, stride[1]: {stride[1]}")
        print(f"x_start: {x_start}, x_end: {x_end}, stride[2]: {stride[2]}")
        print(f"z_end type: {type(z_end)}, stride[0] type: {type(stride[0])}")
    
        # ✅ Explicit int conversion inside range()
        for z in range(0, d - crop_size[0] + 1, stride[0]):  # Move along depth (Z)
            for y in range(0, h - crop_size[1] + 1, stride[1]):  # Move along height (Y)
                for x in range(0, w - crop_size[2] + 1, stride[2]):  # Move along width (X)
                    # Extract cropped patch
                    cropped_patch = self.image[z:z+crop_size[0], y:y+crop_size[1], x:x+crop_size[2]]
    
                    # Find bounding boxes within this patch
                    cropped_bboxes = []
                    for bbox in self.labels:
                        x_center, y_center, z_center, w, h, d, cls = bbox
    
                        if (x <= x_center < x + crop_size[2] and
                            y <= y_center < y + crop_size[1] and
                            z <= z_center < z + crop_size[0]):
    
                            x_new = x_center - x
                            y_new = y_center - y
                            z_new = z_center - z
    
                            cropped_bboxes.append([x_new, y_new, z_new, w, h, d, cls])
    
                    if len(cropped_bboxes) > 0:  # Keep only patches that contain objects
                        crops.append(cropped_patch)
                        bboxes.append(cropped_bboxes)
    
        print(f"Total patches created: {len(crops)}")  # 🔥 Debugging total patches
    
        return torch.tensor(crops, dtype=torch.float32), torch.tensor(bboxes, dtype=torch.float32)

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        x = self.patches[idx].unsqueeze(0)  # Add channel dimension (1, D, H, W)
        y = self.patch_bboxes[idx]  # Bounding boxes for this patch
        return x, y

In [105]:
dataset = YOLO3DDataset_Cropped(image, first_df)

7
⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)
Crop Size: (64, 128, 128), Type: <class 'tuple'>
Stride: (32, 64, 64), Type: <class 'tuple'>
Image Shape: (184, 630, 630), Type: <class 'tuple'>
z_start: 0, z_end: 121, stride[0]: 32
y_start: 0, y_end: 503, stride[1]: 64
x_start: 0, x_end: 503, stride[2]: 64
z_end type: <class 'int'>, stride[0] type: <class 'int'>
Total patches created: 60


ValueError: expected sequence of length 1 at dim 1 (got 2)

In [107]:
class YOLO3DDataset_Cropped(Dataset):
    def __init__(self, image, labels_df, crop_size=(64, 128, 128), stride=(32, 64, 64), transform=None):
        self.image = image  # Load full 3D image
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load bounding boxes (x, y, z, w, h, d, class)
        self.crop_size = crop_size
        self.stride = stride
        self.transform = transform  # Optional augmentations

        # ✅ Convert labels to (z, y, x) format if needed
        self.labels = np.array(self.labels)
        if self.labels.shape[1] == 7:  # Ensure correct bounding box format
            print("⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)")
            self.labels[:, [0, 1, 2]] = self.labels[:, [2, 1, 0]]  # Swap x and z

        # ✅ Generate patches and corresponding bounding boxes
        self.patches, self.patch_bboxes = self.create_crops_with_labels()

    def create_crops_with_labels(self):
        """ Extract overlapping 3D patches and assign bounding boxes to them. """
        d, h, w = map(int, self.image.shape)  # Ensure dimensions are integers
        crops, bboxes = [], []

        crop_size = tuple(map(int, self.crop_size))
        stride = tuple(map(int, self.stride))

        print(f"✅ Image Shape: {self.image.shape}")  # Should be (Z, Y, X)
        print(f"✅ Crop Size: {crop_size}, Stride: {stride}")

        for z in range(0, d - crop_size[0] + 1, stride[0]):  # Iterate over depth (Z)
            for y in range(0, h - crop_size[1] + 1, stride[1]):  # Iterate over height (Y)
                for x in range(0, w - crop_size[2] + 1, stride[2]):  # Iterate over width (X)
                    
                    # ✅ Extract cropped patch
                    cropped_patch = self.image[z:z+crop_size[0], y:y+crop_size[1], x:x+crop_size[2]]

                    # ✅ Find bounding boxes within this patch
                    cropped_bboxes = []
                    for bbox in self.labels:
                        z_center, y_center, x_center, d_box, h_box, w_box, cls = bbox  # Correct (Z, Y, X) format

                        # ✅ Check if bounding box center is inside the cropped patch
                        if (z <= z_center < z + crop_size[0] and
                            y <= y_center < y + crop_size[1] and
                            x <= x_center < x + crop_size[2]):

                            # Convert bounding box coordinates relative to cropped patch
                            z_new = z_center - z
                            y_new = y_center - y
                            x_new = x_center - x

                            cropped_bboxes.append([z_new, y_new, x_new, d_box, h_box, w_box, cls])

                    # ✅ Append patches and bounding boxes
                    crops.append(cropped_patch)
                    bboxes.append(cropped_bboxes if cropped_bboxes else [])  # Ensure consistent shape

        print(f"✅ Total patches created: {len(crops)}")

        # ✅ Fix bounding box tensor format
        bboxes_fixed = []
        for bbox in bboxes:
            if len(bbox) == 0:
                bboxes_fixed.append(torch.empty((0, 7)))  # Create an empty tensor
            else:
                bboxes_fixed.append(torch.tensor(bbox, dtype=torch.float32))

        # ✅ Ensure all bounding box tensors have a consistent shape
        bboxes_tensor = torch.nn.utils.rnn.pad_sequence(bboxes_fixed, batch_first=True, padding_value=-1)

        print(f"✅ Final Bounding Box Tensor Shape: {bboxes_tensor.shape}")
        return torch.tensor(crops, dtype=torch.float32), bboxes_tensor

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        x = self.patches[idx].unsqueeze(0)  # Add channel dimension (1, D, H, W)
        y = self.patch_bboxes[idx]  # Bounding boxes for this patch
        return x, y

In [108]:
dataset = YOLO3DDataset_Cropped(image, first_df)

⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)
✅ Image Shape: (184, 630, 630)
✅ Crop Size: (64, 128, 128), Stride: (32, 64, 64)
✅ Total patches created: 256
✅ Final Bounding Box Tensor Shape: torch.Size([256, 15, 7])


In [109]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [110]:
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3)  # Modify as needed
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [117]:
class YOLO3D_NoConfidence(nn.Module):
    def __init__(self, num_classes=5, num_anchors=3):
        super(YOLO3D_NoConfidence, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool3d(2)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(2)
        self.conv3 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool3d(2)
        self.num_classes = num_classes
        self.num_anchors = num_anchors

        # ✅ Ensure FC layer outputs multiple bounding boxes
        self.fc = nn.Linear(64, self.num_anchors * (6 + self.num_classes))  

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool1(x)
        x = torch.relu(self.conv2(x))
        x = self.pool2(x)
        x = torch.relu(self.conv3(x))
        x = self.pool3(x)
        
        x = torch.flatten(x, start_dim=1)  # Flatten before fully connected layer
    
        # 🔥 Debugging print
        print(f"Flattened Feature Shape: {x.shape}")
    
        x = self.fc(x)  # FC Layer
    
        return x.view(-1, self.num_anchors, 6 + self.num_classes)

In [118]:
class YOLO3DLoss_NoConfidence(nn.Module):
    def __init__(self):
        super(YOLO3DLoss_NoConfidence, self).__init__()
        self.mse_loss = nn.MSELoss()
        self.ce_loss = nn.CrossEntropyLoss()

    def forward(self, predictions, targets):
        """
        predictions: (batch_size, num_anchors, 6 + num_classes)
        targets: (batch_size, max_bboxes, 6 + num_classes)
        """

        # ✅ Ensure the target shape matches predictions
        num_anchors = predictions.shape[1]
        targets = targets[:, :num_anchors, :]  # Truncate if more bboxes than anchors

        # ✅ Compute MSE loss for bounding box regression
        box_loss = self.mse_loss(predictions[..., :6], targets[..., :6])  

        # ✅ Compute classification loss (ignore -1 padding)
        pred_class = predictions[..., 6:]  # Class probabilities
        target_class = targets[..., 6:].argmax(dim=-1)  # Convert one-hot to class index

        class_loss = self.ce_loss(pred_class.view(-1, pred_class.shape[-1]), target_class.view(-1))

        return box_loss + class_loss

In [119]:
def train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = YOLO3DLoss_NoConfidence()

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            predictions = model(x)

            # 🔥 Print shapes before computing loss
            print(f"Batch {batch_idx}: Predictions Shape: {predictions.shape}, Targets Shape: {y.shape}")

            loss = loss_fn(predictions, y)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}], Loss: {loss.item():.4f}")

        print(f"Epoch [{epoch+1}/{epochs}] Total Loss: {total_loss:.4f}")

    print("Training complete!")

In [120]:
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3)  # Modify as needed
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

Flattened Feature Shape: torch.Size([1, 131072])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x131072 and 64x33)

In [127]:
class YOLO3D_NoConfidence(nn.Module):
    def __init__(self, num_classes=5, num_anchors=3):
        super(YOLO3D_NoConfidence, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool3d(2)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(2)
        self.conv3 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool3d(2)
        self.num_classes = num_classes
        self.num_anchors = num_anchors

        # ✅ Dummy input to determine correct FC input size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, 64, 128, 128)  # Adjust if needed
            dummy_output = self._get_conv_output(dummy_input)
            feature_size = dummy_output.view(1, -1).shape[1]  # Dynamically infer size
        
        # ✅ Update FC layer to match correct input size
        self.fc = nn.Linear(feature_size, self.num_anchors * (6 + self.num_classes))

    def _get_conv_output(self, x):
        """Pass dummy tensor through conv layers to compute feature size."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        return x  # Return unflattened feature map

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool1(x)
        x = torch.relu(self.conv2(x))
        x = self.pool2(x)
        x = torch.relu(self.conv3(x))
        x = self.pool3(x)

        x = torch.flatten(x, start_dim=1)

        # 🔥 Debugging print
        #print(f"Flattened Feature Shape: {x.shape}")

        x = self.fc(x)

        return x.view(-1, self.num_anchors, 6 + self.num_classes)

In [128]:
class YOLO3DLoss_NoConfidence(nn.Module):
    def __init__(self):
        super(YOLO3DLoss_NoConfidence, self).__init__()
        self.mse_loss = nn.MSELoss()
        self.ce_loss = nn.CrossEntropyLoss()

    def forward(self, predictions, targets):
        """
        predictions: (batch_size, num_anchors, 6 + num_classes)
        targets: (batch_size, max_bboxes, 6 + num_classes)
        """

        # ✅ Ensure the target shape matches predictions
        num_anchors = predictions.shape[1]
        targets = targets[:, :num_anchors, :]  # Truncate if more bboxes than anchors

        # ✅ Compute MSE loss for bounding box regression
        box_loss = self.mse_loss(predictions[..., :6], targets[..., :6])  

        # ✅ Compute classification loss (ignore -1 padding)
        pred_class = predictions[..., 6:]  # Class probabilities
        target_class = targets[..., 6:].argmax(dim=-1)  # Convert one-hot to class index

        class_loss = self.ce_loss(pred_class.view(-1, pred_class.shape[-1]), target_class.view(-1))

        return box_loss + class_loss

In [129]:
def train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = YOLO3DLoss_NoConfidence()

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            predictions = model(x)

            # 🔥 Print shapes before computing loss
            #print(f"Batch {batch_idx}: Predictions Shape: {predictions.shape}, Targets Shape: {y.shape}")

            loss = loss_fn(predictions, y)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}], Loss: {loss.item():.4f}")

        print(f"Epoch [{epoch+1}/{epochs}] Total Loss: {total_loss:.4f}")

    print("Training complete!")

In [130]:
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3)  # Modify as needed
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

Epoch [1/5], Batch [0], Loss: 2009.1328
Epoch [1/5], Batch [10], Loss: 1762.6278
Epoch [1/5], Batch [20], Loss: 1476.9825
Epoch [1/5], Batch [30], Loss: 815.1925
Epoch [1/5], Batch [40], Loss: 663.8298
Epoch [1/5], Batch [50], Loss: 1010.9552
Epoch [1/5], Batch [60], Loss: 1096.6326
Epoch [1/5], Batch [70], Loss: 725.8221
Epoch [1/5], Batch [80], Loss: 660.4634
Epoch [1/5], Batch [90], Loss: 794.8763
Epoch [1/5], Batch [100], Loss: 446.4623
Epoch [1/5], Batch [110], Loss: 884.3242
Epoch [1/5], Batch [120], Loss: 712.9034
Epoch [1/5], Batch [130], Loss: 820.6149
Epoch [1/5], Batch [140], Loss: 599.5311
Epoch [1/5], Batch [150], Loss: 584.7922
Epoch [1/5], Batch [160], Loss: 540.9503
Epoch [1/5], Batch [170], Loss: 334.4901
Epoch [1/5], Batch [180], Loss: 831.5427
Epoch [1/5], Batch [190], Loss: 459.5589
Epoch [1/5], Batch [200], Loss: 516.6456
Epoch [1/5], Batch [210], Loss: 503.7547
Epoch [1/5], Batch [220], Loss: 981.1341
Epoch [1/5], Batch [230], Loss: 933.5803
Epoch [1/5], Batch [24

In [131]:
class YOLO3D_NoConfidence(nn.Module):
    def __init__(self, num_classes=5, num_anchors=3):
        super(YOLO3D_NoConfidence, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool3d(2)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(2)
        self.conv3 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool3d(2)
        self.num_classes = num_classes
        self.num_anchors = num_anchors

        # ✅ Auto-detect FC layer input size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, 64, 128, 128)  # Adjust input size if needed
            dummy_output = self._get_conv_output(dummy_input)
            feature_size = dummy_output.view(1, -1).shape[1]  

        # ✅ Fully Connected Layer
        self.fc = nn.Linear(feature_size, self.num_anchors * (6 + self.num_classes))

    def _get_conv_output(self, x):
        """Pass dummy tensor through conv layers to compute feature size."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        return x  # Return unflattened feature map

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool1(x)
        x = torch.relu(self.conv2(x))
        x = self.pool2(x)
        x = torch.relu(self.conv3(x))
        x = self.pool3(x)

        x = torch.flatten(x, start_dim=1)

        # ✅ Debugging print
        print(f"Flattened Feature Shape: {x.shape}")

        x = self.fc(x)

        return x.view(-1, self.num_anchors, 6 + self.num_classes)

# ✅ Enable Model Compilation for Faster Execution
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3).to(device)
model = torch.compile(model)  # Speeds up execution on CUDA

In [132]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YOLO3D_NoConfidence(num_classes=5, num_anchors=3).to(device)
model = torch.compile(model)  # Speeds up execution on CUDA

In [133]:
# ✅ Optimized DataLoader
dataloader = DataLoader(
    dataset, 
    batch_size=4,  # Try 4 or 8 based on VRAM availability
    shuffle=True,
    num_workers=8,  # Utilize CPU parallelism
    pin_memory=True,  # Speeds up data transfer to GPU
    persistent_workers=True,  # Avoids worker recreation overhead
    prefetch_factor=2  # Improves throughput
)

In [135]:
#scaler = torch.cuda.amp.GradScaler()  # ✅ Enable mixed precision training

  scaler = torch.cuda.amp.GradScaler()  # ✅ Enable mixed precision training


In [136]:
scaler = torch.amp.GradScaler(device="cuda")  # ✅ Updated PyTorch syntax

In [137]:
loss_fn = YOLO3DLoss_NoConfidence()  # Define the loss function

def train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr)  # ✅ Use AdamW for stability
    torch.backends.cudnn.benchmark = True  # ✅ Enables faster convolutions

    # ✅ Enable CUDA Graphs (reduces CPU overhead)
    use_cuda_graphs = True
    stream = torch.cuda.Stream() if use_cuda_graphs else None

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)

            optimizer.zero_grad()

            # ✅ Enable mixed precision (Float16)
            with torch.cuda.amp.autocast():
                predictions = model(x)
                loss = loss_fn(predictions, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}], Loss: {loss.item():.4f}")

        print(f"Epoch [{epoch+1}/{epochs}] Total Loss: {total_loss:.4f}")

    print("✅ Training Complete!")

In [138]:
# ✅ Run training
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

RuntimeError: DataLoader worker (pid(s) 17868, 7984, 23988, 27984, 8068, 2276, 14904, 11584) exited unexpectedly

In [139]:
dataloader = DataLoader(
    dataset, 
    batch_size=4,  
    shuffle=True,
    num_workers=0,  # ✅ Reduce workers for Windows compatibility
    pin_memory=True,  
    persistent_workers=False,  
    prefetch_factor=None  # ✅ Disable prefetching if using num_workers=0
)

In [140]:
# ✅ Run training
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

  with torch.cuda.amp.autocast():


BackendCompilerFailed: backend='inductor' raised:
RuntimeError: Cannot find a working triton installation. Either the package is not installed or it is too old. More information on installing Triton can be found at https://github.com/openai/triton

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True


In [141]:
class YOLO3DDataset_Cropped(Dataset):
    def __init__(self, image, labels_df, crop_size=(64, 128, 128), stride=(32, 64, 64), transform=None):
        self.image = image  # Load full 3D image
        self.labels = labels_df[['x', 'y', 'z', 'w', 'h', 'd', 'class_id']].to_numpy()  # Load bounding boxes (x, y, z, w, h, d, class)
        self.crop_size = crop_size
        self.stride = stride
        self.transform = transform  # Optional augmentations

        # ✅ Convert labels to (z, y, x) format if needed
        self.labels = np.array(self.labels)
        if self.labels.shape[1] == 7:  # Ensure correct bounding box format
            print("⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)")
            self.labels[:, [0, 1, 2]] = self.labels[:, [2, 1, 0]]  # Swap x and z

        # ✅ Generate patches and corresponding bounding boxes
        self.patches, self.patch_bboxes = self.create_crops_with_labels()

    def create_crops_with_labels(self):
        """ Extract overlapping 3D patches and assign bounding boxes to them. """
        d, h, w = map(int, self.image.shape)  # Ensure dimensions are integers
        crops, bboxes = [], []

        crop_size = tuple(map(int, self.crop_size))
        stride = tuple(map(int, self.stride))

        print(f"✅ Image Shape: {self.image.shape}")  # Should be (Z, Y, X)
        print(f"✅ Crop Size: {crop_size}, Stride: {stride}")

        for z in range(0, d - crop_size[0] + 1, stride[0]):  # Iterate over depth (Z)
            for y in range(0, h - crop_size[1] + 1, stride[1]):  # Iterate over height (Y)
                for x in range(0, w - crop_size[2] + 1, stride[2]):  # Iterate over width (X)
                    
                    # ✅ Extract cropped patch
                    cropped_patch = self.image[z:z+crop_size[0], y:y+crop_size[1], x:x+crop_size[2]]

                    # ✅ Find bounding boxes within this patch
                    cropped_bboxes = []
                    for bbox in self.labels:
                        z_center, y_center, x_center, d_box, h_box, w_box, cls = bbox  # Correct (Z, Y, X) format

                        # ✅ Check if bounding box center is inside the cropped patch
                        if (z <= z_center < z + crop_size[0] and
                            y <= y_center < y + crop_size[1] and
                            x <= x_center < x + crop_size[2]):

                            # Convert bounding box coordinates relative to cropped patch
                            z_new = z_center - z
                            y_new = y_center - y
                            x_new = x_center - x

                            cropped_bboxes.append([z_new, y_new, x_new, d_box, h_box, w_box, cls])

                    # ✅ Append patches and bounding boxes
                    crops.append(cropped_patch)
                    bboxes.append(cropped_bboxes if cropped_bboxes else [])  # Ensure consistent shape

        print(f"✅ Total patches created: {len(crops)}")

        # ✅ Fix bounding box tensor format
        bboxes_fixed = []
        for bbox in bboxes:
            if len(bbox) == 0:
                bboxes_fixed.append(torch.empty((0, 7)))  # Create an empty tensor
            else:
                bboxes_fixed.append(torch.tensor(bbox, dtype=torch.float32))

        # ✅ Ensure all bounding box tensors have a consistent shape
        bboxes_tensor = torch.nn.utils.rnn.pad_sequence(bboxes_fixed, batch_first=True, padding_value=-1)

        print(f"✅ Final Bounding Box Tensor Shape: {bboxes_tensor.shape}")
        return torch.tensor(crops, dtype=torch.float32), bboxes_tensor

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        try:
            x = self.patches[idx].unsqueeze(0)  # Add channel dimension (1, D, H, W)
            y = self.patch_bboxes[idx]  
    
            print(f"Loading patch {idx}: Shape {x.shape}, Labels: {y.shape}")
            return x, y
        
        except Exception as e:
            print(f"❌ Error loading patch {idx}: {e}")
            return None  # Handle broken dataset entries safely

In [142]:
dataset = YOLO3DDataset_Cropped(image, first_df)

⚠️ Fixing bounding box format from (x, y, z) to (z, y, x)
✅ Image Shape: (184, 630, 630)
✅ Crop Size: (64, 128, 128), Stride: (32, 64, 64)
✅ Total patches created: 256
✅ Final Bounding Box Tensor Shape: torch.Size([256, 15, 7])


In [143]:
dataloader = DataLoader(
    dataset, 
    batch_size=4,  
    shuffle=True,
    num_workers=0,  # ✅ Reduce workers for Windows compatibility
    pin_memory=True,  
    persistent_workers=False,  
    prefetch_factor=None  # ✅ Disable prefetching if using num_workers=0
)

In [144]:
# ✅ Run training
train_yolo3d_no_confidence(model, dataloader, epochs=5, lr=0.0001)

Loading patch 22: Shape torch.Size([1, 64, 128, 128]), Labels: torch.Size([15, 7])
Loading patch 95: Shape torch.Size([1, 64, 128, 128]), Labels: torch.Size([15, 7])
Loading patch 43: Shape torch.Size([1, 64, 128, 128]), Labels: torch.Size([15, 7])
Loading patch 92: Shape torch.Size([1, 64, 128, 128]), Labels: torch.Size([15, 7])


  with torch.cuda.amp.autocast():


BackendCompilerFailed: backend='inductor' raised:
RuntimeError: Cannot find a working triton installation. Either the package is not installed or it is too old. More information on installing Triton can be found at https://github.com/openai/triton

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True


# References

1. https://www.kaggle.com/code/davidlist/experiment-ts-6-4-visualization
2. https://www.kaggle.com/code/nk35jk/3d-visualization-of-particles