In [8]:
from vision.datasets.slr_dataset import SLRDataset
from config import default_config as cfg
import numpy as np

# Build the dataset with no image/target transforms applied.
dataset = SLRDataset(root=cfg.datasets[0], transform=None, target_transform=None)

# One (index, sample id, description) tuple per problem found.
problematic_samples = []


def _report(index, sample_id, issue):
    """Record a problem for one sample and echo it to stdout."""
    problematic_samples.append((index, sample_id, issue))
    print(f"Sample {index} (ID: {sample_id}): {issue}")


# Walk the whole dataset and validate every sample.
for i in range(len(dataset)):
    sample_id = dataset.ids[i]
    try:
        image, boxes, labels = dataset[i]
        print(f"Sample {i} (ID: {sample_id}): Checking data...")

        # Image check: nothing else is inspected when the image is missing.
        if image is None:
            _report(i, sample_id, "Image is None")
            continue

        # Boxes check: expected to be a 2-D NumPy array with 4 columns.
        boxes_ok = False
        if boxes is None:
            _report(i, sample_id, "Boxes is None")
        elif not isinstance(boxes, np.ndarray):
            _report(i, sample_id, f"Boxes is not a NumPy array: {type(boxes)}")
        elif boxes.ndim != 2 or boxes.shape[1] != 4:
            _report(i, sample_id, f"Invalid boxes shape: {boxes.shape}")
        else:
            boxes_ok = True
            print(f"Sample {i} (ID: {sample_id}): Boxes shape={boxes.shape}")

        # Labels check: expected to be a 1-D NumPy array.
        labels_ok = False
        if labels is None:
            _report(i, sample_id, "Labels is None")
        elif not isinstance(labels, np.ndarray):
            _report(i, sample_id, f"Labels is not a NumPy array: {type(labels)}")
        elif labels.ndim != 1:
            _report(i, sample_id, f"Invalid labels shape: {labels.shape}")
        else:
            labels_ok = True
            print(f"Sample {i} (ID: {sample_id}): Labels shape={labels.shape}")

        # Compare counts only when both arrays are well-formed. Otherwise the
        # `.shape` access itself fails (e.g. on None) and the sample would be
        # reported a second time as a misleading "Other error".
        if boxes_ok and labels_ok and boxes.shape[0] != labels.shape[0]:
            _report(
                i, sample_id,
                f"Mismatch between boxes ({boxes.shape[0]}) and labels ({labels.shape[0]})",
            )

    except ValueError as e:
        _report(i, sample_id, f"ValueError: {str(e)}")
    except Exception as e:
        # Deliberately broad: one broken sample must not abort the whole scan.
        _report(i, sample_id, f"Other error: {str(e)}")

# Print a summary of the problematic samples.
if problematic_samples:
    print("\nProblematic samples found:")
    for idx, image_id, issue in problematic_samples:
        print(f"Sample {idx} (ID: {image_id}): {issue}")
    print(f"Total problematic samples: {len(problematic_samples)}")
else:
    print("No problematic samples found in the dataset.")

Sample 0 (ID: C0002_test_jpg.rf.a3a79aa448c3298fe676599bec81dd85): Checking data...
Sample 0 (ID: C0002_test_jpg.rf.a3a79aa448c3298fe676599bec81dd85): Boxes shape=(1, 4)
Sample 0 (ID: C0002_test_jpg.rf.a3a79aa448c3298fe676599bec81dd85): Labels shape=(1,)
Sample 1 (ID: WIN_20240221_23_46_54_Pro_jpg.rf.73d7f669db5e45a1bc3a6d690e7eb759): Checking data...
Sample 1 (ID: WIN_20240221_23_46_54_Pro_jpg.rf.73d7f669db5e45a1bc3a6d690e7eb759): Boxes shape=(1, 4)
Sample 1 (ID: WIN_20240221_23_46_54_Pro_jpg.rf.73d7f669db5e45a1bc3a6d690e7eb759): Labels shape=(1,)
Sample 2 (ID: A25_jpg.rf.286bb83ef1585d5c20e41772a34cde8b): Checking data...
Sample 2 (ID: A25_jpg.rf.286bb83ef1585d5c20e41772a34cde8b): Boxes shape=(1, 4)
Sample 2 (ID: A25_jpg.rf.286bb83ef1585d5c20e41772a34cde8b): Labels shape=(1,)
Sample 3 (ID: D22_jpg.rf.e5fe8dd515ff6d7eef75d9bc0e15fdc6): Checking data...
Sample 3 (ID: D22_jpg.rf.e5fe8dd515ff6d7eef75d9bc0e15fdc6): Boxes shape=(1, 4)
Sample 3 (ID: D22_jpg.rf.e5fe8dd515ff6d7eef75d9bc0e15fd

In [11]:
import argparse
import os
import logging
import sys
import itertools

import torch
from torch.utils.data import DataLoader, ConcatDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR

from vision.utils.misc import str2bool, Timer, freeze_net_layers, store_labels
from vision.ssd.ssd import MatchPrior
# from vision.ssd.vgg_ssd import create_vgg_ssd
# from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
# from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite
# from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
from vision.datasets.slr_dataset import SLRDataset
from vision.datasets.open_images import OpenImagesDataset
from vision.nn.multibox_loss import MultiboxLoss
# from vision.ssd.config import vgg_ssd_config
from vision.ssd.config import mobilenetv1_ssd_config
# from vision.ssd.config import squeezenet_ssd_config
from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform

from vision.ssd.mobilenet_v3_ssd_lite import create_mobilenetv3_ssd_lite
from config import default_config as cfg  # Import the default config instance



# Send INFO-and-above log records to stdout so they appear in the cell output.
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Pick the compute device once: first CUDA GPU when available, otherwise CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
    # Enable the cuDNN benchmark mode on GPU runs.
    torch.backends.cudnn.benchmark = True
    logging.info("Use Cuda.")
else:
    DEVICE = torch.device("cpu")


def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    """Run one training pass over ``loader`` and periodically log average losses.

    Args:
        loader: Iterable yielding ``(images, boxes, labels)`` batches; each
            element must support ``.to(device)``.
        net: Model called as ``net(images)`` and returning
            ``(confidence, locations)``. It is switched to training mode.
        criterion: Callable invoked as
            ``criterion(confidence, locations, labels, boxes)`` and returning
            ``(regression_loss, classification_loss)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        device: Device the batch tensors are moved to.
        debug_steps: Log the running averages whenever the step index is a
            non-zero multiple of this value. Values ``<= 0`` disable logging.
        epoch: Epoch number, used only in the log message.
    """
    net.train(True)
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    # Number of batches accumulated since the last log line. The first window
    # spans steps 0..debug_steps (debug_steps + 1 batches), so dividing by
    # debug_steps would overstate the first reported averages.
    steps_in_window = 0
    for i, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = net(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)  # TODO CHANGE BOXES
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
        steps_in_window += 1
        if debug_steps > 0 and i and i % debug_steps == 0:
            avg_loss = running_loss / steps_in_window
            avg_reg_loss = running_regression_loss / steps_in_window
            avg_clf_loss = running_classification_loss / steps_in_window
            logging.info(
                f"Epoch: {epoch}, Step: {i}, " +
                f"Average Loss: {avg_loss:.4f}, " +
                f"Average Regression Loss {avg_reg_loss:.4f}, " +
                f"Average Classification Loss: {avg_clf_loss:.4f}"
            )
            running_loss = 0.0
            running_regression_loss = 0.0
            running_classification_loss = 0.0
            steps_in_window = 0


def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1

        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num


if __name__ == '__main__':
    # End-to-end training script: build datasets and loaders, create the SSD
    # network, configure optimizer and LR scheduler, then train and validate.
    timer = Timer()

    logging.info(cfg)
    # Select the network factory; both supported nets use the MobileNetV1 SSD
    # prior/config module.
    if cfg.net == 'mb2-ssd-lite':
        create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=cfg.mb2_width_mult)
        config = mobilenetv1_ssd_config
    elif cfg.net == 'mb3-ssd-lite':
        create_net = lambda num: create_mobilenetv3_ssd_lite(num)
        config = mobilenetv1_ssd_config
    else:
        # No argparse parser exists in this script (settings come from `cfg`),
        # so just report the problem and exit.
        logging.fatal("The net type is wrong.")
        sys.exit(1)
    train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)

    test_transform = TestTransform(config.image_size, config.image_mean, config.image_std)

    logging.info("Prepare training datasets.")
    datasets = []

    # The label file is written into the checkpoint folder, so it must exist.
    os.makedirs(cfg.checkpoint_folder, exist_ok=True)

    for dataset_path in cfg.datasets:
        if cfg.dataset_type == 'slr':
            dataset = SLRDataset(dataset_path, transform=train_transform,
                                 target_transform=target_transform)
            label_file = os.path.join(cfg.checkpoint_folder, "slr-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            num_classes = len(dataset.class_names)
        elif cfg.dataset_type == 'open_images':
            dataset = OpenImagesDataset(dataset_path,
                 transform=train_transform, target_transform=target_transform,
                 dataset_type="train", balance_data=cfg.balance_data)
            label_file = os.path.join(cfg.checkpoint_folder, "open-images-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            logging.info(dataset)
            num_classes = len(dataset.class_names)

        else:
            raise ValueError(f"Dataset tpye {cfg.dataset_type} is not supported.")
        datasets.append(dataset)
    if not datasets:
        # Without this, `label_file` / `num_classes` below would be undefined.
        raise ValueError("No training datasets configured (cfg.datasets is empty).")
    logging.info(f"Stored labels into file {label_file}.")
    train_dataset = ConcatDataset(datasets)
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset, cfg.batch_size,
                              num_workers=cfg.num_workers,
                              shuffle=True,
                              drop_last=True)
    logging.info("Prepare Validation datasets.")
    if cfg.dataset_type == "slr":
        val_dataset = SLRDataset(cfg.validation_dataset, transform=test_transform,
                                 target_transform=target_transform, is_test=True)
    elif cfg.dataset_type == 'open_images':
        # NOTE(review): this reuses the last training `dataset_path` rather than
        # cfg.validation_dataset -- confirm this is intended.
        val_dataset = OpenImagesDataset(dataset_path,
                                        transform=test_transform, target_transform=target_transform,
                                        dataset_type="test")
        logging.info(val_dataset)
    logging.info("validation dataset size: {}".format(len(val_dataset)))

    # Keep the final partial batch: dropping it discards validation samples and
    # yields an empty loader when the validation set is smaller than one batch.
    val_loader = DataLoader(val_dataset, cfg.batch_size,
                            num_workers=cfg.num_workers,
                            shuffle=False,
                            drop_last=False)
    logging.info("Build network.")
    net = create_net(num_classes)

    # Not read anywhere in this cell; kept in case other cells rely on it.
    min_loss = -10000.0
    last_epoch = -1

    # Per-group learning rates fall back to the global cfg.lr when unset.
    base_net_lr = cfg.base_net_lr if cfg.base_net_lr is not None else cfg.lr
    extra_layers_lr = cfg.extra_layers_lr if cfg.extra_layers_lr is not None else cfg.lr
    if cfg.freeze_base_net:
        logging.info("Freeze base net.")
        freeze_net_layers(net.base_net)
        params = [
            {'params': itertools.chain(
                net.source_layer_add_ons.parameters(),
                net.extras.parameters()
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
                net.regression_headers.parameters(),
                net.classification_headers.parameters()
            )}
        ]
    elif cfg.freeze_net:
        freeze_net_layers(net.base_net)
        freeze_net_layers(net.source_layer_add_ons)
        freeze_net_layers(net.extras)
        params = itertools.chain(net.regression_headers.parameters(), net.classification_headers.parameters())
        logging.info("Freeze all the layers except prediction heads.")
    else:
        params = [
            {'params': net.base_net.parameters(), 'lr': base_net_lr},
            {'params': itertools.chain(
                net.source_layer_add_ons.parameters(),
                net.extras.parameters()
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
                net.regression_headers.parameters(),
                net.classification_headers.parameters()
            )}
        ]

    # Weight initialisation priority: full checkpoint, then base net, then a
    # pretrained SSD.
    timer.start("Load Model")
    if cfg.resume:
        logging.info(f"Resume from the model {cfg.resume}")
        net.load(cfg.resume)
    elif cfg.base_net:
        logging.info(f"Init from base net {cfg.base_net}")
        net.init_from_base_net(cfg.base_net)
    elif cfg.pretrained_ssd:
        logging.info(f"Init from pretrained ssd {cfg.pretrained_ssd}")
        net.init_from_pretrained_ssd(cfg.pretrained_ssd)
    logging.info(f'Took {timer.end("Load Model"):.2f} seconds to load the model.')

    net.to(DEVICE)

    criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                             center_variance=0.1, size_variance=0.2, device=DEVICE)
    optimizer = torch.optim.SGD(params, lr=cfg.lr, momentum=cfg.momentum,
                                weight_decay=cfg.weight_decay)
    logging.info(f"Learning rate: {cfg.lr}, Base net learning rate: {base_net_lr}, "
                 + f"Extra Layers learning rate: {extra_layers_lr}.")

    if cfg.scheduler == 'multi-step':
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in cfg.milestones.split(",")]
        scheduler = MultiStepLR(optimizer, milestones=milestones,
                                gamma=0.1, last_epoch=last_epoch)
    elif cfg.scheduler == 'cosine':
        logging.info("Uses CosineAnnealingLR scheduler.")
        scheduler = CosineAnnealingLR(optimizer, cfg.t_max, last_epoch=last_epoch)
    else:
        # As above: there is no `parser` to print help from.
        logging.fatal(f"Unsupported Scheduler: {cfg.scheduler}.")
        sys.exit(1)

    logging.info(f"Start training from epoch {last_epoch + 1}.")

    for epoch in range(last_epoch + 1, cfg.num_epochs):
        train(train_loader, net, criterion, optimizer,
              device=DEVICE, debug_steps=cfg.debug_steps, epoch=epoch)

        # Validate and checkpoint every `validation_epochs` epochs and on the
        # final epoch.
        if epoch % cfg.validation_epochs == 0 or epoch == cfg.num_epochs - 1:
            val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)
            logging.info(
                f"Epoch: {epoch}, " +
                f"Validation Loss: {val_loss:.4f}, " +
                f"Validation Regression Loss {val_regression_loss:.4f}, " +
                f"Validation Classification Loss: {val_classification_loss:.4f}"
            )
            model_path = os.path.join(cfg.checkpoint_folder, f"{cfg.net}-Epoch-{epoch}-Loss-{val_loss}.pth")
            net.save(model_path)
            logging.info(f"Saved model {model_path}")

        # Advance the LR schedule only after this epoch's optimizer steps;
        # stepping before training skips the first value of the schedule.
        scheduler.step()


RandomSampleCrop initialized with: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options:

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/dat/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/dat/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dat/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/home/dat/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/utils/data/dataset.py", line 350, in __getitem__
    return self.datasets[dataset_idx][sample_idx]
           ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
  File "/home/dat/2025/thesis/edgetpu_tflite_inference/train/vision/datasets/slr_dataset.py", line 64, in __getitem__
  File "/home/dat/2025/thesis/edgetpu_tflite_inference/train/vision/ssd/data_preprocessing.py", line 34, in __call__
    return self.augment(img, boxes, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dat/2025/thesis/edgetpu_tflite_inference/train/vision/transforms/transforms.py", line 55, in __call__
    for t in self.transforms:
                             ^
  File "/home/dat/2025/thesis/edgetpu_tflite_inference/train/vision/transforms/transforms.py", line 251, in __call__
    print(f"Error in random.choice with sample_options: {self.sample_options}")
    ^^^^^^^
  File "/home/dat/2025/thesis/edgetpu_tflite_inference/train/vision/transforms/transforms.py", line 248, in __call__
    try:
         
  File "numpy/random/mtrand.pyx", line 956, in numpy.random.mtrand.RandomState.choice
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (6,) + inhomogeneous part.


 [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options:

 [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Before random.choice, sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
Error in random.choice with sample_options: [None, (0.1, None), (0.3, None), (0.7, None), (0.9, None), (None, None)]
