<a href="https://colab.research.google.com/github/lorenzopaoria/Smoking-detection-and-distance-analysis/blob/main/model_train_cigarette.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Train a model for sigarette detection

In [33]:
import torch
import torchvision
import psutil
import os
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision import transforms
from PIL import Image
import numpy as np
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

In [34]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
class CigaretteDataset(Dataset):
    def __init__(self, coco_annotation_file, image_dir, transform=None):
        self.coco = COCO(coco_annotation_file)
        self.image_dir = image_dir
        self.transform = transform if transform is not None else transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        cat_ids = self.coco.getCatIds(catNms=['cigarette'])
        if not cat_ids:
            raise ValueError("No 'cigarette' category found in COCO file.")
        self.image_ids = list(set(self.coco.getImgIds(catIds=cat_ids)))

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        image = Image.open(f"{self.image_dir}/{img_info['file_name']}").convert("RGB")

        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        annotations = self.coco.loadAnns(ann_ids)
        boxes, labels = [], []

        for ann in annotations:
            cat_name = self.coco.loadCats(ann['category_id'])[0]['name']
            x, y, w, h = ann['bbox']

            if cat_name == 'cigarette':
                boxes.append([x, y, x + w, y + h])
                labels.append(1)  # Class 1 for 'cigarette'

        if len(boxes) == 0:
            return None

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        image = self.transform(image)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([img_id])
        }

        return image, target

In [36]:
def collate_fn(batch):
    return tuple(zip(*[b for b in batch if b is not None]))

In [37]:
def check_system_usage():
    print(f"CPU Usage: {psutil.cpu_percent()}%")
    print(f"RAM Usage: {psutil.virtual_memory().percent}%")
    if torch.cuda.is_available():
        print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
        print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

In [38]:
def evaluate_model(model, dataset, device):
    print("\n=== Starting Validation ===")
    model.eval()
    total_val_loss = 0
    coco_dt = []
    coco_gt = dataset.coco

    with torch.no_grad():
        for idx in tqdm(range(len(dataset)), desc="Validating", ncols=100):
            image, target = dataset[idx]
            image = image.to(device).unsqueeze(0)  # Add batch dimension
            target = [{k: v.to(device) for k, v in target.items()}]

            # During validation, the model should return only predictions, not loss.
            predictions = model(image)  # Only predictions during evaluation

            # If the model is returning a dictionary with loss values (for training):
            if isinstance(predictions, dict):
                loss_dict = predictions
                loss = sum(loss for loss in loss_dict.values())
                total_val_loss += loss.item()

            # Collect detection results (boxes, labels, scores) for COCO eval:
            for box, score, label in zip(predictions[0]['boxes'], predictions[0]['scores'], predictions[0]['labels']):
                x1, y1, x2, y2 = box.tolist()
                coco_dt.append({
                    'image_id': target[0]['image_id'].item(),
                    'category_id': label.item(),
                    'bbox': [x1, y1, x2 - x1, y2 - y1],  # COCO format [x, y, width, height]
                    'score': score.item()
                })

    avg_val_loss = total_val_loss / len(dataset) if len(dataset) > 0 else 0
    print(f"\nValidation Loss: {avg_val_loss:.4f}")

    # COCO evaluation (optional, if needed):
    if len(coco_dt) > 0:
        try:
            coco_dt = coco_gt.loadRes(coco_dt)
            coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
        except Exception as e:
            print(f"Error during COCO evaluation: {str(e)}")

    print("\n=== Validation Complete ===\n")
    return avg_val_loss  # Return average validation loss


In [39]:
def train_model(dataset, num_epochs=10, val_dataset=None, patience=3):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Initialize model and move to device
    model = fasterrcnn_resnet50_fpn(weights='DEFAULT')
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes=4)
    model = model.to(device)

    data_loader = DataLoader(
        dataset,
        batch_size=8,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=2,
        pin_memory=True,
        persistent_workers=True
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    scaler = torch.amp.GradScaler('cuda') if torch.cuda.is_available() else None
    epoch_losses = []
    val_losses = []
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        start_time = time.time()
        print(f"Starting Epoch {epoch+1}/{num_epochs}")

        for batch_idx, (images, targets) in enumerate(tqdm(data_loader, desc=f"Epoch {epoch+1}/{num_epochs}", ncols=100, unit="batch")):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()

            if scaler is not None:
                with torch.amp.autocast('cuda'):
                    loss_dict = model(images, targets)
                    loss = sum(loss for loss in loss_dict.values())

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())
                loss.backward()
                optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(data_loader)
        epoch_losses.append(avg_loss)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Time: {time.time() - start_time:.2f}s")
        check_system_usage()

        if val_dataset:
            val_loss = evaluate_model(model, val_dataset, device)  # Ora val_loss è un float
            val_losses.append(val_loss)

            # Early stopping logic
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
                best_model_state = model.state_dict()
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': best_model_state,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': avg_loss
                }, 'best_model.pth')
                print(f"New best model saved with validation loss: {best_val_loss:.4f}")
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"No improvement for {patience} epochs, stopping training.")
                    break
        else:
            print("No validation dataset provided - skipping validation")

        # Save the last epoch model
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss
        }, 'last_model.pth')
        print(f"Last model saved for epoch {epoch+1}")

    # Save the final model
    torch.save(model.state_dict(), f"fasterrcnn_cigarette_final.pth")
    print(f'Final model saved: fasterrcnn_cigarette_final.pth')

    return model

In [40]:
if __name__ == "__main__":
    #psutil.Process().nice(psutil.BELOW_NORMAL_PRIORITY_CLASS)
    train_image_dir = '/content/drive/MyDrive/Photo/train'
    train_coco_annotation_file = '/content/drive/MyDrive/Photo/train/_annotations.coco.json'

    valid_image_dir = '/content/drive/MyDrive/Photo/valid'
    valid_coco_annotation_file = '/content/drive/MyDrive/Photo/valid/_annotations.coco.json'

    dataset = CigaretteDataset(train_coco_annotation_file, train_image_dir)
    val_dataset = CigaretteDataset(valid_coco_annotation_file, valid_image_dir)

    model = train_model(dataset, num_epochs=10, val_dataset= val_dataset, patience= 3)

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Using device: cuda
Starting Epoch 1/10


Epoch 1/10: 100%|████████████████████████████████████████████████| 92/92 [01:56<00:00,  1.26s/batch]


Epoch 1/10, Loss: 0.5134, Time: 116.09s
CPU Usage: 68.7%
RAM Usage: 53.7%
GPU Memory Allocated: 4.09 GB
GPU Memory Reserved: 14.13 GB

=== Starting Validation ===


Validating: 100%|█████████████████████████████████████████████████| 142/142 [00:26<00:00,  5.41it/s]



Validation Loss: 0.0000
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=20.97s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.048
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.168
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.009
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.155
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.119
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.039
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.104
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.107
 Average Recall     (AR) @[ IoU=0.50:0.95 

Epoch 2/10: 100%|████████████████████████████████████████████████| 92/92 [01:56<00:00,  1.27s/batch]


Epoch 2/10, Loss: 0.2258, Time: 116.71s
CPU Usage: 79.6%
RAM Usage: 45.9%
GPU Memory Allocated: 1.61 GB
GPU Memory Reserved: 14.13 GB

=== Starting Validation ===


Validating: 100%|█████████████████████████████████████████████████| 142/142 [00:25<00:00,  5.49it/s]



Validation Loss: 0.0000
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.17s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.103
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.264
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.048
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.299
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.340
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.064
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.143
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.146
 Average Recall     (AR) @[ IoU=0.50:0.95 |

Epoch 3/10: 100%|████████████████████████████████████████████████| 92/92 [01:54<00:00,  1.25s/batch]


Epoch 3/10, Loss: 0.1962, Time: 114.94s
CPU Usage: 86.7%
RAM Usage: 45.7%
GPU Memory Allocated: 1.61 GB
GPU Memory Reserved: 14.13 GB

=== Starting Validation ===


Validating: 100%|█████████████████████████████████████████████████| 142/142 [00:25<00:00,  5.52it/s]



Validation Loss: 0.0000
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.20s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.133
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.299
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.086
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.377
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.466
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.073
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.162
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.163
 Average Recall     (AR) @[ IoU=0.50:0.95 |

Epoch 4/10: 100%|████████████████████████████████████████████████| 92/92 [01:55<00:00,  1.26s/batch]


Epoch 4/10, Loss: 0.1671, Time: 115.63s
CPU Usage: 89.4%
RAM Usage: 45.9%
GPU Memory Allocated: 1.61 GB
GPU Memory Reserved: 14.13 GB

=== Starting Validation ===


Validating: 100%|█████████████████████████████████████████████████| 142/142 [00:25<00:00,  5.53it/s]



Validation Loss: 0.0000
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.137
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.278
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.120
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.435
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.080
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.164
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.164
 Average Recall     (AR) @[ IoU=0.50:0.95 |