In [8]:
# Cell 1: Check GPU availability
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
else:
    print("WARNING: CUDA not available. Training will be slow without GPU acceleration.")

CUDA available: True
GPU name: NVIDIA GeForce RTX 4060 Laptop GPU
GPU memory: 8.585281536 GB


# Cell 2: Install required packages
!pip install PyYAML matplotlib numpy Pillow opencv-python
!pip install -q torch torchvision

In [9]:
# Cell 3: Clone YOLOv5 repository (if needed)
import os
if not os.path.exists("yolov5"):
    !git clone https://github.com/ultralytics/yolov5
    %cd yolov5
    !pip install -r requirements.txt
    %cd ..
else:
    print("YOLOv5 already cloned")

c:\Users\devan\Downloads\yolov5


Cloning into 'yolov5'...


c:\Users\devan\Downloads


In [10]:
import tarfile

val_path = "C:/Users/devan/Downloads/object.tar.gz"
extract_path = "dataset"

if os.path.exists(val_path):
    with tarfile.open(val_path, "r:gz") as tar:
        tar.extractall(extract_path)
    print(f"Extracted {val_path} to {extract_path}")
    print("Extracted files:", os.listdir(extract_path))
else:
    print(f"ERROR: Could not find {val_path}. Place the file in the correct directory.")


Extracted C:/Users/devan/Downloads/object.tar.gz to dataset
Extracted files: ['val']


In [11]:
# Cell 5 (Updated): Organize DIODE dataset into YOLOv5 format
import glob
from PIL import Image
import os
from pathlib import Path
import shutil

# Identify the structure of the extracted data
val_dir = Path("dataset/val")
if not val_dir.exists():
    val_dir = Path("dataset")  # Try alternate location

print(f"Examining dataset structure in {val_dir}")

# Check for indoor/outdoor structure
indoor_dir = val_dir / "indoors"
outdoor_dir = val_dir / "outdoor"

if indoor_dir.exists() or outdoor_dir.exists():
    print("Found DIODE dataset structure with indoor/outdoor directories")
    
    # Create YOLOv5 directory structure
    images_dir = Path("dataset/images")
    labels_dir = Path("dataset/labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    
    # Function to find all RGB images and depth images
    def process_diode_directory(base_dir):
        rgb_images = []
        for scene_dir in base_dir.glob("scene_*"):
            for scan_dir in scene_dir.glob("scan_*"):
                # Find RGB images
                for rgb_file in scan_dir.glob("*.png"):
                    if "depth" not in rgb_file.name and "semantic" not in rgb_file.name:
                        rgb_images.append(rgb_file)
        return rgb_images
    
    # Process both indoor and outdoor directories
    all_rgb_images = []
    if indoor_dir.exists():
        indoor_images = process_diode_directory(indoor_dir)
        all_rgb_images.extend(indoor_images)
        print(f"Found {len(indoor_images)} indoor RGB images")
    
    if outdoor_dir.exists():
        outdoor_images = process_diode_directory(outdoor_dir)
        all_rgb_images.extend(outdoor_images)
        print(f"Found {len(outdoor_images)} outdoor RGB images")
    
    print(f"Total: {len(all_rgb_images)} RGB images")
    
    # Copy images to YOLOv5 structure with simplified names
    image_count = 0
    for i, img_path in enumerate(all_rgb_images):
        # Create a simplified name that preserves scene and scan info
        # Format: indoor_scene00019_scan00183_image.png
        location = "indoor" if "indoors" in str(img_path) else "outdoor"
        scene = img_path.parent.parent.name
        scan = img_path.parent.name
        new_name = f"{location}_{scene}_{scan}_{img_path.name}"
        
        # Copy the file
        dest_path = images_dir / new_name
        shutil.copy(str(img_path), str(dest_path))
        image_count += 1
    
    print(f"Copied {image_count} images to {images_dir}")
    
    # Create dummy labels for object detection training
    # Note: For actual object detection, you need real annotations
    # This is just to set up the structure for the tutorial
    print("Creating placeholder label files (you'll need real annotations for actual training)")
    for img_file in images_dir.glob("*.png"):
        label_file = labels_dir / f"{img_file.stem}.txt"
        # Create an empty label file - replace this with actual annotations
        with open(label_file, "w") as f:
            # If you have actual annotations, write them here instead
            pass
    
    # Update data.yaml paths
    train_path = "images"
    val_path = "images"  # Using same images for validation in this example
else:
    print("Could not find expected DIODE dataset structure")
    # Default fallback behavior
    train_path = "object/images"
    val_path = "object/images"

Examining dataset structure in dataset\val
Found DIODE dataset structure with indoor/outdoor directories
Found 325 indoor RGB images
Found 446 outdoor RGB images
Total: 771 RGB images
Copied 771 images to dataset\images
Creating placeholder label files (you'll need real annotations for actual training)


In [12]:
import os
from pathlib import Path
import torch
from collections import Counter
import matplotlib.pyplot as plt
import warnings

# Suppress the specific FutureWarning about torch.cuda.amp.autocast
warnings.filterwarnings("ignore", category=FutureWarning, 
                        message=".*torch.cuda.amp.autocast.*")

# Load a pre-trained YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
images_dir = Path("dataset/images")
class_counts = Counter()

# Process a sample of images (limit to 100 for speed)
sample_images = list(images_dir.glob("*.png"))[:100]
print(f"Analyzing {len(sample_images)} images for class discovery...")

for img_path in sample_images:
    # Run detection
    results = model(str(img_path))
    
    # Get detected classes
    detections = results.pandas().xyxy[0]
    if not detections.empty:
        # Count unique classes in this image
        classes = detections['name'].unique()
        class_counts.update(classes)

# Show top classes found in your dataset
top_classes = class_counts.most_common(15)
print("\nTop 15 detected classes in your dataset:")
for cls, count in top_classes:
    print(f"{cls}: {count} instances")

# Plot class distribution
plt.figure(figsize=(12, 6))
classes, counts = zip(*top_classes)
plt.bar(classes, counts)
plt.xticks(rotation=45, ha='right')
plt.title("Most Common Classes in Dataset")
plt.tight_layout()
plt.savefig("dataset/class_distribution.png")  # Save the plot as an image
plt.show()

# Save discovered classes
os.makedirs("dataset", exist_ok=True)  # Create the directory if it doesn't exist
with open("dataset/discovered_classes.txt", "w") as f:
    for cls, count in class_counts.most_common():
        f.write(f"{cls}: {count}\n")

print(f"\nDiscovered {len(class_counts)} classes in total")
print("Full class list saved to dataset/discovered_classes.txt")
print("Class distribution plot saved to dataset/class_distribution.png")

Using cache found in C:\Users\devan/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-3-16 Python-3.10.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:00<00:00, 15.9MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Analyzing 100 images for class discovery...

Top 15 detected classes in your dataset:
chair: 46 instances
tv: 21 instances
bottle: 12 instances
couch: 8 instances
potted plant: 8 instances
vase: 8 instances
bowl: 7 instances
dining table: 6 instances
book: 6 instances
cup: 6 instances
wine glass: 4 instances
clock: 3 instances
refrigerator: 3 instances
person: 3 instances
cat: 3 instances

Discovered 23 classes in total
Full class list saved to dataset/discovered_classes.txt
Class distribution plot saved to dataset/class_distribution.png


In [13]:
# Read and display the full class list

if os.path.exists("dataset/discovered_classes.txt"):
    with open("dataset/discovered_classes.txt", "r") as file:
        class_list = file.read()
    print("Full class list:")
    print(class_list)
else:
    print(f"ERROR: File dataset/discovered_classes.txt does not exist.")

Full class list:
chair: 46
tv: 21
bottle: 12
couch: 8
potted plant: 8
vase: 8
bowl: 7
dining table: 6
book: 6
cup: 6
wine glass: 4
clock: 3
refrigerator: 3
person: 3
cat: 3
bed: 3
remote: 2
traffic light: 1
sink: 1
toilet: 1
teddy bear: 1
backpack: 1
mouse: 1



In [14]:
import os
import random
import shutil

# Set directories
dataset_dir = '/path/to/dataset'  # Path to your dataset directory
image_dir = os.path.join(dataset_dir, 'images')  # Images folder
label_dir = os.path.join(dataset_dir, 'labels')  # Labels folder

train_image_dir = os.path.join(image_dir, 'train')  # Train images
val_image_dir = os.path.join(image_dir, 'val')  # Validation images
train_label_dir = os.path.join(label_dir, 'train')  # Train labels
val_label_dir = os.path.join(label_dir, 'val')  # Validation labels

# Create directories if they don't exist
os.makedirs(train_image_dir, exist_ok=True)
os.makedirs(val_image_dir, exist_ok=True)
os.makedirs(train_label_dir, exist_ok=True)
os.makedirs(val_label_dir, exist_ok=True)

# List all image files
image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg') or f.endswith('.png')]

# Shuffle image files to get a random split
random.shuffle(image_files)

# Split dataset (90% train, 10% val)
train_size = int(0.9 * len(image_files))
train_files = image_files[:train_size]
val_files = image_files[train_size:]

# Move images and corresponding labels to the train/val directories
for file in train_files:
    shutil.move(os.path.join(image_dir, file), os.path.join(train_image_dir, file))
    shutil.move(os.path.join(label_dir, file.replace('.jpg', '.txt')), os.path.join(train_label_dir, file.replace('.jpg', '.txt')))

for file in val_files:
    shutil.move(os.path.join(image_dir, file), os.path.join(val_image_dir, file))
    shutil.move(os.path.join(label_dir, file.replace('.jpg', '.txt')), os.path.join(val_label_dir, file.replace('.jpg', '.txt')))

print(f"Train and Validation sets created. {len(train_files)} training images, {len(val_files)} validation images.")


Train and Validation sets created. 0 training images, 0 validation images.


In [15]:
import yaml

# Read classes from discovered_classes.txt
with open("dataset/discovered_classes.txt", "r") as f:
    classes = [line.split(':')[0].strip() for line in f.readlines()]

# Create data dictionary with correct train and val paths
data = {
    'path': './dataset',  # dataset root directory
    'train': 'images/train',  # Training images folder (relative to 'path')
    'val': 'images/val',    # Validation images folder (relative to 'path')
    'names': classes        # Class names
}

# Save to YAML file
with open('dataset/data.yaml', 'w') as f:
    yaml.safe_dump(data, f, sort_keys=False)

print("Created data.yaml with correct train and val paths.")


Created data.yaml with correct train and val paths.


In [16]:
# Cell 8: Set up training parameters for GPU
batch_size = 16  # Reduce if you run out of memory
img_size = 640
epochs = 50      # You can start with fewer epochs (e.g., 10) to test
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

print(f"Training configuration:")
print(f"- Device: {device}")
print(f"- Batch size: {batch_size}")
print(f"- Image size: {img_size}")
print(f"- Epochs: {epochs}")

Training configuration:
- Device: cuda:0
- Batch size: 16
- Image size: 640
- Epochs: 50


In [17]:
# Cell 9: Download pre-trained weights
!cd yolov5 && python -c "from utils.downloads import attempt_download; attempt_download('yolov5s.pt')"

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...

  0%|          | 0.00/14.1M [00:00<?, ?B/s]
  9%|▉         | 1.25M/14.1M [00:00<00:01, 11.9MB/s]
 24%|██▍       | 3.38M/14.1M [00:00<00:00, 17.1MB/s]
 40%|███▉      | 5.62M/14.1M [00:00<00:00, 19.2MB/s]
 56%|█████▌    | 7.88M/14.1M [00:00<00:00, 20.3MB/s]
 71%|███████   | 10.0M/14.1M [00:00<00:00, 20.9MB/s]
 85%|████████▍ | 12.0M/14.1M [00:00<00:00, 20.7MB/s]
 99%|█████████▉| 14.0M/14.1M [00:00<00:00, 20.5MB/s]
100%|██████████| 14.1M/14.1M [00:00<00:00, 19.7MB/s]



In [18]:
# Change to YOLOv5 directory first
%cd yolov5

# Run training command
!python train.py \
	--data ../dataset/data.yaml \
	--img {img_size} \
	--batch {batch_size} \
	--epochs {epochs} \
	--weights yolov5s.pt \
	--device {device}

# Return to original directory
%cd ..


c:\Users\devan\Downloads\yolov5
c:\Users\devan\Downloads


[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=../dataset/data.yaml, hyp=data\hyps\hyp.scratch-low.yaml, epochs=50, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data\hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=cuda:0, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs\train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 
YOLOv5  v7.0-399-g8cc44963 Python-3.10.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, 

In [19]:
import os

# Check training results and weights

# Define paths to check
exp_dir = "yolov5/runs/train/exp"
weights_dir = os.path.join(exp_dir, "weights")
best_weights = os.path.join(weights_dir, "best.pt")
last_weights = os.path.join(weights_dir, "last.pt")

# Check for experiment directory
if os.path.exists(exp_dir):
    print(f"Training directory exists at: {exp_dir}")
    # List contents
    print("\nContents of training directory:")
    for item in os.listdir(exp_dir):
        print(f"- {item}")
    
    # Check weights directory
    if os.path.exists(weights_dir):
        print(f"\nWeights directory exists at: {weights_dir}")
        print("\nAvailable weights files:")
        for weight_file in os.listdir(weights_dir):
            print(f"- {weight_file}")
    else:
        print("\nWARNING: Weights directory not found!")
        
    # Check specific weight files
    if os.path.exists(best_weights):
        print(f"\nBest weights found at: {best_weights}")
    else:
        print("\nWARNING: best.pt not found!")
        
    if os.path.exists(last_weights):
        print(f"Last weights found at: {last_weights}")
    else:
        print("WARNING: last.pt not found!")
else:
    print(f"ERROR: Training directory not found at {exp_dir}")
    print("Make sure training has completed successfully before validation.")

Training directory exists at: yolov5/runs/train/exp

Contents of training directory:
- events.out.tfevents.1742334917.ADMIN.32152.0
- hyp.yaml
- opt.yaml
- weights

Weights directory exists at: yolov5/runs/train/exp\weights

Available weights files:



In [20]:
# Cell 12: Run inference on test images
%cd yolov5
!python detect.py \
    --weights runs/train/exp/weights/best.pt \
    --source ../dataset/val/images \
    --conf 0.25 \
    --device {device}
%cd ..

# Display a few results
from IPython.display import Image, display
import glob

result_images = list(glob.glob("yolov5/runs/detect/exp/*.jpg"))[:5]  # Show first 5 results
for img_path in result_images:
    display(Image(filename=img_path))

c:\Users\devan\Downloads\yolov5
c:\Users\devan\Downloads


[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=../dataset/val/images, data=data\coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=cuda:0, view_img=False, save_txt=False, save_format=0, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs\detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5  v7.0-399-g8cc44963 Python-3.10.0 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)

Traceback (most recent call last):
  File "c:\Users\devan\Downloads\yolov5\detect.py", line 438, in <module>
    main(opt)
  File "c:\Users\devan\Downloads\yolov5\detect.py", line 433, in main
    run(**vars(opt))
  File "c:\Users\devan\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    re