In [6]:
import os
import torch
from PIL import Image
import torchvision.transforms as T
import json

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Load annotations (assuming COCO JSON format)
annotations_path = os.path.join(downloads_path, "annotations.json")
with open(annotations_path, "r") as f:
    coco_data = json.load(f)

# Map annotations to images
annotations = []
for img_path in image_paths:
    img_name = os.path.basename(img_path)
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"] == img_name), None)
    if img_id is None:
        print(f"Warning: No annotation found for {img_name}")
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    annotations.append({
        "boxes": boxes,  # Format: [[xmin, ymin, xmax, ymax], ...]
        "labels": labels  # Class labels
    })

# Filter image_paths to only include images with annotations
image_paths = [img_path for img_path, ann in zip(image_paths, annotations) if ann["boxes"] and ann["labels"]]

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()  # Convert PIL image to tensor
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create dataset and DataLoader
from torch.utils.data import DataLoader
dataset = load_dataset(image_paths, annotations)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

print(f"Loaded {len(dataset)} images with annotations.")



ValueError: num_samples should be a positive integer value, but got num_samples=0

In [8]:
import os
import random
import torch
from PIL import Image
import torchvision.transforms as T
import json
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg", ".jpe")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Select one random image from each subfolder (total 5 images)
selected_image_paths = []
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    subfolder_images = [p for p in image_paths if subfolder in p]
    if subfolder_images:
        selected_image_paths.append(random.choice(subfolder_images))

# Debugging: Print selected image paths
print("Selected image paths:")
for img_path in selected_image_paths:
    print(img_path)

# Create dummy annotations
# Categories: 1=cabinets, 2=chair, 3=fridge, 4=table, 5=tv
categories = [
    {"id": 1, "name": "cabinets"},
    {"id": 2, "name": "chair"},
    {"id": 3, "name": "fridge"},
    {"id": 4, "name": "table"},
    {"id": 5, "name": "tv"}
]

# Generate dummy COCO-format annotations
coco_data = {
    "images": [],
    "annotations": [],
    "categories": categories
}

for idx, img_path in enumerate(selected_image_paths):
    # Get image dimensions
    img = Image.open(img_path).convert("RGB")
    width, height = img.size
    
    # Add image metadata
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    # Normalize the path separator and extension
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    coco_data["images"].append({
        "id": idx + 1,
        "file_name": relative_path,
        "width": width,
        "height": height
    })
    
    # Add a dummy bounding box (e.g., center of the image, 100x100 pixels)
    box = [width/2 - 50, height/2 - 50, 100, 100]  # [x, y, width, height]
    category_id = subfolders.index(os.path.basename(os.path.dirname(img_path))) + 1
    coco_data["annotations"].append({
        "image_id": idx + 1,
        "bbox": box,
        "category_id": category_id,
        "id": idx + 1
    })

# Debugging: Print the file names in coco_data
print("\nFile names in coco_data['images']:")
for item in coco_data["images"]:
    print(item["file_name"])

# Map annotations to images
annotations = []
for img_path in selected_image_paths:
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    print(f"\nTrying to match: {relative_path}")
    
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"].lower() == relative_path), None)
    if img_id is None:
        print(f"Warning: No match found for {relative_path}")
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    annotations.append({
        "boxes": boxes,
        "labels": labels
    })

# Filter image_paths to only include images with annotations
selected_image_paths = [img_path for img_path, ann in zip(selected_image_paths, annotations) if ann["boxes"] and ann["labels"]]

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create dataset and DataLoader
dataset = load_dataset(selected_image_paths, annotations)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load Faster R-CNN model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 6  # 5 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Training setup
model.train()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    for images, targets in dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {losses.item()}")

print("Training completed.")

Selected image paths:
/Users/skyzhao/Downloads/furniture/cabinets,cupboards,etc/image_226.jpeg
/Users/skyzhao/Downloads/furniture/chair/image_1599.jpeg
/Users/skyzhao/Downloads/furniture/fridge/image_1202.jpeg
/Users/skyzhao/Downloads/furniture/table/image_1638.jpeg
/Users/skyzhao/Downloads/furniture/tv/image_2214.jpeg

File names in coco_data['images']:
cabinets,cupboards,etc/image_226.jpegg
chair/image_1599.jpegg
fridge/image_1202.jpegg
table/image_1638.jpegg
tv/image_2214.jpegg

Trying to match: cabinets,cupboards,etc/image_226.jpegg

Trying to match: chair/image_1599.jpegg

Trying to match: fridge/image_1202.jpegg

Trying to match: table/image_1638.jpegg

Trying to match: tv/image_2214.jpegg




Epoch 1/5, Loss: 0.4183807373046875
Epoch 2/5, Loss: 0.6392212510108948
Epoch 3/5, Loss: 0.44014203548431396
Epoch 4/5, Loss: 0.29586315155029297
Epoch 5/5, Loss: 0.3235889673233032
Training completed.


In [9]:
import os
import random
import torch
from PIL import Image
import torchvision.transforms as T
import json
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg", ".jpe")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Select one random image from each subfolder (total 5 images)
selected_image_paths = []
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    subfolder_images = [p for p in image_paths if subfolder in p]
    if subfolder_images:
        selected_image_paths.append(random.choice(subfolder_images))

# Debugging: Print selected image paths
print("Selected image paths:")
for img_path in selected_image_paths:
    print(img_path)

# Create dummy annotations
# Categories: 1=cabinets, 2=chair, 3=fridge, 4=table, 5=tv
categories = [
    {"id": 1, "name": "cabinets"},
    {"id": 2, "name": "chair"},
    {"id": 3, "name": "fridge"},
    {"id": 4, "name": "table"},
    {"id": 5, "name": "tv"}
]

# Generate dummy COCO-format annotations
coco_data = {
    "images": [],
    "annotations": [],
    "categories": categories
}

for idx, img_path in enumerate(selected_image_paths):
    # Get image dimensions
    img = Image.open(img_path).convert("RGB")
    width, height = img.size
    
    # Add image metadata
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    # Normalize the path separator and extension
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    coco_data["images"].append({
        "id": idx + 1,
        "file_name": relative_path,
        "width": width,
        "height": height
    })
    
    # Add a dummy bounding box (e.g., center of the image, 100x100 pixels)
    box = [width/2 - 50, height/2 - 50, 100, 100]  # [x, y, width, height]
    category_id = subfolders.index(os.path.basename(os.path.dirname(img_path))) + 1
    coco_data["annotations"].append({
        "image_id": idx + 1,
        "bbox": box,
        "category_id": category_id,
        "id": idx + 1
    })

# Debugging: Print the file names in coco_data['images']
print("\nFile names in coco_data['images']:")
for item in coco_data["images"]:
    print(item["file_name"])

# Map annotations to images
annotations = []
for img_path in selected_image_paths:
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    print(f"\nTrying to match: {relative_path}")
    
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"].lower() == relative_path), None)
    if img_id is None:
        print(f"Warning: No match found for {relative_path}")
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    annotations.append({
        "boxes": boxes,
        "labels": labels
    })

# Filter image_paths to only include images with annotations
selected_image_paths = [img_path for img_path, ann in zip(selected_image_paths, annotations) if ann["boxes"] and ann["labels"]]

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create dataset and DataLoader
dataset = load_dataset(selected_image_paths, annotations)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load Faster R-CNN model with updated weights parameter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = 6  # 5 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Training setup
model.train()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    for images, targets in dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {losses.item()}")

print("Training completed.")

Selected image paths:
/Users/skyzhao/Downloads/furniture/cabinets,cupboards,etc/image_226.jpeg
/Users/skyzhao/Downloads/furniture/chair/image_1599.jpeg
/Users/skyzhao/Downloads/furniture/fridge/image_1202.jpeg
/Users/skyzhao/Downloads/furniture/table/image_1638.jpeg
/Users/skyzhao/Downloads/furniture/tv/image_2214.jpeg

File names in coco_data['images']:
cabinets,cupboards,etc/image_226.jpegg
chair/image_1599.jpegg
fridge/image_1202.jpegg
table/image_1638.jpegg
tv/image_2214.jpegg

Trying to match: cabinets,cupboards,etc/image_226.jpegg

Trying to match: chair/image_1599.jpegg

Trying to match: fridge/image_1202.jpegg

Trying to match: table/image_1638.jpegg

Trying to match: tv/image_2214.jpegg
Epoch 1/5, Loss: 0.4183807373046875
Epoch 2/5, Loss: 0.6392212510108948
Epoch 3/5, Loss: 0.44014203548431396
Epoch 4/5, Loss: 0.29586315155029297
Epoch 5/5, Loss: 0.3235889673233032
Training completed.


In [3]:
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.ops import box_iou
import numpy as np

def evaluate(model, dataloader, device):
    model.eval()
    total_iou = 0
    total_predictions = 0

    with torch.no_grad():
        for images, targets in dataloader:
            images = list(img.to(device) for img in images)
            outputs = model(images)

            for pred, true in zip(outputs, targets):
                pred_boxes = pred['boxes'].cpu()
                true_boxes = true['boxes'].cpu()
                
                if len(pred_boxes) == 0 or len(true_boxes) == 0:
                    continue

                ious = box_iou(pred_boxes, true_boxes)  # shape: [pred, true]
                max_ious = ious.max(dim=1)[0]  # best match for each prediction
                total_iou += max_ious.sum().item()
                total_predictions += len(pred_boxes)

    mean_iou = total_iou / total_predictions if total_predictions > 0 else 0
    print(f"Mean IoU over predictions: {mean_iou:.4f}")
