In [6]:
import os
import torch
from PIL import Image
import torchvision.transforms as T
import json

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Load annotations (assuming COCO JSON format)
annotations_path = os.path.join(downloads_path, "annotations.json")
with open(annotations_path, "r") as f:
    coco_data = json.load(f)

# Map annotations to images
annotations = []
for img_path in image_paths:
    img_name = os.path.basename(img_path)
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"] == img_name), None)
    if img_id is None:
        print(f"Warning: No annotation found for {img_name}")
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    annotations.append({
        "boxes": boxes,  # Format: [[xmin, ymin, xmax, ymax], ...]
        "labels": labels  # Class labels
    })

# Filter image_paths to only include images with annotations
image_paths = [img_path for img_path, ann in zip(image_paths, annotations) if ann["boxes"] and ann["labels"]]

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()  # Convert PIL image to tensor
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create dataset and DataLoader
from torch.utils.data import DataLoader
dataset = load_dataset(image_paths, annotations)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

print(f"Loaded {len(dataset)} images with annotations.")



ValueError: num_samples should be a positive integer value, but got num_samples=0

In [1]:
import os
import random
import torch
from PIL import Image
import torchvision.transforms as T
import json
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg", ".jpe")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Select one random image from each subfolder (total 5 images)
selected_image_paths = []
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    subfolder_images = [p for p in image_paths if subfolder in p]
    if subfolder_images:
        selected_image_paths.append(random.choice(subfolder_images))

# Debugging: Print selected image paths
print("Selected image paths:")
for img_path in selected_image_paths:
    print(img_path)

# Create dummy annotations
# Categories: 1=cabinets, 2=chair, 3=fridge, 4=table, 5=tv
categories = [
    {"id": 1, "name": "cabinets"},
    {"id": 2, "name": "chair"},
    {"id": 3, "name": "fridge"},
    {"id": 4, "name": "table"},
    {"id": 5, "name": "tv"}
]

# Generate dummy COCO-format annotations
coco_data = {
    "images": [],
    "annotations": [],
    "categories": categories
}

for idx, img_path in enumerate(selected_image_paths):
    # Get image dimensions
    img = Image.open(img_path).convert("RGB")
    width, height = img.size
    
    # Add image metadata
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    # Normalize the path separator and extension
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    coco_data["images"].append({
        "id": idx + 1,
        "file_name": relative_path,
        "width": width,
        "height": height
    })
    
    # Add a dummy bounding box (e.g., center of the image, 100x100 pixels)
    box = [width/2 - 50, height/2 - 50, 100, 100]  # [x, y, width, height]
    category_id = subfolders.index(os.path.basename(os.path.dirname(img_path))) + 1
    coco_data["annotations"].append({
        "image_id": idx + 1,
        "bbox": box,
        "category_id": category_id,
        "id": idx + 1
    })

# Debugging: Print the file names in coco_data['images']
print("\nFile names in coco_data['images']:")
for item in coco_data["images"]:
    print(item["file_name"])

# Map annotations to images
annotations = []
for img_path in selected_image_paths:
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    print(f"\nTrying to match: {relative_path}")
    
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"].lower() == relative_path), None)
    if img_id is None:
        print(f"Warning: No match found for {relative_path}")
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    annotations.append({
        "boxes": boxes,
        "labels": labels
    })

# Filter image_paths to only include images with annotations
selected_image_paths = [img_path for img_path, ann in zip(selected_image_paths, annotations) if ann["boxes"] and ann["labels"]]

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create dataset and DataLoader
dataset = load_dataset(selected_image_paths, annotations)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load Faster R-CNN model with updated weights parameter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = 6  # 5 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Training setup
model.train()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    for images, targets in dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {losses.item()}")

print("Training completed.")

Selected image paths:
/Users/skyzhao/Downloads/furniture/cabinets,cupboards,etc/image_226.jpeg
/Users/skyzhao/Downloads/furniture/chair/image_1599.jpeg
/Users/skyzhao/Downloads/furniture/fridge/image_1202.jpeg
/Users/skyzhao/Downloads/furniture/table/image_1638.jpeg
/Users/skyzhao/Downloads/furniture/tv/image_2214.jpeg

File names in coco_data['images']:
cabinets,cupboards,etc/image_226.jpegg
chair/image_1599.jpegg
fridge/image_1202.jpegg
table/image_1638.jpegg
tv/image_2214.jpegg

Trying to match: cabinets,cupboards,etc/image_226.jpegg

Trying to match: chair/image_1599.jpegg

Trying to match: fridge/image_1202.jpegg

Trying to match: table/image_1638.jpegg

Trying to match: tv/image_2214.jpegg
Epoch 1/5, Loss: 0.4183807373046875
Epoch 2/5, Loss: 0.6392212510108948
Epoch 3/5, Loss: 0.44014203548431396
Epoch 4/5, Loss: 0.29586315155029297
Epoch 5/5, Loss: 0.3235889673233032
Training completed.


In [4]:
import os
import random
import torch
from PIL import Image
import torchvision.transforms as T
import json
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# Define the path to your furniture folder
downloads_path = os.path.expanduser("~/Downloads/furniture")  # Adjust for Windows: r"C:\Users\<YourUsername>\Downloads\furniture"

# Get list of image paths from all subfolders
image_paths = []
subfolders = ["cabinets,cupboards,etc", "chair", "fridge", "table", "tv"]
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    for fname in os.listdir(subfolder_path):
        if fname.lower().endswith((".jpg", ".png", ".jpeg", ".jpe")):
            image_paths.append(os.path.join(subfolder_path, fname))

# Select one random image from each subfolder (total 5 images)
selected_image_paths = []
for subfolder in subfolders:
    subfolder_path = os.path.join(downloads_path, subfolder)
    subfolder_images = [p for p in image_paths if subfolder in p]
    if subfolder_images:
        selected_image_paths.append(random.choice(subfolder_images))

# Split into training (4 images) and testing (1 image)
random.shuffle(selected_image_paths)
train_image_paths = selected_image_paths[:4]
test_image_paths = selected_image_paths[4:]

# Create dummy annotations
categories = [
    {"id": 1, "name": "cabinets"},
    {"id": 2, "name": "chair"},
    {"id": 3, "name": "fridge"},
    {"id": 4, "name": "table"},
    {"id": 5, "name": "tv"}
]

# Generate dummy COCO-format annotations for training and testing
coco_data = {"images": [], "annotations": [], "categories": categories}
coco_data_test = {"images": [], "annotations": [], "categories": categories}

# Training annotations
for idx, img_path in enumerate(train_image_paths):
    img = Image.open(img_path).convert("RGB")
    width, height = img.size
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    coco_data["images"].append({
        "id": idx + 1,
        "file_name": relative_path,
        "width": width,
        "height": height
    })
    box = [width/2 - 50, height/2 - 50, 100, 100]  # Dummy box
    category_id = subfolders.index(os.path.basename(os.path.dirname(img_path))) + 1
    coco_data["annotations"].append({
        "image_id": idx + 1,
        "bbox": box,
        "category_id": category_id,
        "id": idx + 1,
        "iscrowd": 0  # Added iscrowd field
    })

# Testing annotations
for idx, img_path in enumerate(test_image_paths):
    img = Image.open(img_path).convert("RGB")
    width, height = img.size
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    coco_data_test["images"].append({
        "id": idx + 1,
        "file_name": relative_path,
        "width": width,
        "height": height
    })
    box = [width/2 - 50, height/2 - 50, 100, 100]  # Dummy box
    category_id = subfolders.index(os.path.basename(os.path.dirname(img_path))) + 1
    coco_data_test["annotations"].append({
        "image_id": idx + 1,
        "bbox": box,
        "category_id": category_id,
        "id": idx + 1,
        "iscrowd": 0  # Added iscrowd field
    })

# Map annotations to images (training)
train_annotations = []
for img_path in train_image_paths:
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    img_id = next((item["id"] for item in coco_data["images"] if item["file_name"].lower() == relative_path), None)
    if img_id is None:
        continue
    img_annotations = [ann for ann in coco_data["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    train_annotations.append({
        "boxes": boxes,
        "labels": labels
    })

# Map annotations to images (testing)
test_annotations = []
for img_path in test_image_paths:
    relative_path = os.path.join(os.path.basename(os.path.dirname(img_path)), os.path.basename(img_path))
    relative_path = relative_path.replace(os.sep, "/").lower().replace(".jpe", ".jpeg")
    img_id = next((item["id"] for item in coco_data_test["images"] if item["file_name"].lower() == relative_path), None)
    if img_id is None:
        continue
    img_annotations = [ann for ann in coco_data_test["annotations"] if ann["image_id"] == img_id]
    boxes = [[ann["bbox"][0], ann["bbox"][1], ann["bbox"][0] + ann["bbox"][2], ann["bbox"][1] + ann["bbox"][3]] for ann in img_annotations]
    labels = [ann["category_id"] for ann in img_annotations]
    test_annotations.append({
        "boxes": boxes,
        "labels": labels
    })

# Define the load_dataset function
def load_dataset(image_paths, annotations):
    dataset = []
    transform = T.ToTensor()
    for img_path, ann in zip(image_paths, annotations):
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img)
        target = {
            "boxes": torch.tensor(ann["boxes"], dtype=torch.float32),
            "labels": torch.tensor(ann["labels"], dtype=torch.int64)
        }
        dataset.append((img_tensor, target))
    return dataset

# Create training and testing datasets
train_dataset = load_dataset(train_image_paths, train_annotations)
test_dataset = load_dataset(test_image_paths, test_annotations)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Load Faster R-CNN model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = 6  # 5 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Training setup
model.train()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    for images, targets in train_dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {losses.item()}")

# Evaluation
model.eval()
predictions = []

# Run inference on test set
with torch.no_grad():
    for images, targets in test_dataloader:
        images = list(image.to(device) for image in images)
        outputs = model(images)
        for output, target in zip(outputs, targets):
            predictions.append({
                "boxes": output["boxes"].cpu(),
                "scores": output["scores"].cpu(),
                "labels": output["labels"].cpu(),
                "target_boxes": target["boxes"],
                "target_labels": target["labels"]
            })

# Save ground truth and predictions in COCO format for evaluation
coco_gt_file = os.path.join(downloads_path, "coco_gt.json")
coco_dt_file = os.path.join(downloads_path, "coco_dt.json")

with open(coco_gt_file, "w") as f:
    json.dump(coco_data_test, f)

# Format predictions in COCO detection format
coco_dt = []
for idx, pred in enumerate(predictions):
    image_id = idx + 1  # Matches the image_id in coco_data_test
    for box, score, label in zip(pred["boxes"], pred["scores"], pred["labels"]):
        box = box.tolist()
        coco_dt.append({
            "image_id": image_id,
            "category_id": int(label),
            "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]],  # [x, y, width, height]
            "score": float(score)
        })

with open(coco_dt_file, "w") as f:
    json.dump(coco_dt, f)

# Evaluate using pycocotools
coco_gt = COCO(coco_gt_file)
coco_dt = coco_gt.loadRes(coco_dt_file)
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

# Custom evaluation for precision, recall, and F1-score at IoU=0.5
def compute_metrics(predictions, iou_threshold=0.5):
    TP, FP, FN = 0, 0, 0
    for pred in predictions:
        pred_boxes = pred["boxes"]
        pred_labels = pred["labels"]
        pred_scores = pred["scores"]
        gt_boxes = pred["target_boxes"]
        gt_labels = pred["target_labels"]

        # Sort predictions by score (descending)
        sorted_indices = torch.argsort(pred_scores, descending=True)
        pred_boxes = pred_boxes[sorted_indices]
        pred_labels = pred_labels[sorted_indices]

        matched = set()
        for pred_box, pred_label in zip(pred_boxes, pred_labels):
            best_iou = 0
            best_gt_idx = -1
            for gt_idx, (gt_box, gt_label) in enumerate(zip(gt_boxes, gt_labels)):
                if gt_idx in matched or pred_label != gt_label:
                    continue
                # Compute IoU
                x1 = max(pred_box[0], gt_box[0])
                y1 = max(pred_box[1], gt_box[1])
                x2 = min(pred_box[2], gt_box[2])
                y2 = min(pred_box[3], gt_box[3])
                inter = max(0, x2 - x1) * max(0, y2 - y1)
                union = (pred_box[2] - pred_box[0]) * (pred_box[3] - pred_box[1]) + \
                        (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1]) - inter
                iou = inter / union if union > 0 else 0
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = gt_idx

            if best_iou >= iou_threshold:
                TP += 1
                matched.add(best_gt_idx)
            else:
                FP += 1

        FN += len(gt_boxes) - len(matched)

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    return precision, recall, f1

# Compute precision, recall, F1-score
precision, recall, f1 = compute_metrics(predictions, iou_threshold=0.5)
print(f"\nCustom Metrics at IoU=0.5:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

print("Evaluation completed.")

Epoch 1/5, Loss: 0.8243498802185059
Epoch 2/5, Loss: 0.289709210395813
Epoch 3/5, Loss: 0.4312184154987335
Epoch 4/5, Loss: 0.41756078600883484
Epoch 5/5, Loss: 0.30114883184432983
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*


KeyError: 'area'

In [6]:
import os
import torch
from PIL import Image
import torchvision.transforms as transforms

# the root directory containing multiple subfolders of images
root_dir = 'furniture' 
save_dir = 'torch_imgs'  # new directory to store the tensors

transform = transforms.Compose([
    transforms.PILToTensor()
])

# go through the folders and save tensors
for subdir, _, files in os.walk(root_dir):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            image_path = os.path.join(subdir, file)
            try:
                image = Image.open(image_path).convert("RGB")
                img_tensor = transform(image)

                # consistent file name based on relative path
                relative_path = os.path.relpath(image_path, root_dir)
                tensor_filename = os.path.splitext(relative_path)[0] + '.pt'
                tensor_save_path = os.path.join(save_dir, tensor_filename)

                # make any necessary directories
                os.makedirs(os.path.dirname(tensor_save_path), exist_ok=True)

                # save the tensor
                torch.save(img_tensor, tensor_save_path)
                print(f"Saved tensor: {tensor_save_path}")

            except Exception as e:
                print(f"Error processing {image_path}: {e}")

Saved tensor: torch_imgs/chair/image_2341.pt
Saved tensor: torch_imgs/chair/image_947.pt
Saved tensor: torch_imgs/chair/image_1994.pt
Saved tensor: torch_imgs/chair/image_2711.pt
Saved tensor: torch_imgs/chair/image_1097.pt
Saved tensor: torch_imgs/chair/image_414.pt
Saved tensor: torch_imgs/chair/image_101.pt
Saved tensor: torch_imgs/chair/image_1428.pt
Saved tensor: torch_imgs/chair/image_551.pt
Saved tensor: torch_imgs/chair/image_1582.pt
Saved tensor: torch_imgs/chair/image_1078.pt
Saved tensor: torch_imgs/chair/image_802.pt
Saved tensor: torch_imgs/chair/image_2204.pt
Saved tensor: torch_imgs/chair/image_2654.pt
Saved tensor: torch_imgs/chair/image_2984.pt
Saved tensor: torch_imgs/chair/image_678.pt
Saved tensor: torch_imgs/chair/image_382.pt
Saved tensor: torch_imgs/chair/image_1351.pt
Saved tensor: torch_imgs/chair/image_228.pt
Saved tensor: torch_imgs/chair/image_1701.pt
Saved tensor: torch_imgs/chair/image_2087.pt
Saved tensor: torch_imgs/chair/image_33.pt
Saved tensor: torch_

In [15]:
import os
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import IterableDataset, DataLoader

# Define IterableDataset for tensor files
class TensorIterableDataset(IterableDataset):
    def __init__(self, tensor_dir):
        self.tensor_dir = tensor_dir

    def __iter__(self):
        # Walk through the tensor directory and yield tensors
        for subdir, _, files in os.walk(self.tensor_dir):
            for file in files:
                if file.endswith('.pt'):
                    tensor_path = os.path.join(subdir, file)
                    try:
                        # Load tensor
                        img_tensor = torch.load(tensor_path)
                        
                        # Ensure tensor is in correct format (C, H, W)
                        if img_tensor.dtype == torch.uint8:
                            img_tensor = img_tensor.float() / 255.0  # Convert to float and normalize to [0, 1]
                        
                        # Normalize for pretrained model
                        img_tensor = F.normalize(img_tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                        
                        # Yield tensor and its path (for saving predictions)
                        yield img_tensor, tensor_path
                    except Exception as e:
                        print(f"Error loading {tensor_path}: {e}")

# Directory containing saved tensors
tensor_dir = 'torch_imgs'

# Create IterableDataset
dataset = TensorIterableDataset(tensor_dir)

# Create DataLoader
data_loader = DataLoader(dataset, batch_size=2, num_workers=0, collate_fn=lambda x: tuple(zip(*x)))

# Load pretrained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
model.eval()  # Set to evaluation mode
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# COCO class names (for interpreting predictions)
COCO_CLASSES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Function to visualize predictions
def plot_predictions(img_tensor, predictions, save_path=None):
    img = img_tensor.permute(1, 2, 0).numpy()  # Convert CHW to HWC
    img = img * torch.tensor([0.229, 0.224, 0.225]) + torch.tensor([0.485, 0.456, 0.406])  # Denormalize
    img = np.clip(img, 0, 1)  # Ensure values are in [0, 1]
    
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    ax = plt.gca()
    
    for box, label, score in zip(predictions['boxes'], predictions['labels'], predictions['scores']):
        if score > 0.5:  # Only show predictions with confidence > 0.5
            x_min, y_min, x_max, y_max = box.cpu().numpy()
            width = x_max - x_min
            height = y_max - y_min
            rect = plt.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x_min, y_min - 5, f'{COCO_CLASSES[label]}: {score:.2f}', color='red', fontsize=12, weight='bold')
    
    plt.axis('off')
    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
        plt.close()
    else:
        plt.show()

# Perform inference with DataLoader
for batch_tensors, batch_paths in data_loader:
    try:
        # Move tensors to device
        batch_tensors = [t.to(device) for t in batch_tensors]
        
        # Perform inference
        with torch.no_grad():
            predictions = model(batch_tensors)  # List of prediction dicts for each image
        
        # Process each image in the batch
        for img_tensor, pred, tensor_path in zip(batch_tensors, predictions, batch_paths):
            # Print predictions
            print(f"\nProcessing {tensor_path}")
            for box, label, score in zip(pred['boxes'], pred['labels'], pred['scores']):
                if score > 0.5:  # Filter low-confidence predictions
                    print(f"Detected {COCO_CLASSES[label]} with confidence {score:.2f} at {box.cpu().numpy()}")
            
            # Visualize and save predictions
            save_path = os.path.join('predictions', os.path.relpath(tensor_path, tensor_dir).replace('.pt', '_pred.jpg'))
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            plot_predictions(img_tensor.cpu(), pred, save_path=save_path)
            print(f"Saved prediction visualization: {save_path}")

    except Exception as e:
        print(f"Error processing batch: {e}")


Processing torch_imgs/chair/image_2454.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_2514.pt
Detected bench with confidence 0.86 at [ 68.61859 148.28452 114.75587 191.3078 ]
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_2897.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_2405.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_2767.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_319.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Processing torch_imgs/chair/image_259.pt
Error processing batch: unsupported operand type(s) for *: 'numpy.ndarray' and 'Tensor'

Pro

KeyboardInterrupt: 