## One Cell

In [None]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import matplotlib.pyplot as plt

# Location of the training annotations CSV
annotations_file = 'data/training_annotations.csv'
# Parse it into the DataFrame that the dataset class and path check read from
annotations_df = pd.read_csv(filepath_or_buffer=annotations_file)

# Function to check if image paths are correct and display images
def check_image_paths(df, image_column):
    """Open and plot every image referenced by ``df[image_column]``.

    Rows are processed one at a time: the image is opened with PIL, drawn with
    matplotlib and titled with the row's ``image_id``. Any exception raised
    while opening or plotting a row is printed and the loop continues with the
    next row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ``image_column`` and an ``image_id`` column.
    image_column : str
        Name of the column that stores the image file paths.
    """
    for _, record in df.iterrows():
        image_path = record[image_column]
        try:
            # Opening proves the path resolves; plotting gives a visual check
            opened = Image.open(image_path)
            plt.imshow(opened)
            plt.title(f"Image ID: {record['image_id']}")
            plt.show()
        except Exception as err:
            # Best-effort check: report the problem and keep going
            print(f"Error loading image {image_path}: {err}")

# Verify (and display) every image listed in the annotations
check_image_paths(df=annotations_df, image_column='image_path')

class CustomDataset(Dataset):
    """Detection dataset that yields one image and one box per annotation row.

    Each row of ``annotations_df`` is read for ``image_path``, ``left``,
    ``top``, ``width`` and ``height``. The box is returned in
    ``[x_min, y_min, x_max, y_max]`` form and always carries label ``1``.

    Parameters
    ----------
    annotations_df : pandas.DataFrame
        Annotation table, indexed positionally via ``.iloc``.
    transform : callable, optional
        Applied to the PIL image only; the box is returned untouched.
    """

    def __init__(self, annotations_df, transform=None):
        self.transform = transform
        self.annotations = annotations_df

    def __len__(self):
        """Number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, {"boxes": [1, 4] float32, "labels": [1] int64})``."""
        row = self.annotations.iloc[idx]
        image = Image.open(row['image_path']).convert("RGB")

        # Convert (left, top, width, height) into corner coordinates
        x_min, y_min = row['left'], row['top']
        x_max = x_min + row['width']
        y_max = y_min + row['height']
        boxes = torch.as_tensor([[x_min, y_min, x_max, y_max]], dtype=torch.float32)

        # Single foreground class
        labels = torch.tensor([1], dtype=torch.int64)

        if self.transform:
            image = self.transform(image)

        return image, {"boxes": boxes, "labels": labels}
    
    
from torchvision import transforms

# Image-only augmentation pipeline.
# RandomHorizontalFlip / RandomVerticalFlip were removed on purpose:
# CustomDataset applies this pipeline to the image but returns the box
# coordinates unchanged, so a flipped image would be paired with a box that no
# longer covers the object. Only photometric changes are safe here.
# TODO: to bring flips back, flip the boxes together with the image
# (e.g. with the box-aware torchvision.transforms.v2 API).
transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
])


# Initialize the dataset
dataset = CustomDataset(annotations_df, transform=transform)

# Split dataset: 70% train, 15% validation, remainder test
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Ensure at least one sample in each set (borrowed from the training split)
if val_size == 0:
    val_size = 1
    train_size -= 1
if test_size == 0:
    test_size = 1
    train_size -= 1

# On very small datasets the borrowing above can exhaust the training split;
# fail here with a clear message instead of deep inside the DataLoader.
if train_size <= 0:
    raise ValueError(
        f"Dataset has only {len(dataset)} sample(s); at least 3 are needed for a train/val/test split"
    )

print(f'Train size: {train_size}, Validation size: {val_size}, Test size: {test_size}')

# Seed the split so the same rows land in the same subset on every run;
# otherwise re-running the notebook silently moves held-out rows into training.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Data loaders with num_workers set to 0
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)

# Define model
# Start from the pretrained detector. `weights="DEFAULT"` replaces the
# deprecated `pretrained=True` flag and selects the same default checkpoint.
model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2  # 1 class (frame) + background
# Swap the box-classification head for one sized to our two classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Optimizer (the losses themselves are returned by the model's forward pass)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-4)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    # In training mode the detection model returns a dictionary of losses
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        optimizer.zero_grad()

        # Re-pack the collated batch (one dict of stacked tensors) into the
        # per-image list of {'boxes', 'labels'} dicts passed to the model
        targets_list = [
            {'boxes': targets['boxes'][i], 'labels': targets['labels'][i]}
            for i in range(images.size(0))
        ]

        # Forward pass: total loss is the sum of every reported loss term
        loss_dict = model(images, targets_list)
        losses = sum(loss_dict.values())

        # Backward pass and parameter update
        losses.backward()
        optimizer.step()

        # Accumulate the scalar loss for the per-epoch average
        epoch_loss += losses.item()

    print(f"Epoch {epoch+1}, Training Loss: {epoch_loss / len(train_loader)}")

    # # Validation loop
    # model.eval()
    # val_loss = 0
    # with torch.no_grad():
    #     for images, targets in val_loader:
    #         # Convert targets to list of dictionaries
    #         targets_list = []
    #         for i in range(images.size(0)):
    #             target_dict = {}
    #             target_dict['boxes'] = targets['boxes'][i]
    #             target_dict['labels'] = targets['labels'][i]
    #             targets_list.append(target_dict)

    #         # Forward pass
    #         outputs = model(images, targets_list)

    #         # Calculate validation loss if in training mode
    #         if model.training:
    #             loss_dict = outputs
    #             losses = sum(loss for loss in loss_dict.values())
    #             val_loss += losses.item()
    #         else:
    #             val_loss += 0  # No loss calculation in eval mode

    # if len(val_loader) > 0:
    #     print(f"Epoch {epoch+1}, Validation Loss: {val_loss / len(val_loader)}")
    # else:
    #     print(f"Epoch {epoch+1}, Validation Loss: No validation samples")

# Persist the learned parameters (state dict only, not the module object)
checkpoint_path = 'model.pth'
torch.save(model.state_dict(), checkpoint_path)

# Read the checkpoint back and switch to inference mode
model.load_state_dict(torch.load(checkpoint_path))
model.eval()

## Cell by Cell

### Imports

In [None]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import matplotlib.pyplot as plt

### Data Loading

In [None]:
# Location of the training annotations CSV
annotations_file = 'data/training_annotations.csv'
# Parse it into the DataFrame that the dataset class and path check read from
annotations_df = pd.read_csv(filepath_or_buffer=annotations_file)

In [None]:
# Function to check if image paths are correct and display images
def check_image_paths(df, image_column):
    """Open and plot every image referenced by ``df[image_column]``.

    Rows are processed one at a time: the image is opened with PIL, drawn with
    matplotlib and titled with the row's ``image_id``. Any exception raised
    while opening or plotting a row is printed and the loop continues with the
    next row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ``image_column`` and an ``image_id`` column.
    image_column : str
        Name of the column that stores the image file paths.
    """
    for _, record in df.iterrows():
        image_path = record[image_column]
        try:
            # Opening proves the path resolves; plotting gives a visual check
            opened = Image.open(image_path)
            plt.imshow(opened)
            plt.title(f"Image ID: {record['image_id']}")
            plt.show()
        except Exception as err:
            # Best-effort check: report the problem and keep going
            print(f"Error loading image {image_path}: {err}")

# Verify (and display) every image listed in the annotations
check_image_paths(df=annotations_df, image_column='image_path')

In [None]:
class CustomDataset(Dataset):
    """Detection dataset that yields one image and one box per annotation row.

    Each row of ``annotations_df`` is read for ``image_path``, ``left``,
    ``top``, ``width`` and ``height``. The box is returned in
    ``[x_min, y_min, x_max, y_max]`` form and always carries label ``1``.

    Parameters
    ----------
    annotations_df : pandas.DataFrame
        Annotation table, indexed positionally via ``.iloc``.
    transform : callable, optional
        Applied to the PIL image only; the box is returned untouched.
    """

    def __init__(self, annotations_df, transform=None):
        self.transform = transform
        self.annotations = annotations_df

    def __len__(self):
        """Number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, {"boxes": [1, 4] float32, "labels": [1] int64})``."""
        row = self.annotations.iloc[idx]
        image = Image.open(row['image_path']).convert("RGB")

        # Convert (left, top, width, height) into corner coordinates
        x_min, y_min = row['left'], row['top']
        x_max = x_min + row['width']
        y_max = y_min + row['height']
        boxes = torch.as_tensor([[x_min, y_min, x_max, y_max]], dtype=torch.float32)

        # Single foreground class
        labels = torch.tensor([1], dtype=torch.int64)

        if self.transform:
            image = self.transform(image)

        return image, {"boxes": boxes, "labels": labels}

### Transformations

In [None]:
from torchvision import transforms

# Image-only augmentation pipeline.
# RandomHorizontalFlip / RandomVerticalFlip were removed on purpose:
# CustomDataset applies this pipeline to the image but returns the box
# coordinates unchanged, so a flipped image would be paired with a box that no
# longer covers the object. Only photometric changes are safe here.
# TODO: to bring flips back, flip the boxes together with the image
# (e.g. with the box-aware torchvision.transforms.v2 API).
transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
])


In [None]:
# Initialize the dataset
dataset = CustomDataset(annotations_df, transform=transform)

# Split dataset: 70% train, 15% validation, remainder test
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Ensure at least one sample in each set (borrowed from the training split)
if val_size == 0:
    val_size = 1
    train_size -= 1
if test_size == 0:
    test_size = 1
    train_size -= 1

# On very small datasets the borrowing above can exhaust the training split;
# fail here with a clear message instead of deep inside the DataLoader.
if train_size <= 0:
    raise ValueError(
        f"Dataset has only {len(dataset)} sample(s); at least 3 are needed for a train/val/test split"
    )

print(f'Train size: {train_size}, Validation size: {val_size}, Test size: {test_size}')

# Seed the split so the same rows land in the same subset on every run;
# otherwise re-running the notebook silently moves held-out rows into training.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Data loaders with num_workers set to 0
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)

### Model

In [None]:
# Define model
# Start from the pretrained detector. `weights="DEFAULT"` replaces the
# deprecated `pretrained=True` flag and selects the same default checkpoint.
model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2  # 1 class (frame) + background
# Swap the box-classification head for one sized to our two classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Optimizer (the losses themselves are returned by the model's forward pass)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-4)


### Training

In [None]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        optimizer.zero_grad()

        # The default collation yields one dict of stacked tensors; the model
        # is called with one {'boxes', 'labels'} dict per image instead.
        targets_list = []
        for i in range(images.size(0)):
            target_dict = {}
            target_dict['boxes'] = targets['boxes'][i]
            target_dict['labels'] = targets['labels'][i]
            targets_list.append(target_dict)

        # Forward pass
        loss_dict = model(images, targets_list)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()
        # Backward pass
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Training Loss: {epoch_loss / len(train_loader)}")

    # Validation loop
    # The detection model only returns its loss dictionary in training mode;
    # in eval mode it returns predictions, which previously left val_loss
    # stuck at 0. Stay in training mode and rely on torch.no_grad() so that
    # no gradients are computed and no parameters are updated.
    # NOTE(review): train mode also enables any train-only layer behaviour
    # (e.g. dropout / batch-norm updates) if the model has such layers - confirm.
    model.train()
    val_loss = 0
    with torch.no_grad():
        for images, targets in val_loader:
            # Convert targets to list of dictionaries
            targets_list = []
            for i in range(images.size(0)):
                target_dict = {}
                target_dict['boxes'] = targets['boxes'][i]
                target_dict['labels'] = targets['labels'][i]
                targets_list.append(target_dict)

            # Forward pass returns the loss terms because we are in train mode
            loss_dict = model(images, targets_list)
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()

    # Leave the model in eval mode after validation, as before
    model.eval()

    if len(val_loader) > 0:
        print(f"Epoch {epoch+1}, Validation Loss: {val_loss / len(val_loader)}")
    else:
        print(f"Epoch {epoch+1}, Validation Loss: No validation samples")


In [None]:
# Persist the learned parameters (state dict only, not the module object)
checkpoint_path = 'model.pth'
torch.save(model.state_dict(), checkpoint_path)

# Read the checkpoint back and switch to inference mode
model.load_state_dict(torch.load(checkpoint_path))
model.eval()

### Validation
1. Prepare the test data
2. Load the trained model
3. Run inference on test images 
4. Evaluate predictions

In [None]:
# Restore the saved parameters into the existing model object
saved_state = torch.load('model.pth')
model.load_state_dict(saved_state)
# Switch to inference mode (also the cell's displayed value)
model.eval()

In [None]:
import os
import torch
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as T

# Function to run inference on a single image
def run_inference(model, image_path, transform):
    """Run the detector on a single image file and return its prediction.

    Parameters
    ----------
    model : torch.nn.Module
        Detection model; it is moved to CUDA when available, otherwise CPU,
        and put into eval mode.
    image_path : str
        Path of the image to load (converted to RGB).
    transform : callable
        Maps the PIL image to a tensor; a batch dimension is added afterwards.

    Returns
    -------
    The first element of the model output for the one-image batch.
    """
    # Read the image and build a batch of size one
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0)

    # Keep input and model on the same device
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    batch = batch.to(device)
    model = model.to(device)

    # Inference only: eval mode, no gradient tracking
    model.eval()
    with torch.no_grad():
        detections = model(batch)

    return detections[0]

# Directory containing test images
test_image_dir = 'images/Testing'

# Transform for the images
transform = T.Compose([
    T.ToTensor(),
    # Add other transformations if needed
])

# Load the model
model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
# Derive the head dimensions from this model instead of relying on variables
# left behind by the training cells, so the cell does not depend on hidden state.
num_classes = 2  # 1 class (frame) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
# map_location keeps the load working on a CPU-only machine even if the
# checkpoint was written while the model lived on a GPU.
model.load_state_dict(torch.load('model.pth', map_location='cpu'))

# Get list of test image paths (sorted: os.listdir order is arbitrary;
# extension match is case-insensitive so '.PNG' files are included)
test_image_paths = sorted(
    os.path.join(test_image_dir, fname)
    for fname in os.listdir(test_image_dir)
    if fname.lower().endswith('.png')
)

# Run inference on all test images
for image_path in test_image_paths:
    prediction = run_inference(model, image_path, transform)

    # Display the image and the predicted bounding boxes
    image = Image.open(image_path).convert("RGB")
    plt.imshow(image)

    ax = plt.gca()
    for box, label, score in zip(prediction['boxes'], prediction['labels'], prediction['scores']):
        # Convert tensors (possibly on the GPU) to plain Python numbers before
        # handing them to matplotlib.
        score = float(score)
        if score > 0.5:  # Confidence threshold
            xmin, ymin, xmax, ymax = box.tolist()
            rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color='red')
            ax.add_patch(rect)
            ax.text(xmin, ymin, f'{label.item()}:{score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5))

    plt.title(f"Predictions for {os.path.basename(image_path)}")
    plt.show()


### Evaluate Predictions

In [None]:
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms, models
import matplotlib.pyplot as plt
import os

# Function to load annotations from CSV
def load_annotations(csv_file):
    """Read an annotations CSV and return it as a pandas DataFrame.

    Parameters
    ----------
    csv_file : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    """
    annotations = pd.read_csv(csv_file)
    return annotations

# Ground-truth boxes for the test images, read from their own CSV
test_annotations_file = 'data/testing_annotations.csv'
test_annotations_df = load_annotations(csv_file=test_annotations_file)

# Define the IoU calculation function
def calculate_iou(boxA, boxB):
    """Intersection-over-union of two axis-aligned boxes.

    Both boxes are ``[x_min, y_min, x_max, y_max]`` sequences. Widths and
    heights are measured with the pixel-inclusive ``+ 1`` convention, so a box
    whose min and max coincide still has area 1.

    Parameters
    ----------
    boxA, boxB : sequence of 4 numbers
        Corner coordinates of the two boxes.

    Returns
    -------
    float
        Ratio of intersection area to union area; ``0.0`` when the boxes do
        not overlap or the union is not positive.
    """
    # Determine the coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # Intersection extent is (far edge - near edge) on BOTH axes. The height
    # term was previously written as yA - yB, which clamps to 0 for any real
    # overlap and made the IoU of overlapping boxes come out as 0.
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # Compute the area of both the prediction and ground-truth rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # Guard against malformed (inverted) boxes producing a non-positive union
    union = float(boxAArea + boxBArea - interArea)
    if union <= 0:
        return 0.0
    return interArea / union

# Run inference on a single image
def run_inference(model, image_path):
    """Run the detector on one image file and return NumPy results.

    Parameters
    ----------
    model : torch.nn.Module
        Detection model; it is switched to eval mode.
    image_path : str
        Path of the image to load (converted to RGB).

    Returns
    -------
    tuple
        ``(image, boxes, labels, scores)`` where ``image`` is the loaded PIL
        image and the other three are NumPy arrays taken from the first
        prediction's ``'boxes'``, ``'labels'`` and ``'scores'`` entries.
    """
    # Load and transform the image
    image = Image.open(image_path).convert("RGB")
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    image_tensor = transform(image).unsqueeze(0)

    # Put the input on whatever device the model's parameters live on, so the
    # call also works for a model that was moved to the GPU beforehand.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    # Run the model
    model.eval()
    with torch.no_grad():
        prediction = model(image_tensor)[0]

    # Extract the boxes, labels and scores as CPU NumPy arrays
    boxes = prediction['boxes'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()

    return image, boxes, labels, scores

# Evaluate the model on test images
def evaluate_model(model, test_annotations_df):
    """Plot predictions against ground truth for every test row and print IoU.

    For each row the image is run through ``run_inference``; predictions with
    a score above 0.5 are outlined in red with a ``label:score`` tag, the
    ground-truth box is outlined in green, and the figure is shown. When at
    least one box was predicted, the IoU between the first predicted box and
    the ground truth is printed.

    Parameters
    ----------
    model : torch.nn.Module
        Detection model handed to ``run_inference``.
    test_annotations_df : pandas.DataFrame
        Rows providing ``image_path``, ``left``, ``top``, ``width``, ``height``.
    """

    def _outline(axes, corners, colour):
        # Add an unfilled rectangle for a [x_min, y_min, x_max, y_max] box
        x0, y0, x1, y1 = corners
        axes.add_patch(plt.Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False, color=colour))

    for _, row in test_annotations_df.iterrows():
        image_path = row['image_path']
        ground_truth_box = [row['left'], row['top'], row['left'] + row['width'], row['top'] + row['height']]

        image, predicted_boxes, labels, scores = run_inference(model, image_path)

        # Predictions above the confidence threshold go in red
        plt.imshow(image)
        ax = plt.gca()
        for box, label, score in zip(predicted_boxes, labels, scores):
            if not score > 0.5:
                continue
            _outline(ax, box, 'red')
            ax.text(box[0], box[1], f'{label.item()}:{score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5))

        # Ground truth goes in green
        _outline(ax, ground_truth_box, 'green')

        plt.title(f"Predictions for {os.path.basename(image_path)}")
        plt.show()

        # IoU is reported for the first predicted box only
        if len(predicted_boxes) > 0:
            iou = calculate_iou(predicted_boxes[0], ground_truth_box)
            print(f"IoU for {os.path.basename(image_path)}: {iou:.2f}")

# Load the model
# Rebuild the same architecture used for training, then restore the trained
# parameters. `weights="DEFAULT"` replaces the deprecated `pretrained=True`.
model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2  # 1 class (frame) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
# map_location keeps the load working on a CPU-only machine even if the
# checkpoint was written while the model lived on a GPU.
model.load_state_dict(torch.load('model.pth', map_location='cpu'))

# Evaluate the model on test images
evaluate_model(model, test_annotations_df)


## New

In [1]:
import pandas as pd
from PIL import Image, ImageDraw
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import matplotlib.pyplot as plt


In [2]:
# Location of the training annotations CSV
annotations_file = 'data/training_annotations.csv'
# Parse it into the DataFrame that the dataset class reads from
annotations_df = pd.read_csv(filepath_or_buffer=annotations_file)


Check image paths

In [3]:
# def check_image_paths(df, image_column):
#     for index, row in df.iterrows():
#         image_path = row[image_column]
#         try:
#             # Open the image to check if the path is correct
#             image = Image.open(image_path)
#             # Display the image
#             plt.imshow(image)
#             plt.title(f"Image ID: {row['image_id']}")
#             plt.show()
#         except Exception as e:
#             print(f"Error loading image {image_path}: {e}")

# # Run the check
# check_image_paths(annotations_df, 'image_path')


Dataset Class

In [4]:
class CustomDataset(Dataset):
    """Detection dataset that yields one image and one box per annotation row.

    Each row of ``annotations_df`` is read for ``image_path``, ``left``,
    ``top``, ``width`` and ``height``. The box is returned in
    ``[x_min, y_min, x_max, y_max]`` form and always carries label ``1``.

    Parameters
    ----------
    annotations_df : pandas.DataFrame
        Annotation table, indexed positionally via ``.iloc``.
    transform : callable, optional
        Applied to the PIL image only; the box is returned untouched.
    """

    def __init__(self, annotations_df, transform=None):
        self.transform = transform
        self.annotations = annotations_df

    def __len__(self):
        """Number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, {"boxes": [1, 4] float32, "labels": [1] int64})``."""
        row = self.annotations.iloc[idx]
        image = Image.open(row['image_path']).convert("RGB")

        # Convert (left, top, width, height) into corner coordinates
        x_min, y_min = row['left'], row['top']
        x_max = x_min + row['width']
        y_max = y_min + row['height']
        boxes = torch.as_tensor([[x_min, y_min, x_max, y_max]], dtype=torch.float32)

        # Single foreground class
        labels = torch.tensor([1], dtype=torch.int64)

        if self.transform:
            image = self.transform(image)

        return image, {"boxes": boxes, "labels": labels}


Transformations

In [5]:
# Image-only augmentation pipeline.
# RandomHorizontalFlip / RandomVerticalFlip were removed on purpose:
# CustomDataset applies this pipeline to the image but returns the box
# coordinates unchanged, so a flipped image would be paired with a box that no
# longer covers the object. Only photometric changes are safe here.
# TODO: to bring flips back, flip the boxes together with the image
# (e.g. with the box-aware torchvision.transforms.v2 API).
transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
])


Why am I splitting the data into train, validation, and test sets? Why can't I just train on all of it?

In [None]:
# Initialize the dataset
dataset = CustomDataset(annotations_df, transform=transform)

# Split dataset: 70% train, 15% validation, remainder test
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Ensure at least one sample in each set (borrowed from the training split)
if val_size == 0:
    val_size = 1
    train_size -= 1
if test_size == 0:
    test_size = 1
    train_size -= 1

# On very small datasets the borrowing above can exhaust the training split;
# fail here with a clear message instead of deep inside the DataLoader.
if train_size <= 0:
    raise ValueError(
        f"Dataset has only {len(dataset)} sample(s); at least 3 are needed for a train/val/test split"
    )

print(f'Train size: {train_size}, Validation size: {val_size}, Test size: {test_size}')

# Seed the split so the same rows land in the same subset on every run;
# otherwise re-running the notebook silently moves held-out rows into training.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Data loaders with num_workers set to 0
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)


Model

In [None]:
# Define model
# Start from the pretrained detector. `weights="DEFAULT"` replaces the
# deprecated `pretrained=True` flag and selects the same default checkpoint.
model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2  # 1 class (frame) + background
# Swap the box-classification head for one sized to our two classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)


Training

In [None]:
# Optimizer (the losses themselves are returned by the model's forward pass)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-4)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        optimizer.zero_grad()

        # The loaders use the default collation, so `targets` is ONE dict whose
        # values are stacked along the batch dimension - not a list of dicts.
        # Index the stacked tensors per image (targets[i] would raise KeyError
        # and len(targets) would count dict keys, not images).
        targets_list = []
        for i in range(images.size(0)):
            targets_list.append({
                'boxes': targets['boxes'][i],
                'labels': targets['labels'][i],
            })

        # Forward pass
        loss_dict = model(images, targets_list)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()
        # Backward pass
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Training Loss: {epoch_loss / len(train_loader)}")


Validation (optional)

In [None]:
# Validation loop
# The detection model only returns its loss dictionary in training mode; in
# eval mode it returns a list of predictions, on which `.values()` fails.
# Stay in training mode and rely on torch.no_grad() so that no gradients are
# computed and no parameters are updated.
# NOTE(review): train mode also enables any train-only layer behaviour
# (e.g. dropout / batch-norm updates) if the model has such layers - confirm.
model.train()
val_loss = 0
with torch.no_grad():
    for images, targets in val_loader:
        # Default collation yields one dict of stacked tensors; rebuild the
        # per-image list of dicts by indexing along the batch dimension.
        targets_list = []
        for i in range(images.size(0)):
            targets_list.append({
                'boxes': targets['boxes'][i],
                'labels': targets['labels'][i],
            })

        # Single forward pass (the earlier duplicate call doubled the cost)
        loss_dict = model(images, targets_list)
        losses = sum(loss for loss in loss_dict.values())
        val_loss += losses.item()

# Leave the model in eval mode, as this cell did before
model.eval()

if len(val_loader) > 0:
    print(f"Epoch {epoch+1}, Validation Loss: {val_loss / len(val_loader)}")
else:
    print(f"Epoch {epoch+1}, Validation Loss: No validation samples")


Save and Load Model

In [None]:
# Persist the learned parameters (state dict only, not the module object)
checkpoint_path = 'model.pth'
torch.save(model.state_dict(), checkpoint_path)

# Read the checkpoint back and switch to inference mode
model.load_state_dict(torch.load(checkpoint_path))
model.eval()


Define Inference Function

In [None]:
def detect_frames(model, image, threshold=0.5):
    """Detect frames in one image and return the confident bounding boxes.

    Parameters
    ----------
    model : torch.nn.Module
        Detection model whose output for a one-image batch is a list holding
        a dict with ``'boxes'`` and ``'scores'`` tensors.
    image : PIL.Image.Image
        Image to analyse; converted to a tensor with ``ToTensor``.
    threshold : float, optional
        Only boxes with a score strictly above this value are kept.

    Returns
    -------
    torch.Tensor
        The kept boxes, on the CPU, one row per detection.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

    # Put the input on whatever device the model's parameters live on
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    # Predictions are only produced in eval mode; switch temporarily and put
    # the model back in the caller's mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(image_tensor)
    finally:
        model.train(was_training)

    # Filter out detections with low scores; return on CPU so callers can
    # convert coordinates to plain numbers regardless of the model's device.
    detections = outputs[0]
    keep = detections['scores'] > threshold
    return detections['boxes'][keep].cpu()


Draw Bounding Boxes and Overlay Graphical Image

In [None]:
from PIL import Image, ImageDraw

def overlay_graphical_image(mockup_image, boxes, graphical_image):
    """
    Draws bounding boxes on the mockup image and overlays a graphical image onto the detected frames.

    The mockup image is modified in place and also returned; pass a copy to
    keep the original untouched.

    Parameters:
    - mockup_image: PIL Image object, the mockup image where frames are detected.
    - boxes: iterable of 4-element boxes (left, top, right, bottom), e.g. the
      tensor returned by the detector.
    - graphical_image: PIL Image object, the graphical image that will be overlayed.
      Converted to RGBA when it is in another mode.

    Returns:
    - mockup_image: PIL Image object, the mockup image with bounding boxes and overlayed graphical image.
    """
    draw = ImageDraw.Draw(mockup_image)

    # The overlay doubles as its own paste mask, which needs an alpha channel
    if graphical_image.mode == "RGBA":
        overlay_source = graphical_image
    else:
        overlay_source = graphical_image.convert("RGBA")

    for box in boxes:
        # Plain floats work the same for tensors, arrays and lists
        left, top, right, bottom = (float(coord) for coord in box)
        width = int(right - left)
        height = int(bottom - top)

        # A box thinner than one pixel cannot be resized into; skip it
        if width <= 0 or height <= 0:
            continue

        # Draw bounding box
        draw.rectangle([(left, top), (right, bottom)], outline="green", width=3)

        # Resize graphical image to fit the bounding box
        graphical_resized = overlay_source.resize((width, height))

        # Paste graphical image onto the mockup image, using its alpha as mask
        mockup_image.paste(graphical_resized, (int(left), int(top)), graphical_resized)

    return mockup_image


Test the Visualization

In [None]:
import os
import matplotlib.pyplot as plt

# Load the single graphical image
graphical_image_path = 'data/graphical_image.png'  # Replace with your graphical image path
graphical_image = Image.open(graphical_image_path).convert("RGBA")

# Directory containing mockup images to process
mockup_images_dir = 'data/mockup_images'  # Replace with your directory path

# Loop through each mockup image in the directory and apply overlay
# (sorted: os.listdir returns entries in arbitrary order)
for mockup_image_file in sorted(os.listdir(mockup_images_dir)):
    mockup_image_path = os.path.join(mockup_images_dir, mockup_image_file)

    # The directory may contain sub-directories or non-image files
    # (e.g. .DS_Store); skip those instead of aborting the whole run.
    if not os.path.isfile(mockup_image_path):
        continue
    try:
        mockup_image = Image.open(mockup_image_path).convert("RGB")
    except OSError as e:
        print(f"Skipping {mockup_image_file}: {e}")
        continue

    # Perform detection
    detected_boxes = detect_frames(model, mockup_image)

    # Overlay the graphical image and draw bounding boxes (on a copy, since
    # the overlay function draws on the image it is given)
    result_image = overlay_graphical_image(mockup_image.copy(), detected_boxes, graphical_image)

    # Display the result
    plt.imshow(result_image)
    plt.title(f"Detected Frames with Overlay: {mockup_image_file}")
    plt.show()
