In [1]:
# from ultralytics.nn.tasks import DetectionModel, ClassificationModel
# from ultralytics import YOLO

In [2]:
# # Load a model
# model = YOLO("yolov8n.pt")  # pretrained YOLOv8n model

# # Run batched inference on a list of images
# results = model(["cycle.jpg"])  # return a list of Results objects

In [3]:
import sys
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from yolov1.model import Yolov1
from yolov1.dataset import BeanDataset
from yolov1.utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
    upscale_boxes,
)
import torchvision.models as models

In [4]:
seed = 123
torch.manual_seed(seed)

# Hyperparameters etc.
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available else "cpu"
BATCH_SIZE = (
    1  # 64 in original paper but I don't have that much vram, grad accum?
)
WEIGHT_DECAY = 0
EPOCHS = 10
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = True
LOAD_MODEL_FILE = "model.pth.tar"
LOAD_MODEL_FILE2 = "vgg16.pth.tar"

TRAIN_IMG_DIR = "/home/paineni/MasterThesis/yolov8/images/train"
TRAIN_LABEL_DIR = "/home/paineni/MasterThesis/yolov8/labels/train"
TEST_IMG_DIR = "/home/paineni/MasterThesis/yolov8/images/test"
TEST_LABEL_DIR = "/home/paineni/MasterThesis/yolov8/labels/test"

In [5]:
import numpy as np


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for t in self.transforms:
            img, bboxes = t(img), bboxes

        return img, bboxes


transform = Compose(
    [
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ]
)

In [6]:
model = Yolov1(split_size=7, num_boxes=2, num_classes=2).to(DEVICE)

vgg16 = models.vgg16(pretrained=True)
optimizer = optim.Adam(
        [
            {"params": model.parameters()},  # parameters of YOLO model
            {"params": vgg16.classifier.parameters()},
        ],  # parameters of VGG-16 classifier
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY,
    )


if LOAD_MODEL:
        # Load YOLO model and optimizer
        load_checkpoint(LOAD_MODEL_FILE, model, optimizer)

        # Load VGG-16 model (no need to load optimizer again)
        load_checkpoint(LOAD_MODEL_FILE2, vgg16)

test_dataset = BeanDataset(
    transform=transform,
    img_dir=TEST_IMG_DIR,
    label_dir=TEST_LABEL_DIR,
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True,
    drop_last=True,
)



=> Loading checkpoint


TypeError: string indices must be integers, not 'str'

In [None]:
# for z, x, y in test_loader:
#     x = x.to(DEVICE)
#     z = z.to(
#         DEVICE
#     )  # Ensure original images are also moved to the correct device
#     for idx in range(1):  # Iterate over each sample in the batch
#         # Get bounding boxes from the model predictions
#         bboxes = cellboxes_to_boxes(model(x))
#         print(f"Number of bounding boxes before NMS: {len(bboxes)}")

#         # Apply non-max suppression
#         bboxes2 = non_max_suppression(
#             bboxes[idx],
#             iou_threshold=0.5,
#             threshold=0.4,
#             box_format="midpoint",
#         )
#         print(f"Number of bounding boxes after NMS: {len(bboxes2)}")

#         # Plot the transformed image with original bounding boxes
#         plot_image(
#             x[idx].permute(1, 2, 0).to("cpu"),
#             bboxes2,
#             "/home/paineni/MasterThesis/yolov8/images/output.jpg",
#         )

#         # Plot the original image with upscaled bounding boxes
#         print(z[idx].permute(1, 2, 0).to("cpu").shape)
#         plot_image(
#             z[idx].permute(1, 2, 0).to("cpu"),
#             bboxes2,
#             "/home/paineni/MasterThesis/yolov8/images/output2.jpg",
#         )
#         break
#     break

In [None]:
from PIL import Image


def process_images_with_bboxes(image, predictions, output_size=(224, 224)):
    """
    Processes images with bounding boxes: crops the regions and resizes them.

    Args:
    - image (PIL.Image or numpy.array): The input image.
    - predictions (list): A list of predictions, where each prediction is a tuple
                          (class, probability, x_center, y_center, width, height).
    - output_size (tuple): The desired output size of the cropped images (default is (224, 224)).

    Returns:
    - list: A list of PIL.Image objects cropped and resized according to the bounding boxes.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise TypeError("Image should be a PIL.Image object or a numpy array.")

    width, height = image.size
    cropped_images = []

    for prediction in predictions:
        class_label, prob, x_center, y_center, box_width, box_height = (
            prediction
        )

        # Calculate bounding box coordinates
        left = (x_center - box_width / 2) * width
        top = (y_center - box_height / 2) * height
        right = (x_center + box_width / 2) * width
        bottom = (y_center + box_height / 2) * height

        # Crop the image
        cropped_img = image.crop((left, top, right, bottom))

        # Resize the cropped image
        resized_img = cropped_img.resize(output_size, Image.LANCZOS)
        resized_img_tensor = (
            torch.from_numpy(np.array(resized_img)).permute(2, 0, 1).float()
        )
        cropped_images.append(resized_img_tensor)

    return cropped_images, class_label

In [None]:
cropped_images = process_images_with_bboxes(
    z[idx].permute(1, 2, 0).to("cpu").numpy(), bboxes2, output_size=(224, 224)
)

In [None]:
type(cropped_images[0])

In [None]:
import matplotlib.pyplot as plt


def plot_cropped_images(cropped_images):
    """
    Plots cropped images from the list of tensors.

    Args:
    - cropped_images (list): A list of torch.Tensor objects representing cropped images.
    """
    num_images = len(cropped_images)
    fig, axs = plt.subplots(1, num_images, figsize=(num_images * 5, 5))

    for idx, cropped_img in enumerate(cropped_images):
        # Convert tensor to numpy array for plotting
        img_np = cropped_img
        axs[idx].imshow(img_np)
        axs[idx].set_title(f"Image {idx + 1}")
        axs[idx].axis("off")

    plt.show()

In [None]:
plot_cropped_images(cropped_images)

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# from matplotlib.ticker import MultipleLocator

# # Internship Timeline
# timeline = [
#     ("Categorization of New Images", 3),
#     ("Validating Data Quality", 4),
#     ("Model Experimenting, Training and Validation", 6),
#     ("Finalizing More Promising Detection Model", 2),
#     ("Deploying Model in Production Env", 2),
#     ("Adding CI Pipeline (Optional)", 2),
# ]

# # Base time
# base_time = 0.5  # weeks

# # Create Gantt chart
# fig, ax = plt.subplots(figsize=(10, 6), dpi=300)
# fig.patch.set_facecolor('#18242c')  # Set the background color of the figure

# # Set initial position for bars
# y_positions = range(len(timeline))
# start = 0

# for i, (task, weeks) in enumerate(timeline):
#     relative_duration = max(0, weeks - base_time) / (
#         max([dur for _, dur in timeline]) - base_time
#     )
#     color = plt.cm.Oranges(relative_duration)
#     ax.barh(
#         y_positions[i],
#         width=weeks,
#         left=start,
#         color=color,
#         label=f"{weeks} weeks",
#     )
#     start += weeks

# # Beautify the plot
# ax.set_facecolor('#18242c')  # Set the background color of the axes
# plt.yticks(y_positions, [task for task, _ in timeline], color='white')  # Set y-ticks text to white
# plt.xlabel("Timeline (Weeks)", color='white')  # Set x-label text to white
# plt.title("PCB Defect Detection", color='white')  # Set title text to white

# # Set x-axis ticks at intervals of 2 weeks
# ax.xaxis.set_major_locator(MultipleLocator(2))

# # Change x and y axis ticks color
# ax.tick_params(axis='x', colors='white')
# ax.tick_params(axis='y', colors='white')

# # Change the color of the axes frame to white and lines to dotted
# ax.spines['top'].set_color('white')
# ax.spines['right'].set_color('white')
# ax.spines['bottom'].set_color('white')
# ax.spines['left'].set_color('white')

# # Set line style to dotted
# ax.spines['top'].set_linestyle(':')
# ax.spines['right'].set_linestyle(':')
# ax.spines['bottom'].set_linestyle(':')
# ax.spines['left'].set_linestyle(':')

# # Add legend
# handles, labels = ax.get_legend_handles_labels()
# legend = ax.legend(handles, labels, loc="lower right", title="Duration")
# plt.setp(legend.get_title(), color='white')  # Set legend title color to white
# plt.setp(legend.get_texts(), color='white')  # Set legend text color to white

# # Set the legend background color
# legend.get_frame().set_facecolor('#18242c')

# plt.tight_layout()
# plt.show()