In [None]:
import cv2
import numpy as np

# Load YOLO model
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Load COCO dataset classes
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

# Define a list of colors for each class
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Load input image
image = cv2.imread("image.jpg")

# Convert image to blob
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)

# Set input of YOLO network to blob
net.setInput(blob)

# Perform forward pass and get output
output_layers = net.getUnconnectedOutLayersNames()
layer_outputs = net.forward(output_layers)

# Define confidence threshold and NMS threshold
conf_threshold = 0.5
nms_threshold = 0.4

# Parse output and get bounding boxes
boxes = []
confidences = []
class_ids = []

for output in layer_outputs:
    for detection in output:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > conf_threshold:
            center_x = int(detection[0] * image.shape[1])
            center_y = int(detection[1] * image.shape[0])
            w = int(detection[2] * image.shape[1])
            h = int(detection[3] * image.shape[0])
            x = center_x - w // 2
            y = center_y - h // 2
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# Apply non-maximum suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# Draw bounding boxes and class labels
for i in indices:
    i = i[0]
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    color = colors[class_ids[i]]
    cv2.rectangle(image, (x, y), (x+w, y+h), color, 2


SyntaxError: ignored

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define the transformer-based architecture
class TransformerClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes, num_heads, num_layers):
        super().__init__()
        
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(hidden_dim, num_heads, dim_feedforward=4*hidden_dim), 
            num_layers
        )
        self.classifier = nn.Linear(hidden_dim, num_classes)
        
    def forward(self, x):
        x = self.embedding(x)
        x = x.transpose(0, 1)
        x = self.transformer(x)
        x = x.mean(dim=0)
        x = self.classifier(x)
        return x

# Load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Initialize the model and optimizer
model = TransformerClassifier(input_dim=3*32*32, hidden_dim=256, num_classes=10, num_heads=8, num_layers=6)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train the model
criterion = nn.CrossEntropyLoss()
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader, 0):
        inputs = inputs.view(-1, 3*32*32)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Loss: {running_loss/100:.3f}")
            running_loss = 0.0

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.view(-1, 3*32*32)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f"Accuracy on test set: {accuracy:.2f}%")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


AssertionError: ignored

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define the masked autoencoder architecture
class MaskedAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_patches):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(input_dim//num_patches, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim//num_patches)
        )
        
    def forward(self, x, mask):
        visible = x * mask
        hidden = self.encoder(visible)
        reconstructed = self.decoder(hidden)
        return reconstructed

# Load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Initialize the model and optimizer
model = MaskedAutoencoder(input_dim=3*32*32, hidden_dim=256, num_patches=4)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Define the masking function
def mask_input(x, prob):
    mask = torch.zeros_like(x).bernoulli_(prob)
    return x * mask, mask

# Train the model
criterion = nn.MSELoss()
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader, 0):
        inputs = inputs.view(-1, 3*32*32)
        inputs, mask = mask_input(inputs, 0.75)
        optimizer.zero_grad()
        outputs = model(inputs, mask)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Loss: {running_loss/100:.3f}")
            running_loss = 0.0

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.view(-1, 3*32*32)
        outputs = model(inputs, torch.ones_like(inputs))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f"Accuracy on test set: {accuracy:.2f}%")


Files already downloaded and verified
Files already downloaded and verified


RuntimeError: ignored

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the encoder and decoder networks
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU(inplace=True)
        
    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        return x

class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.deconv1 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)
        self.deconv2 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)
        self.deconv3 = nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1)
        self.relu = nn.ReLU(inplace=True)
        
    def forward(self, x):
        x = self.relu(self.deconv1(x))
        x = self.relu(self.deconv2(x))
        x = self.deconv3(x)
        return x

# Define the masked autoencoder model
class MaskedAutoencoder(nn.Module):
    def __init__(self):
        super(MaskedAutoencoder, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        
    def forward(self, x, mask):
        # Mask the input
        masked_x = x * mask
        
        # Encode the visible subset of patches
        encoded = self.encoder(masked_x)
        
        # Decode the latent representation and mask tokens
        reconstructed = self.decoder(encoded) * mask
        
        return reconstructed
        
# Load the dataset and define the mask
dataset = ...
mask = torch.zeros((batch_size, channels, height, width))
mask[:, :, :height//2, :width//2] = 1

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch in dataset:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        inputs = batch['image']
        outputs = model(inputs, mask)
        loss = criterion(outputs, inputs)

        # Backward pass
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Epoch %d loss: %.3f' % (epoch + 1, running_loss / len(dataset)))


NameError: ignored

In [5]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

# Define the encoder-decoder model
class EncoderDecoder(nn.Module):
    def __init__(self):
        super(EncoderDecoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 3, kernel_size=2, stride=2),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Define training parameters
batch_size = 128
num_epochs = 50
learning_rate = 0.001

# Load the CIFAR10 dataset
train_dataset = dsets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = dsets.CIFAR10(root='./data', train=False, transform=transforms.ToTensor())

# Create data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model and optimizer
model = EncoderDecoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, images)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print progress
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy: {} %'.format(100 * correct / total))


Files already downloaded and verified
Epoch [1/50], Loss: 0.0210
Epoch [2/50], Loss: 0.0162
Epoch [3/50], Loss: 0.0150
Epoch [4/50], Loss: 0.0111
Epoch [5/50], Loss: 0.0137
Epoch [6/50], Loss: 0.0109
Epoch [7/50], Loss: 0.0115
Epoch [8/50], Loss: 0.0100
Epoch [9/50], Loss: 0.0095
Epoch [10/50], Loss: 0.0095
Epoch [11/50], Loss: 0.0096
Epoch [12/50], Loss: 0.0092
Epoch [13/50], Loss: 0.0092
Epoch [14/50], Loss: 0.0083
Epoch [15/50], Loss: 0.0088
Epoch [16/50], Loss: 0.0080
Epoch [17/50], Loss: 0.0082
Epoch [18/50], Loss: 0.0080
Epoch [19/50], Loss: 0.0083
Epoch [20/50], Loss: 0.0080
Epoch [21/50], Loss: 0.0070
Epoch [22/50], Loss: 0.0076
Epoch [23/50], Loss: 0.0071
Epoch [24/50], Loss: 0.0066
Epoch [25/50], Loss: 0.0067
Epoch [26/50], Loss: 0.0067
Epoch [27/50], Loss: 0.0074
Epoch [28/50], Loss: 0.0064
Epoch [29/50], Loss: 0.0064
Epoch [30/50], Loss: 0.0070
Epoch [31/50], Loss: 0.0063
Epoch [32/50], Loss: 0.0064
Epoch [33/50], Loss: 0.0063
Epoch [34/50], Loss: 0.0069
Epoch [35/50], Loss

RuntimeError: ignored

In [6]:
import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        return x

class Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(64*8*8, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class EncoderClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EncoderClassifier, self).__init__()
        self.encoder = Encoder()
        self.classifier = Classifier(num_classes)

    def forward(self, x):
        x = self.encoder(x)
        x = self.classifier(x)
        return x


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

# Define transforms for the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the CIFAR-10 dataset
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Define the data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Define the Encoder module
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        return x

# Define the Classifier module
class Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(64*8*8, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the Encoder-Classifier module
class EncoderClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EncoderClassifier, self).__init__()
        self.encoder = Encoder()
        self.classifier = Classifier(num_classes)

    def forward(self, x):
        x = self.encoder(x)
        x = self.classifier(x)
        return x

# Instantiate the model and define the loss and optimizer
model = EncoderClassifier(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Train the model
num_epochs = 50
for epoch in range(num_epochs):
    # Train for one epoch
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    #


Files already downloaded and verified
Files already downloaded and verified




In [None]:
import torch
from PIL import Image
from torchvision import transforms

# Load the model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Load the image
image = Image.open('path/to/image.jpg')

# Apply transformations to the image
transform = transforms.Compose([
    transforms.Resize(size=(640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
image = transform(image)

# Perform object detection on the image
results = model(image)

# Print the detected objects and their confidence scores
for obj in results.xyxy[0]:
    print(obj[-1], obj[-2])


In [None]:
import numpy as np

def generate_anchor_boxes(base_size, ratios, scales):
    """
    Generates a set of anchor boxes based on the given base size, aspect ratios and scales.
    
    Args:
        base_size (int): The base size of the anchor box.
        ratios (list of floats): The aspect ratios of the anchor boxes.
        scales (list of floats): The scales of the anchor boxes.
        
    Returns:
        anchor_boxes (numpy array): An array of shape (len(ratios) * len(scales), 4) containing the coordinates
                                    of the anchor boxes.
    """
    num_boxes = len(ratios) * len(scales)
    anchor_boxes = np.zeros((num_boxes, 4))
    
    # Compute the areas of the anchor boxes for each aspect ratio and scale
    areas = base_size * base_size
    areas_ratios = areas / np.array(ratios)
    areas_scales = areas_ratios[:, np.newaxis] * np.array(scales)[np.newaxis, :]
    
    # Compute the widths and heights of the anchor boxes for each aspect ratio and scale
    widths = np.round(np.sqrt(areas_scales.reshape(-1)))
    heights = np.round(widths * ratios[:, np.newaxis])
    
    # Compute the x and y coordinates of the center of the anchor boxes for each aspect ratio and scale
    x_centers = np.arange(0, base_size, base_size / len(scales))
    y_centers = np.arange(0, base_size, base_size / len(ratios))
    
    # Generate all possible combinations of x and y coordinates for each aspect ratio and scale
    x_centers, y_centers = np.meshgrid(x_centers, y_centers)
    x_centers = x_centers.reshape(-1)
    y_centers = y_centers.reshape(-1)
    
    # Assign the coordinates of the anchor boxes
    anchor_boxes[:, 0] = x_centers - 0.5 * widths.reshape(-1)
    anchor_boxes[:, 1] = y_centers - 0.5 * heights.reshape(-1)
    anchor_boxes[:, 2] = x_centers + 0.5 * widths.reshape(-1)
    anchor_boxes[:, 3] = y_centers + 0.5 * heights.reshape(-1)
    
    return anchor_boxes


In [None]:
import numpy as np

def get_box_dimensions(boxes, img_width, img_height):
    box_centers = np.zeros_like(boxes)
    box_sizes = np.zeros_like(boxes)

    # Compute center coordinates
    box_centers[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    box_centers[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0

    # Compute box width and height
    box_sizes[:, 0] = boxes[:, 2] - boxes[:, 0]
    box_sizes[:, 1] = boxes[:, 3] - boxes[:, 1]

    # Normalize by image dimensions
    box_centers[:, 0] /= img_width
    box_centers[:, 1] /= img_height
    box_sizes[:, 0] /= img_width
    box_sizes[:, 1] /= img_height

    return box_centers, box_sizes


In [None]:
import matplotlib.pyplot as plt

def plot_box_dimensions(box_sizes):
    plt.scatter(box_sizes[:, 0], box_sizes[:, 1])
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.show()


In [None]:
from sklearn.cluster import KMeans

def compute_anchors(boxes, num_clusters):
    # Get box dimensions
    box_centers, box_sizes = get_box_dimensions(boxes)

    # Use K-means clustering to compute centroids
    kmeans = KMeans(n_clusters=num_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(box_sizes)

    # Get cluster centers
    anchors = kmeans.cluster_centers_

    return anchors


In [None]:
def iou(box1, box2):
    """
    Calculate Intersection over Union (IOU) between two bounding boxes
    :param box1: Tuple of (x1, y1, x2, y2)
    :param box2: Tuple of (x1, y1, x2, y2)
    :return: IOU between box1 and box2
    """
    # Calculate area of intersection
    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_bottom = min(box1[3], box2[3])
    if x_right < x_left or y_bottom < y_top:
        intersection_area = 0.0
    else:
        intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # Calculate area of union
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersection_area

    # Calculate IOU
    iou = intersection_area / union_area if union_area != 0 else 0.0
    return iou


In [None]:
max_iou_list = []
for bbox in bboxes:
    max_iou = 0.0
    for anchor in anchors:
        iou_score = iou(bbox, anchor)
        max_iou = max(max_iou, iou_score)
    max_iou_list.append(max_iou)

mean_max_iou = sum(max_iou_list) / len(max_iou_list)
print("Mean of maximum IOU between bounding boxes and individual anchors:", mean_max_iou)


In [None]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

# Compute the centre location, width and height of each bounding box, and normalise it by image dimensions
centres = []
widths = []
heights = []

for bbox in bboxes:
    x1, y1, x2, y2 = bbox
    centre_x = (x1 + x2) / 2
    centre_y = (y1 + y2) / 2
    centre_norm_x = centre_x / img_width
    centre_norm_y = centre_y / img_height
    centres.append([centre_norm_x, centre_norm_y])

    width = x2 - x1
    width_norm = width / img_width
    widths.append(width_norm)

    height = y2 - y1
    height_norm = height / img_height
    heights.append(height_norm)

# Plot h vs w for each box
plt.scatter(widths, heights)
plt.xlabel("Width")
plt.ylabel("Height")
plt.title("Bounding Box Width vs Height")
plt.show()

# Create a list of anchor boxes
aspect_ratios = [1, 2, 3, 1/2, 1/3]


In [None]:
import numpy as np

# set IOU threshold
iou_threshold = 0.65

# get IOU values for each anchor box
ious = []
for anchor in anchors:
    iou = bbox_iou(anchor, np.zeros_like(anchor))
    ious.append(iou)
ious = np.array(ious)

# get the top 5 anchor boxes based on IOU threshold
top_anchors = np.argsort(ious.max(axis=1))[::-1][:5]
