In [None]:
import os
import pathlib
import cv2
import boto3
import matplotlib.pyplot as plt

## Read Data Connection Variables

In [None]:
# S3 connection settings come from the environment. A variable that is not
# set yields None.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET")
AWS_S3_ENDPOINT = os.getenv("AWS_S3_ENDPOINT")

## Define S3 Connection

In [None]:
# Create a boto3 session with the credentials read from the environment,
# then an S3 client that talks to the configured endpoint.
session = boto3.Session(aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
client = session.client(service_name="s3", endpoint_url=AWS_S3_ENDPOINT)

## Download and Extract the Dataset

In [None]:
if not AWS_ACCESS_KEY_ID:
    # If S3 is not used in the demo, we can fallback
    # to the _scenario_resources directory.
    !cp _scenario_resources/dataset.tar.gz dataset.tar.gz
else:
    client.download_file(AWS_S3_BUCKET, "dataset.tar.gz", "dataset.tar.gz")
    print("Files downloaded from S3")

!tar -xzf dataset.tar.gz

## Inspect the Data

In [None]:
# Long-format listing of the top level of the dataset directory.
!ls -l dataset/

In [None]:
# Long-format listing of the images directory (the cells below read its
# train, val and test subdirectories).
!ls -l dataset/images

### Number of Training Images

In [None]:
# `ls -1` prints one entry per line, so `wc -l` yields the number of entries
# in the training image directory.
!ls -1 dataset/images/train | wc -l

### Number of Validation Images

In [None]:
# `ls -1` prints one entry per line, so `wc -l` yields the number of entries
# in the validation image directory.
!ls -1 dataset/images/val | wc -l

### Number of Test Images

In [None]:
# `ls -1` prints one entry per line, so `wc -l` yields the number of entries
# in the test image directory.
!ls -1 dataset/images/test | wc -l

## Visualize the Data

### Images

In [None]:
# Show the first nine training images in a 3x3 grid.
# os.listdir returns entries in arbitrary order, so sort before slicing to
# get the same nine files on every run.
train_images_dir = pathlib.Path("dataset/images/train")
files = sorted(os.listdir(train_images_dir))[:9]
fig = plt.figure(figsize=(10, 7))
rows, columns = 3, 3
for i, file in enumerate(files):
    ax = fig.add_subplot(rows, columns, i + 1)
    ax.set_title(file, fontsize=8)
    ax.axis("off")
    image = cv2.imread(str(train_images_dir / file))
    if image is None:
        # cv2.imread signals an unreadable file by returning None rather
        # than raising; leave the subplot empty and keep going.
        print(f"Could not read image: {file}")
        continue
    # OpenCV loads images as BGR; matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()

### Labels

In [None]:
# Derive each label file name from its image file name: same base name with
# a .txt extension (works for any image extension, not only lower-case .jpg).
# NOTE(review): the [2:] slice skips the first two files - presumably to keep
# the output short; confirm this is intentional.
labels = [pathlib.Path(f).with_suffix(".txt").name for f in files][2:]
for label_name in labels:
    label_path = pathlib.Path("dataset/labels/train") / label_name
    print(label_name)
    if not label_path.exists():
        # Report a missing label file instead of aborting the whole loop
        # with FileNotFoundError.
        print("(no label file found)")
        continue
    print(label_path.read_text())

### Images and Labels

In [None]:
# Maps the integer class id stored in the label files to a display name.
CLASSES = {0: "SpeedLimit", 1: "DangerAhead"}


# Utility function to draw bounding boxes on an image
def draw_bounding_boxes(image, bboxes):
    """Draw labelled bounding boxes onto ``image`` and return it.

    The image is modified in place; the returned object is the same one that
    was passed in.

    Args:
        image: Image array whose ``shape[:2]`` is ``(height, width)``.
        bboxes: Iterable of 5-value boxes
            ``(class_id, x_center, y_center, width, height)``. The four
            geometry values are fractions of the image size; they are scaled
            by the image width/height to obtain pixel coordinates.

    Returns:
        The same ``image`` object, with one rectangle and one text label
        drawn per box.
    """
    # The image size does not change while drawing, so read it once.
    img_h, img_w = image.shape[:2]

    for bbox in bboxes:
        class_id, x_center, y_center, width, height = bbox

        # Class ids are parsed as floats from the label file; convert to int
        # so the label reads "SpeedLimit (0)" rather than "SpeedLimit (0.0)".
        class_id = int(class_id)
        # Fall back to a generic name rather than raising KeyError when the
        # id is not listed in CLASSES.
        label = f"{CLASSES.get(class_id, 'Unknown')} ({class_id})"

        # Convert centre/size fractions to pixel corner coordinates.
        x1 = int((x_center - width / 2) * img_w)
        y1 = int((y_center - height / 2) * img_h)
        x2 = int((x_center + width / 2) * img_w)
        y2 = int((y_center + height / 2) * img_h)

        # Place the text just above the box, but never above the top edge of
        # the image where it would be clipped.
        text_y = max(y1 - 10, 15)

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 0), 2)
        cv2.putText(image, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    return image

# Function to read bounding boxes from a YOLO format text file
def read_bounding_boxes(label_file):
    """Parse a label file into a list of bounding boxes.

    Each non-blank line is split on whitespace and every field is converted
    to ``float``. Blank or whitespace-only lines are skipped so they do not
    produce empty boxes.

    Args:
        label_file: Path of the text file to read.

    Returns:
        A list with one list of floats per non-blank line, in file order.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        OSError: If the file cannot be opened.
    """
    bboxes = []
    with open(label_file, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            bboxes.append([float(x) for x in parts])
    return bboxes


# Example: overlay the labelled boxes on one validation image.
image_path = "dataset/images/val/small_IMG_1522.jpg"
label_file = "dataset/labels/val/small_IMG_1522.txt"

image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message rather than later
    # inside the drawing helper.
    raise FileNotFoundError(f"Could not read image: {image_path}")
bboxes = read_bounding_boxes(label_file)
# draw_bounding_boxes draws onto `image` in place and returns that same array.
image_with_boxes = draw_bounding_boxes(image, bboxes)

# Display the image with bounding boxes
plt.figure(figsize=(10, 10))
# OpenCV loads images as BGR; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()