### Visualise Loss and mAP

In [None]:
import matplotlib.pyplot as plt

# Losses and accuracies are stored in the results.csv file in each run's
# save_dir. The runs listed here are concatenated in order and plotted as one
# continuous training history.
RESULTS_CSVS = [
    "chkpts/ART/yolov8x-semiseg-artnet/results.csv",
    "chkpts/ART/yolov8x-semiseg-artnet2/results.csv",
    "chkpts/ART/yolov8x-semiseg-artnet4/results.csv",
]
LOSS_COLUMN = 1  # plotted as "Training Loss" -- TODO confirm against the CSV header
MAP_COLUMN = 7  # plotted as "Validation mAP" -- TODO confirm against the CSV header


def _read_metric_columns(csv_path, loss_col=LOSS_COLUMN, map_col=MAP_COLUMN):
    """Read two numeric columns from one results.csv.

    Args:
        csv_path: Path of the CSV file; its first line is treated as a header.
        loss_col: Zero-based index of the column returned as the loss series.
        map_col: Zero-based index of the column returned as the mAP series.

    Returns:
        A ``(losses, accuracies)`` pair of float lists, one entry per data row.
    """
    run_losses, run_accuracies = [], []
    with open(csv_path) as file:
        next(file, None)  # skip the header row
        for line in file:
            if not line.strip():
                # A blank (e.g. trailing) line would otherwise break float().
                continue
            fields = line.split(",")
            run_losses.append(float(fields[loss_col]))
            run_accuracies.append(float(fields[map_col]))
    return run_losses, run_accuracies


losses = []
accuracies = []
for csv_path in RESULTS_CSVS:
    run_losses, run_accuracies = _read_metric_columns(csv_path)
    losses += run_losses
    accuracies += run_accuracies

# Plot the training loss and validation mAP
plt.figure(figsize=(10, 5))
# Epochs are 1-based integers numbered across all concatenated runs
epochs = list(range(1, len(losses) + 1))
plt.subplot(1, 2, 1)
plt.xticks(epochs[::10], rotation=45)
plt.plot(epochs, losses)
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.subplot(1, 2, 2)
plt.xticks(epochs[::10], rotation=45)
plt.plot(epochs, accuracies)
plt.title("Validation mAP")
plt.xlabel("Epoch")
plt.ylabel("mAP")
plt.tight_layout()
plt.savefig("results/ART/metrics.png")
plt.show()

# output best accuracy and epoch (the first epoch wins on ties)
best_accuracy = max(accuracies)
best_epoch = accuracies.index(best_accuracy) + 1
print(f"Best accuracy: {best_accuracy} at epoch {best_epoch}")

### Create validation split

In [None]:
import os
import random
import shutil

random.seed(42)

# Define paths
base_path = "D:/Data/PETRAW/"
# folders = ["images", "labels", "masks", "tool_tip_masks"]
folders = ["images", "labels"]
# NOTE: despite the variable names, this configuration moves a 1% sample out
# of the "test" folder into "one_test".
train_folder = "test"
val_folder = "one_test"
val_split = 0.01

# Create validation folders if they do not exist
for folder in folders:
    os.makedirs(os.path.join(base_path, folder, val_folder), exist_ok=True)

# Get list of all files in the train folders (using images as reference).
# os.listdir returns entries in arbitrary order, so the list is sorted to make
# the seeded random.sample below select the same files on every machine/run.
train_images_path = os.path.join(base_path, "images", train_folder)
all_files = sorted(
    f
    for f in os.listdir(train_images_path)
    if os.path.isfile(os.path.join(train_images_path, f))
)

# Determine the number of validation samples
num_val_samples = int(len(all_files) * val_split)
print(len(all_files))
# Randomly select files for the validation set
val_files = random.sample(all_files, num_val_samples)

# Move selected files to the validation folders.
# WARNING: this moves files, so re-running the cell samples again from what
# is left in the source folder.
for folder in folders:
    train_path = os.path.join(base_path, folder, train_folder)
    val_path = os.path.join(base_path, folder, val_folder)

    for file in val_files:
        file_name, file_ext = os.path.splitext(file)

        # Find corresponding file (allowing for different extensions);
        # the first extension that exists in this folder wins.
        corresponding_file = None
        for ext in [".png", ".npy", ".txt"]:
            if os.path.exists(os.path.join(train_path, file_name + ext)):
                corresponding_file = file_name + ext
                break

        # Files with no counterpart in this folder are silently skipped.
        if corresponding_file:
            shutil.move(
                os.path.join(train_path, corresponding_file),
                os.path.join(val_path, corresponding_file),
            )

print("Test set created with {} samples.".format(num_val_samples))

56473 in val first
567 in 1% data in initial training and validation and test sets

### Combined Data Model

In [None]:
import os 

dataset_path = "D:/Data/Combined"

# Create a configuration file for YOLOv8.
# NOTE(review): both class ids are given the same name 'tool', and the
# `datasets:` key has no value -- confirm both are intentional.
config_content = f"""
datasets: 
train: {dataset_path}/images/train
val: {dataset_path}/images/val

nc: 2  # number of classes
names: ['tool', 'tool']  # class names
"""

# Single-class alternative, kept for reference:
# config_content = f"""
# train: {dataset_path}/images/train
# val: {dataset_path}/images/val

# nc: 1  # number of classes
# names: ['tool']  # class names
# """

config_path = "yaml/data-combined.yaml"
# Make sure the target directory exists so the write works on a fresh checkout.
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, "w") as file:
    file.write(config_content)

### Configuration Example 1

In [None]:
# Create a small two-class YOLOv8 data config that trains on images/one and
# validates on images/one_test, and write it into the dataset directory.
# Depends on `os` and `dataset_path` from the "Combined Data Model" cell above;
# `config_path_test` is later passed to model.val() in "Running Best Model".
config_content_test = f"""
train: {dataset_path}/images/one
val: {dataset_path}/images/one_test

nc: 2  # number of classes
names: ['left_tool', 'right_tool']  # class names
"""
config_path_test = os.path.join(dataset_path, "data-small-test.yaml")
with open(config_path_test, "w") as file:
    file.write(config_content_test)

### Configuration Example 2

In [None]:
# Create the "final" YOLOv8 data config: training uses both images/one and
# images/one_val, while validation runs on images/one_test.
# Depends on `os` and `dataset_path` from the "Combined Data Model" cell above.
config_content_final = f"""
train: 
  - {dataset_path}/images/one
  - {dataset_path}/images/one_val
val: {dataset_path}/images/one_test

nc: 2  # number of classes
names: ['left_tool', 'right_tool']  # class names
"""
config_path_final = os.path.join(dataset_path, "data-small-final.yaml")
with open(config_path_final, "w") as file:
    file.write(config_content_final)

### Running Best Model

In [None]:
import os
import matplotlib.pyplot as plt
from ultralytics import YOLO
from multiprocessing import freeze_support
import torch

# Report the runtime environment and prefer the GPU when one is available.
cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.__version__)
device = torch.device("cuda" if cuda_available else "cpu")
print(device)

# Load the checkpoint, move it to the chosen device and validate it against
# the data config written in "Configuration Example 1" (`config_path_test`).
model = YOLO("chkpts/ART/yolov8x-semiseg-artnet3/weights/best.pt")
model.to(device)
results = model.val(data=config_path_test)

### YOLO boxes information

    Calculates and aggregates detection and segmentation metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and segmentation plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and segmentation plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        seg (Metric): An instance of the Metric class to calculate mask segmentation metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and segmentation metrics over all the classes.
        class_result(i): Returns the detection and segmentation metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score.

### Metrics Result

In [None]:
# Show the metrics dictionary of the validation run from "Running Best Model".
# `results` is rebound by the tracking cell below, so run this cell directly
# after validation.
results.results_dict

### Tracking

In [None]:
# Alternative input (external volume), kept for reference:
# results = model.track("/Volumes/Exodus/Data/6DOF 2023/Test 1/Dataset.mp4", tracker="bytetrack.yaml", save=True, show=True)

# Track objects through the 6DOF video with the ByteTrack tracker config.
# NOTE: this rebinds `results`, replacing the validation metrics object
# produced by model.val() in "Running Best Model".
# presumably save=True writes the annotated output and show=True opens a
# preview window -- confirm against the Ultralytics documentation.
results = model.track(
    "data/6DOF/Dataset.mp4",
    tracker="bytetrack.yaml",
    save=True,
    show=True,
)

# Alternative input (EndoVis 2015 raw video), kept for reference:
# results = model.track(
#     "/Volumes/Exodus/Data/EndoVis 2015/Tracking (Raw Video)/Dataset1/Video.avi",
#     tracker="bytetrack.yaml",
#     save=True,
#     show=True,
# )

### Remove Extra Boxes

In [None]:
import numpy as np
import torch

# Function to remove overlapping boxes using Non-Max Suppression
def non_max_suppression(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Args:
        boxes: Sequence of ``[x1, y1, x2, y2]`` boxes.
        scores: One confidence score per box.
        iou_threshold: A remaining box is discarded when its IoU with the
            currently selected box is greater than this value.

    Returns:
        List of indices into ``boxes`` that survive, ordered by descending
        score. An empty input yields an empty list.
    """
    if len(boxes) == 0:
        return []

    box_arr = np.array(boxes)
    score_arr = np.array(scores)

    x1, y1 = box_arr[:, 0], box_arr[:, 1]
    x2, y2 = box_arr[:, 2], box_arr[:, 3]
    # Widths and heights use the inclusive-pixel (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Candidate indices, highest score first.
    remaining = score_arr.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)
        if rest.size == 0:
            break

        # Intersection rectangle between the best box and every other candidate.
        left = np.maximum(x1[best], x1[rest])
        top = np.maximum(y1[best], y1[rest])
        right = np.minimum(x2[best], x2[rest])
        bottom = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0, right - left + 1) * np.maximum(0, bottom - top + 1)

        iou = inter / (areas[best] + areas[rest] - inter)
        # Keep only candidates that do not overlap the best box too much.
        remaining = rest[iou <= iou_threshold]

    return selected


def process_image(image_path, file, model, iou_threshold=0.3, confidence_threshold=0.5, max_boxes=2):
    """Run the model on one image, prune its boxes, and save the annotated image.

    Boxes below ``confidence_threshold`` are dropped, the rest are passed
    through ``non_max_suppression``, and at most ``max_boxes`` of the
    highest-scoring survivors are kept on the result before it is saved.

    Args:
        image_path: Path of the image passed to the model.
        file: File name used to build the output name inside ``tmp/``
            (a ``.bmp`` suffix is replaced by ``_tmp.png``).
        model: Callable model; ``model(image_path, ...)[0]`` must expose
            ``boxes.xyxy``, ``boxes.conf``, an indexable ``boxes`` and ``save``.
        iou_threshold: IoU above which a lower-scoring box is suppressed.
        confidence_threshold: Minimum confidence for a box to be considered.
        max_boxes: Maximum number of boxes kept on the result.
    """
    import os  # local import: this cell does not import os itself

    # Perform inference on a new image
    result = model(image_path, save=False, show=False, verbose=False)[0]

    boxes = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()

    # Step 1: positions (in the ORIGINAL box list) that pass the confidence filter
    confident = np.where(scores >= confidence_threshold)[0]

    # Step 2: NMS on the filtered subset; the indices it returns are relative
    # to that subset, not to the original box list.
    subset_keep = non_max_suppression(boxes[confident], scores[confident], iou_threshold)
    subset_keep = np.asarray(subset_keep, dtype=int)[:max_boxes]

    # Step 3: translate subset indices back to original indices before
    # indexing result.boxes. Using the subset indices directly would select
    # the wrong boxes whenever the confidence filter removed an earlier box.
    selected = confident[subset_keep].tolist()
    result.boxes = result.boxes[selected]
    # NOTE(review): only `boxes` is filtered; any other per-detection data on
    # the result (e.g. masks) is left untouched -- confirm this is intended.

    # Store the annotated image in tmp/, creating the directory if needed
    os.makedirs("tmp", exist_ok=True)
    tmp = "tmp/" + file.replace(".bmp", "_tmp.png")
    result.save(tmp)

# test_dir = "/Volumes/Exodus/Data/6DOF 2023/Test 1/"
test_dir = "/Volumes/Exodus/Data/EndoVis 2015/Testing (Raw Images)/OP1/"

# Run the model on every PNG frame in name order, skipping files whose name
# starts with "._" (presumably macOS sidecar files on the external volume).
for file in sorted(os.listdir(test_dir)):
    if not file.endswith(".png") or file.startswith("._"):
        continue
    process_image(os.path.join(test_dir, file), file, model)

### Combine PNG frames into an mp4

In [None]:
import imageio.v2 as imageio


# Combine all image frames in a directory into a single video
def stitch_images(bmp_directory, output_video_path, fps=24, extension=".png"):
    """Write every matching image in a directory to a video, in name order.

    Args:
        bmp_directory: Directory holding the frames. The name is historical;
            which files are used is decided by ``extension``.
        output_video_path: Path of the video file to create.
        fps: Frame rate of the output video.
        extension: File-name suffix a frame must have to be included.

    Notes:
        Frames are ordered by plain string sorting, so un-padded numeric
        names (``2.png`` vs ``10.png``) will not be in numeric order.
    """
    frame_paths = sorted(
        os.path.join(bmp_directory, f)
        for f in os.listdir(bmp_directory)
        # Hidden files (leading ".") are ignored.
        if f.endswith(extension) and not f.startswith(".")
    )

    if not frame_paths:
        # Nothing to encode: avoid creating an empty video and reporting success.
        print(f"No {extension} frames found in {bmp_directory}")
        return

    # The context manager closes the writer even if a frame fails to load.
    with imageio.get_writer(output_video_path, fps=fps) as writer:
        for file_path in frame_paths:
            writer.append_data(imageio.imread(file_path))

    print(f"Video saved at {output_video_path}")


# Destination of the stitched video (the name says "image", but it is an .mp4 path).
output_image_name = "results/Test 1/yolo8-1.mp4"

# Stitch the images together (currently disabled; uncomment to build the
# video from the frames saved in tmp/)
# stitch_images("tmp", output_image_name)

### Plot ground truth and model prediction

In [None]:
import cv2

# Use label file to draw the original bounding boxes on the original image
sample_label = "/Volumes/Exodus/Data/ART-Net/labels/val/Test_Pos_sample_0001.txt"
sample_image = (
    "/Volumes/Exodus/Data/ART-Net/Test/Test_Positive/Test_Pos_sample_0001.png"
)

# Left panel of a 1x2 figure: the sample image with its ground-truth boxes
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
image = cv2.imread(sample_image)
if image is None:
    raise FileNotFoundError(f"Could not read image: {sample_image}")
img_height, img_width = image.shape[:2]

with open(sample_label, "r") as lf:
    for line in lf:
        label = line.split()
        if not label:
            continue  # ignore blank lines
        # Each row is read as: class, x_center, y_center, width, height,
        # with the four box values scaled by the image size below
        # (i.e. assumed to be normalised to [0, 1]).
        x_center, y_center, width, height = map(float, label[1:])
        x = int((x_center - width / 2) * img_width)
        y = int((y_center - height / 2) * img_height)
        w = int(width * img_width)
        h = int(height * img_height)
        # Draw inside the loop so every labelled box is shown, not just the last.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# OpenCV loads BGR; matplotlib expects RGB
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis("off")

# plt.subplot(1, 2, 2)
# save image
# results[0].save("tmp/results.png")
# # display image
# plt.imshow(cv2.cvtColor(cv2.imread("tmp/results.png"), cv2.COLOR_BGR2RGB))
# plt.axis("off")
# plt.show()

### Remove black border from frame

In [None]:
# I have a video data/6DOF/Dataset.mp4 which has a weird black border around it
# I want to crop the video to remove the black border

import cv2
import numpy
import os

# Read the video
video_path = "data/6DOF/Dataset.mp4"

os.makedirs("tmp_img", exist_ok=True)

# Convert the video to images and store them in the tmp_img folder
cap = cv2.VideoCapture(video_path)
frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imwrite(f"tmp_img/{frame_count}.png", frame)
    frame_count += 1
cap.release()

if frame_count == 0:
    raise RuntimeError(f"No frames could be read from {video_path}")

# The border is measured on the first frame only and applied to every frame.
# NOTE(review): if the first frame is entirely black, nothing is cropped.
img = cv2.imread("tmp_img/0.png")
if img is None:
    raise RuntimeError("Could not read back tmp_img/0.png")

# Rows / columns that contain at least one non-black pixel
content_rows = numpy.flatnonzero(numpy.any(img != 0, axis=(1, 2)))
content_cols = numpy.flatnonzero(numpy.any(img != 0, axis=(0, 2)))

if content_rows.size and content_cols.size:
    # Slice ends are exclusive, so add 1 to keep the last non-black row/column.
    top, bottom = int(content_rows[0]), int(content_rows[-1]) + 1
    left, right = int(content_cols[0]), int(content_cols[-1]) + 1
else:
    top, bottom = 0, img.shape[0]
    left, right = 0, img.shape[1]

# Crop every frame, overwrite it on disk and append it to the output video.
# Frames are streamed one at a time instead of being held in memory.
size = (right - left, bottom - top)  # (width, height) as VideoWriter expects
# Store video at 30 fps
out = cv2.VideoWriter("data/6DOF/Dataset_cropped.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size)
for frame_index in range(frame_count):
    frame_path = f"tmp_img/{frame_index}.png"
    cropped = cv2.imread(frame_path)[top:bottom, left:right]
    cv2.imwrite(frame_path, cropped)
    out.write(cropped)
out.release()

### Preprocess same image names

In [None]:
# Rename every image inside each folder D:\Data\PETRAW\Training\Training\Images to include its folder name (e.g. 001) to the front (as a string)
import os

# Path to the folder containing one sub-directory of frames per sequence
path = "D:/Data/PETRAW/Test/Images/"
# path = "D:/Data/PETRAW/labels/test/"
# Destination folder (named `labels` for historical reasons; it currently
# points at the images folder)
labels = "D:/Data/PETRAW/images/test"
# labels = "D:/Data/PETRAW/labels/test"

# List all the entries in the folder, filtering out .txt files
dirs = [entry for entry in os.listdir(path) if not entry.endswith(".txt")]
print(dirs)
# quit()

# `folder_name` (rather than `dir`) keeps the builtin dir() usable in later cells.
for folder_name in dirs:
    source_dir = path + folder_name

    # MOVE (not copy) every frame into the destination folder, prefixing the
    # file name with its folder name so names stay unique.
    for file in os.listdir(source_dir):
        if file.startswith("frame"):
            os.rename(source_dir + "/" + file, labels + "/" + folder_name + "_" + file)

    # Remove the now-empty directory. If that fails, delete leftover hidden
    # files (names starting with ".") and try once more; any other leftover
    # file makes the second rmdir raise.
    try:
        os.rmdir(source_dir)
    except OSError:
        for file in os.listdir(source_dir):
            if file.startswith("."):
                os.remove(source_dir + "/" + file)
        os.rmdir(source_dir)
    print(f"Deleted {folder_name}")

### Convert MP4 to PNG

In [2]:
# Write a script which takes in a mp4 file and converts it to a series of images, labelling each one with {frame_number}.png

import cv2
import os


def convert_to_imgs(path, output_dir="data/EndoVis 2015/Video 6"):
    """Split a video into PNG frames named ``frame_NNNN.png``.

    Args:
        path: Path of the video file to read.
        output_dir: Directory that receives the frames; created if missing.

    Raises:
        OSError: If the video cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Create a VideoCapture object
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open video: {path}")

        # Read until the stream ends instead of trusting the frame count in
        # the container metadata, and never pass a failed read to imwrite.
        i = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Save the frame as a png image (zero-padded to 4 digits)
            cv2.imwrite(f"{output_dir}/frame_{str(i).zfill(4)}.png", frame)
            print(f"Saved frame {i}")
            i += 1
    finally:
        # Release the VideoCapture object even if an error occurred
        cap.release()


# Path to the video file. Forward slashes work on every platform and avoid
# the invalid "\E" / "\T" escape sequences of the backslash-separated literal.
path = "data/EndoVis 2015/Tracking Rigid Testing Revision 6.mp4"
convert_to_imgs(path)

Saved frame 0
Saved frame 1
Saved frame 2
Saved frame 3
Saved frame 4
Saved frame 5
Saved frame 6
Saved frame 7
Saved frame 8
Saved frame 9
Saved frame 10
Saved frame 11
Saved frame 12
Saved frame 13
Saved frame 14
Saved frame 15
Saved frame 16
Saved frame 17
Saved frame 18
Saved frame 19
Saved frame 20
Saved frame 21
Saved frame 22
Saved frame 23
Saved frame 24
Saved frame 25
Saved frame 26
Saved frame 27
Saved frame 28
Saved frame 29
Saved frame 30
Saved frame 31
Saved frame 32
Saved frame 33
Saved frame 34
Saved frame 35
Saved frame 36
Saved frame 37
Saved frame 38
Saved frame 39
Saved frame 40
Saved frame 41
Saved frame 42
Saved frame 43
Saved frame 44
Saved frame 45
Saved frame 46
Saved frame 47
Saved frame 48
Saved frame 49
Saved frame 50
Saved frame 51
Saved frame 52
Saved frame 53
Saved frame 54
Saved frame 55
Saved frame 56
Saved frame 57
Saved frame 58
Saved frame 59
Saved frame 60
Saved frame 61
Saved frame 62
Saved frame 63
Saved frame 64
Saved frame 65
Saved frame 66
Saved

### Creating csv file annotations

In [1]:
import os
import csv
import cv2

# Paths to the directories containing images and labels, keyed by split name.
# The keys of all three dicts must match: the export loop below iterates over
# image_dirs and uses the same key to look up the other two.
image_dirs = {"train": "data/ART-Net/images/train", "val": "data/ART-Net/images/val"}
label_dirs = {"train": "data/ART-Net/labels/train", "val": "data/ART-Net/labels/val"}

# Output CSV files (one per split)
output_csv_files = {"train": "data/ART-Net/train.csv", "val": "data/ART-Net/val.csv"}


def convert_bbox_to_xyxy(cls, cx, cy, w, h, img_width, img_height):
    """Turn a centre/size box given as image fractions into pixel corners.

    Args:
        cls: Class value, passed through unchanged as the last element.
        cx, cy: Box centre as fractions of the image width / height.
        w, h: Box width / height as fractions of the image width / height.
        img_width, img_height: Image size in pixels.

    Returns:
        ``[x1, y1, x2, y2, cls]`` with the coordinates truncated to ints.
    """
    half_w = w / 2
    half_h = h / 2
    left = int((cx - half_w) * img_width)
    top = int((cy - half_h) * img_height)
    right = int((cx + half_w) * img_width)
    bottom = int((cy + half_h) * img_height)
    return [left, top, right, bottom, cls]


def process_labels(image_dir, label_dir):
    """Collect one CSV row per labelled box for every image in a directory.

    For each ``.png``/``.jpg``/``.jpeg`` image that has a same-named ``.txt``
    file in ``label_dir``, every non-blank label row is parsed as
    ``cls cx cy w h`` and converted to pixel corners via
    ``convert_bbox_to_xyxy``.

    Args:
        image_dir: Directory containing the images.
        label_dir: Directory containing the label ``.txt`` files.

    Returns:
        List of rows ``[image_file, x1, y1, x2, y2, class_name]``.

    Raises:
        ValueError: If a non-blank label row does not have exactly 5 fields.
        KeyError: If a class id is neither 0 nor 1.
    """
    class_names = {0: "tool", 1: "tip"}
    data = []

    # Sorted so the row order does not depend on the arbitrary listdir order.
    for image_file in sorted(os.listdir(image_dir)):
        if not image_file.endswith((".png", ".jpg", ".jpeg")):
            continue

        image_path = os.path.join(image_dir, image_file)

        # Swap only the real extension; chained str.replace could also rewrite
        # an extension-like substring in the middle of the file name.
        label_file = os.path.splitext(image_file)[0] + ".txt"
        label_path = os.path.join(label_dir, label_file)
        if not os.path.exists(label_path):
            continue

        # Read the image to get its dimensions
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising on an unreadable file.
            print(f"Skipping unreadable image: {image_path}")
            continue
        img_height, img_width = img.shape[:2]

        with open(label_path, "r") as lf:
            for line in lf:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank / trailing lines
                cls, cx, cy, w, h = map(float, fields)
                bbox = convert_bbox_to_xyxy(cls, cx, cy, w, h, img_width, img_height)
                data.append([image_file] + bbox[:4] + [class_names[int(bbox[4])]])

    return data


# Export one header-less CSV per split (train / val)
for key in image_dirs:
    output_csv = output_csv_files[key]
    data = process_labels(image_dirs[key], label_dirs[key])

    # newline="" lets the csv module control line endings itself
    with open(output_csv, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(data)

    print(f"CSV file '{output_csv}' has been created successfully.")

CSV file 'data/ART-Net/train.csv' has been created successfully.
CSV file 'data/ART-Net/val.csv' has been created successfully.


In [3]:
import os
import json
import cv2

# Paths to the directories containing images and labels, keyed by split name
# (same values as in the CSV export cell above; repeated so this cell's
# inputs are visible here).
image_dirs = {"train": "data/ART-Net/images/train", "val": "data/ART-Net/images/val"}
label_dirs = {"train": "data/ART-Net/labels/train", "val": "data/ART-Net/labels/val"}

# Output JSON files (one per split, written to the current working directory)
output_json_files = {"train": "train_dataset.json", "val": "val_dataset.json"}

# Define the COCO categories; the ids are matched against the integer class
# value read from each label row.
categories = [{"id": 0, "name": "tool"}, {"id": 1, "name": "tip"}]


def convert_bbox_to_coco_format(cx, cy, w, h, img_width, img_height):
    """Turn a centre/size box given as image fractions into ``[x, y, w, h]`` pixels.

    Args:
        cx, cy: Box centre as fractions of the image width / height.
        w, h: Box width / height as fractions of the image width / height.
        img_width, img_height: Image size in pixels.

    Returns:
        ``[x, y, width, height]`` where ``(x, y)`` is the top-left corner;
        every value is truncated to an int.
    """
    left = (cx - w / 2) * img_width
    top = (cy - h / 2) * img_height
    box_w = w * img_width
    box_h = h * img_height
    return [int(value) for value in (left, top, box_w, box_h)]


def process_labels(image_dir, label_dir):
    """Build COCO-style ``images`` and ``annotations`` lists for one split.

    NOTE: this redefines the ``process_labels`` from the CSV export cell; after
    running this cell the name refers to this COCO version.

    For each ``.png``/``.jpg``/``.jpeg`` image that has a same-named ``.txt``
    file in ``label_dir``, every non-blank label row is parsed as
    ``cls cx cy w h`` and converted with ``convert_bbox_to_coco_format``.

    Args:
        image_dir: Directory containing the images.
        label_dir: Directory containing the label ``.txt`` files.

    Returns:
        ``(images, annotations)``: two lists of dicts. Image and annotation
        ids both start at 1; images without a label file get no entry and
        consume no id.

    Raises:
        ValueError: If a non-blank label row does not have exactly 5 fields.
    """
    images = []
    annotations = []
    annotation_id = 1
    image_id = 1

    # Sorted so id assignment does not depend on the arbitrary listdir order.
    for image_file in sorted(os.listdir(image_dir)):
        if not image_file.endswith((".png", ".jpg", ".jpeg")):
            continue

        image_path = os.path.join(image_dir, image_file)

        # Swap only the real extension; chained str.replace could also rewrite
        # an extension-like substring in the middle of the file name.
        label_file = os.path.splitext(image_file)[0] + ".txt"
        label_path = os.path.join(label_dir, label_file)
        if not os.path.exists(label_path):
            continue

        # Read the image to get its dimensions
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising on an unreadable file.
            print(f"Skipping unreadable image: {image_path}")
            continue
        img_height, img_width = img.shape[:2]

        images.append(
            {
                "file_name": image_file,
                "height": img_height,
                "width": img_width,
                "id": image_id,
            }
        )

        with open(label_path, "r") as lf:
            for line in lf:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank / trailing lines
                cls, cx, cy, w, h = map(float, fields)
                bbox = convert_bbox_to_coco_format(cx, cy, w, h, img_width, img_height)
                annotations.append(
                    {
                        "id": annotation_id,
                        "image_id": image_id,
                        "category_id": int(cls),
                        "bbox": bbox,
                        # area of the box, not of a segmentation mask
                        "area": bbox[2] * bbox[3],
                        "iscrowd": 0,
                    }
                )
                annotation_id += 1

        image_id += 1

    return images, annotations


# Export one COCO-style JSON file per split (train / val)
for key in image_dirs:
    output_json = output_json_files[key]
    images, annotations = process_labels(image_dirs[key], label_dirs[key])

    # Key order is kept as info / images / annotations / categories so the
    # serialised file layout is unchanged.
    data_coco = {"info": {"description": "ART-Net Dataset"}}
    data_coco["images"] = images
    data_coco["annotations"] = annotations
    data_coco["categories"] = categories

    with open(output_json, "w") as jsonfile:
        jsonfile.write(json.dumps(data_coco))

    print(f"JSON file '{output_json}' has been created successfully.")

JSON file 'train_dataset.json' has been created successfully.
JSON file 'val_dataset.json' has been created successfully.
