# Augmentation Analysis

Here we chose the 10 images that were determined as the top 10 hard negatives in the hard mining analysis and used these images for augmentation. All six augmentations were applied to each image and resulting images and corrected yolo annotations are saved in storage/output_augmentations. We then use model 2 to predict on these images and use the hard mining class to provide their loss score again so we can compare to their scores in hard mining analyisis.

In [2]:
# Append system path
import sys, os
import cv2 as cv
sys.path.append(os.path.abspath('../inference'))
sys.path.append(os.path.abspath('../rectification'))

from augmentation import Augmentation

output_dir = "../storage/output_augmentations"
augmenter = Augmentation(output_dir)

# Import Model and NMS classes
from object_detection import Model

model_2 = Model(weights_file="../storage/yolo_model_2/yolov4-tiny-logistics_size_416_2.weights", 
                config_file="../storage/yolo_model_2/yolov4-tiny-logistics_size_416_2.cfg", 
                names_file="../storage/yolo_model_2/logistics.names")

image_filenames = {
    '../storage/logistics/387a749f761852d0_jpg.rf.00080ecac3fb871dc5f936dab4fe9d80.jpg',
    '../storage/logistics/EAN13_09_0088_jpg.rf.2031e0c46b10aaa695a811ce27f1ef56.jpg',
    '../storage/logistics/Img_web_555_jpg.rf.29fb8649096f57f40d9456b2ab6bd8c1.jpg',
    '../storage/logistics/621-jpg__jpg_jpg.rf.23ce21d5f284132e5a37ea742622b97c.jpg',
    '../storage/logistics/007118_jpg.rf.08eb2afd28293afc0f53474348b3ccab.jpg',
    '../storage/logistics/LDR08AW1FPEB_jpg.rf.c23f27058cde6dd65c8f29d62d9a8e37.jpg',
    '../storage/logistics/LDR08AW1FPEB_jpg.rf.321e7a024d64f32232980dbb1ddcab3c.jpg',
    '../storage/logistics/WEJCF41ZFDDZ_jpg.rf.e72efe631ba5047517bc8012cbb302f6.jpg',
    '../storage/logistics/fire1_mp4-313_jpg.rf.63396291379b3c704d855a3d9ce07e6b.jpg',
    '../storage/logistics/74_resized_jpg.rf.da320b1886aebd8a6dac9fd79ca3578c.jpg'
}

yolo_filenames = {
    '../storage/logistics/387a749f761852d0_jpg.rf.00080ecac3fb871dc5f936dab4fe9d80.txt',
    '../storage/logistics/EAN13_09_0088_jpg.rf.2031e0c46b10aaa695a811ce27f1ef56.txt',
    '../storage/logistics/Img_web_555_jpg.rf.29fb8649096f57f40d9456b2ab6bd8c1.txt',
    '../storage/logistics/621-jpg__jpg_jpg.rf.23ce21d5f284132e5a37ea742622b97c.txt',
    '../storage/logistics/007118_jpg.rf.08eb2afd28293afc0f53474348b3ccab.txt',
    '../storage/logistics/LDR08AW1FPEB_jpg.rf.c23f27058cde6dd65c8f29d62d9a8e37.txt',
    '../storage/logistics/LDR08AW1FPEB_jpg.rf.321e7a024d64f32232980dbb1ddcab3c.txt',
    '../storage/logistics/WEJCF41ZFDDZ_jpg.rf.e72efe631ba5047517bc8012cbb302f6.txt',
    '../storage/logistics/fire1_mp4-313_jpg.rf.63396291379b3c704d855a3d9ce07e6b.txt',
    '../storage/logistics/74_resized_jpg.rf.da320b1886aebd8a6dac9fd79ca3578c.txt'
}

for image_filename, yolo_filename in zip(image_filenames, yolo_filenames):
    augmenter.apply_augmentations(image_filename, yolo_filename)


In [3]:
# Path to the test images and labels
image_dir = "../storage/output_augmentations/"
image_paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.jpg')]
image_paths.sort(key=lambda x: os.path.splitext(os.path.basename(x))[0])

In [4]:
output_path = '../storage/augmented_predictions'
for image_path in image_paths:
    # Extract just the filename without the extension
    image_filename = os.path.splitext(os.path.basename(image_path))[0]
    # Read the image
    image = cv.imread(image_path)
    image_height, image_width = image.shape[:2]
    outputs, original_size = model_2.predict(image)
    model_2.save_predictions(outputs, original_size, output_path, image_filename)

In [5]:
from hard_negative_mining import HardNegativeMining

hm = HardNegativeMining(iou_threshold=0.5, lambda_bb=1, lambda_obj=1, lambda_cls=1, lambda_no_obj=2)

In [6]:
prediction_dir = '../storage/augmented_predictions/'
prediction_paths = [os.path.join(prediction_dir, f) for f in os.listdir(prediction_dir) if f.endswith('.txt')]
prediction_paths.sort(key=lambda x: os.path.splitext(os.path.basename(x))[0])

In [7]:
def test_sample_hard_negatives(prediction_dir, annotation_image_dir, num_samples, hm):
    """
    Test the sample_hard_negatives method from the HardNegativeMining class and return the corresponding images with losses.
    
    :param prediction_dir: Directory containing predicted YOLO bounding boxes (.txt).
    :param annotation_image_dir: Directory containing both the annotations (.txt) and the images (.jpg).
    :param num_samples: Number of hard negatives to return.
    :param hm: Initialized HardNegativeMining object.
    
    :return: List of tuples (image_path, loss) for the top hard negatives.
    """
    hard_negatives = hm.sample_hard_negatives(prediction_dir, annotation_image_dir, num_samples)
    
    # Prepare list to store the image paths and their corresponding losses
    results = []

    for pred_file, loss in hard_negatives:
        # Get the image file name from the prediction file (replace .txt with .jpg)
        image_filename = os.path.splitext(pred_file)[0] + '.jpg'
        image_path = os.path.join(annotation_image_dir, image_filename)
        
        # Check if the image file exists
        if os.path.exists(image_path):
            results.append((image_path, loss))
        else:
            print(f"Warning: Image {image_filename} does not exist.")
    
    return results


# Obtain hard negative results
hard_negatives_results = test_sample_hard_negatives(
    prediction_dir='../storage/augmented_predictions/',
    annotation_image_dir='../storage/output_augmentations/',  
    num_samples=len(prediction_paths),  
    hm=hm  # HardNegativeMining object
)

### Scores from the hard mining analysis module:

```
Image: ../storage/logistics/387a749f761852d0_jpg.rf.00080ecac3fb871dc5f936dab4fe9d80.jpg, Loss: 22.000000
Image: ../storage/logistics/EAN13_09_0088_jpg.rf.2031e0c46b10aaa695a811ce27f1ef56.jpg, Loss: 21.375287
Image: ../storage/logistics/Img_web_555_jpg.rf.29fb8649096f57f40d9456b2ab6bd8c1.jpg, Loss: 21.365474
Image: ../storage/logistics/621-jpg__jpg_jpg.rf.23ce21d5f284132e5a37ea742622b97c.jpg, Loss: 21.335702
Image: ../storage/logistics/007118_jpg.rf.08eb2afd28293afc0f53474348b3ccab.jpg, Loss: 21.331186
Image: ../storage/logistics/LDR08AW1FPEB_jpg.rf.c23f27058cde6dd65c8f29d62d9a8e37.jpg, Loss: 21.320602
Image: ../storage/logistics/LDR08AW1FPEB_jpg.rf.321e7a024d64f32232980dbb1ddcab3c.jpg, Loss: 21.297516
Image: ../storage/logistics/WEJCF41ZFDDZ_jpg.rf.e72efe631ba5047517bc8012cbb302f6.jpg, Loss: 21.289732
Image: ../storage/logistics/fire1_mp4-313_jpg.rf.63396291379b3c704d855a3d9ce07e6b.jpg, Loss: 21.245449
Image: ../storage/logistics/74_resized_jpg.rf.da320b1886aebd8a6dac9fd79ca3578c.jpg, Loss: 21.233575 
```

In [8]:
# Print the image paths and their corresponding losses outside of the function
for image_path, loss in hard_negatives_results:
    print(f"Image: {image_path}, Loss: {loss:.6f}")

Image: ../storage/output_augmentations/387a749f761852d0_jpg_brightness.jpg, Loss: 22.000000
Image: ../storage/output_augmentations/387a749f761852d0_jpg_contrast.jpg, Loss: 22.000000
Image: ../storage/output_augmentations/387a749f761852d0_jpg_resized.jpg, Loss: 22.000000
Image: ../storage/output_augmentations/387a749f761852d0_jpg_blurred.jpg, Loss: 22.000000
Image: ../storage/output_augmentations/LDR08AW1FPEB_jpg_flipped.jpg, Loss: 21.376539
Image: ../storage/output_augmentations/EAN13_09_0088_jpg_brightness.jpg, Loss: 21.375227
Image: ../storage/output_augmentations/621-jpg__jpg_jpg_resized.jpg, Loss: 21.363238
Image: ../storage/output_augmentations/621-jpg__jpg_jpg_contrast.jpg, Loss: 21.353002
Image: ../storage/output_augmentations/LDR08AW1FPEB_jpg_resized.jpg, Loss: 21.325368
Image: ../storage/output_augmentations/74_resized_jpg_blurred.jpg, Loss: 21.306260
Image: ../storage/output_augmentations/Img_web_555_jpg_flipped.jpg, Loss: 21.305775
Image: ../storage/output_augmentations/EAN1

## Analysis

Comparing the original losses to the losses of the augmented images, we can see that some augmentations were able to lower the losses significantly. The code below alows us to plot each augmented image with ground truth and predicted labels to further analyze. It has been commented out for submission to lower the notebook size. From the results above there does not seem to be a specific augmentations that performs the best. It seems like it really depends on the image what will work best and the reason for the high loss. It would be interesting to combine two or more augmentations on an image to see if different combinations can further improve performance on these images.

In [11]:
import matplotlib.pyplot as plt

def display_image_with_boxes(image, bboxes, class_ids, scores, model, title="Predictions"):
    """Function to display image with bounding boxes (converted from YOLO format)."""
    h, w = image.shape[:2]  # Get image dimensions

    for i, bbox in enumerate(bboxes):
        cx, cy, bw, bh = bbox  # YOLO format: center_x, center_y, width, height (normalized)
        
        # Convert from YOLO format (normalized) to pixel coordinates
        x1 = int((cx - bw / 2) * w)  # Top-left x
        y1 = int((cy - bh / 2) * h)  # Top-left y
        x2 = int((cx + bw / 2) * w)  # Bottom-right x
        y2 = int((cy + bh / 2) * h)  # Bottom-right y

        # Create label for the class and confidence score
        label = f"{model.classes[class_ids[i]]}: {scores[i]:.2f}"
        
        # Draw the rectangle on the image
        cv.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        # Add the label text above the bounding box
        cv.putText(image, label, (x1, y1 - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Convert image to RGB for matplotlib display
    image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    
    # Plot the image with bounding boxes
    plt.figure(figsize=(8, 8))
    plt.imshow(image_rgb)
    plt.title(title)
    plt.axis("off")  # Hide axes
    plt.show()


def display_images_with_gt_and_predictions(hard_negatives_results, hm, model, annotation_image_dir, prediction_dir):
    """
    Display images with ground truth and predicted bounding boxes labeled.
    
    :param hard_negatives_results: List of tuples (image_path, loss) from the sample_hard_negatives function.
    :param hm: Initialized HardNegativeMining object.
    :param model: The model object with class names to label the boxes.
    :param annotation_image_dir: Directory containing both the annotations (.txt) and the images (.jpg).
    :param prediction_dir: Directory containing predicted YOLO bounding boxes (.txt).
    """
    for image_path, _ in hard_negatives_results:
        image_filename = os.path.basename(image_path).replace('.jpg', '.txt')

        # Read the image
        image = cv.imread(image_path)
        img_height, img_width = image.shape[:2]

        # Read ground truth and predicted labels
        annotation_file = os.path.join(annotation_image_dir, image_filename)  # Ground truth
        prediction_file = os.path.join(prediction_dir, image_filename)  # Predictions

        ground_truth_labels = hm.read_yolo_labels(annotation_file)
        predicted_labels = hm.read_yolo_labels(prediction_file)

        # Ground truth boxes and class IDs
        gt_bboxes = [label[1:5] for label in ground_truth_labels]
        gt_class_ids = [label[0] for label in ground_truth_labels]

        # Predicted boxes, class IDs, and scores
        pred_bboxes = [label[1:5] for label in predicted_labels]
        pred_class_ids = [label[0] for label in predicted_labels]
        pred_scores = [label[5] for label in predicted_labels]  # Confidence scores

        # Display ground truth
        print(f"Displaying Ground Truth for Image: {image_path}")
        display_image_with_boxes(image.copy(), gt_bboxes, gt_class_ids, [1.0]*len(gt_class_ids), model, title="Ground Truth")

        # Display predictions
        print(f"Displaying Predictions for Image: {image_path}")
        display_image_with_boxes(image.copy(), pred_bboxes, pred_class_ids, pred_scores, model, title="Predictions")



In [None]:
# # Display the images with ground truth and predictions
# display_images_with_gt_and_predictions(
#     hard_negatives_results=hard_negatives_results, 
#     hm=hm, 
#     model=model_2, 
#     annotation_image_dir='../storage/output_augmentations/',  
#     prediction_dir='../storage/augmented_predictions/'
# )