# Training and inference of a YoloV5 model
For detailed documentation on how to train a model, see: https://docs.ultralytics.com/yolov5/

Training a custom YOLOV5 model requires several steps:
0. Pull the YOLOv5 Git Repo: https://github.com/ultralytics/yolov5/releases

1. Creating a custom dataset
    Images (JPEG) and labels (TXT) must be collected and properly formatted. See: https://roboflow.com/formats/yolov5-pytorch-txt
    
2. Select a model. Several types of Yolo models are available (e.g., Nano: YoloV5n ... Medium: YoloV5m ... XLarge: YoloV5X). In general, larger models perform better, but are slower for training and inference. https://docs.ultralytics.com/models/

3. Train the model. Set up a Conda/Python environment (here: Python 3.10; PyTorch with CUDA GPU enabled). Create YAML files specifying where to find the data and which model architecture you are using. You can also specify different hyperparameters during model training.

With your conda environment active, run your model: 

python train.py --single-cls --rect --batch -1 --epochs 25 --data ./datasets/autoarborist_file_paths_classes_yolov5x.yml --cfg ./datasets/autoarborist_architecture_yolov5x.yml --weights '' --hyp ./datasets/hyp-scratch-custom.yaml

--single-cls specifies training with one class
--rect specifies images are rectangular (from the Autoarborist dataset)
--batch -1 specifies automatic batch size selection
--epochs 25 specifies the total number of complete passes through the training set
--data points to the YAML file with your training data paths
--cfg points to the YAML file with your model architecture 
--weights specifies the pre-trained weights to start from (an empty string '' trains from scratch)
--hyp points to the YAML file with your hyperparameters

Once you have trained the model, you can view summary statistics (precision, recall) and evaluate its performance.

# Import Libraries

In [None]:
# Guide on Prediction with Yolo Model: https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/#before-you-start

# Import Libraries

import torch

import matplotlib.pyplot as plt
from matplotlib.image import imread
import os
import shutil
import numpy as np
import pandas as pd
import cv2
import random


# Notebook and Vis Params
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline

# Load a trained YOLOv5 Object Detection Model

In [None]:
# Load the two custom-trained YOLOv5 checkpoints from disk.
# Both go through torch.hub against the same local YOLOv5 checkout (source='local').
yolov5_repo = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5'

# Stage 1: tree detector
model1_name = 'yolov5x-oct2323-autoarborist-25epochs'
model1_path = f'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/runs/train/{model1_name}/weights/last.pt'
tree_model = torch.hub.load(yolov5_repo, 'custom', path=model1_path, source='local')

# Stage 2: Ailanthus detector, applied to the trees found by stage 1.
# NOTE: model2_name is only a descriptive label; the checkpoint itself is read from run folder 'exp6'.
model2_name = 'exp-autoarb-with-ailanthus-3600imgstrain-5ktrees-alleasyandhard-yolov5x-imagenet-10epochs-lowaug-imgweights-lr01-dec1123'
model2_path = f'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/runs/train/exp6/weights/last.pt'
ailanthus_model = torch.hub.load(yolov5_repo, 'custom', path=model2_path, source='local')

# Both detectors share the same inference thresholds
for detector in (tree_model, ailanthus_model):
    detector.conf = 0.25  # confidence threshold (0-1)
    detector.iou = 0.45  # NMS IoU threshold (0-1)



In [None]:
# Load one Google Street View panorama and preview it.
# NOTE: despite the original cell title, nothing is cropped here; the image is only read and displayed.

#img_path = r'C:/Users/talake2/Desktop/GSV_imgs_ailanthus/imgs/IC9PrL5RaujFWTNLQ6GrhA.jpg' # Simple test case for Ailanthus
img_path = r'C:\Users\talake2\Desktop\GSV_panoramic_images\GSV_imgs_ailanthus\r2048\_6r7Jpnswuc-wB5QNLt_IQ.jpg'
img = imread(img_path)
img.shape # Google Street View Panorama: 8192, 16384, 3
plt.imshow(img[..., 0])  # index 0 on the last axis: shows a single channel, not the full RGB image



# Detection on a Single Image


In [None]:
# Run the tree detector on a single image and inspect the predictions

img_path = r'C:\Users\talake2\Desktop\auto_arborist_cvpr2022_v015\auto_arborist_jpegs\jpegs_aerial_streetlevel_raw\all_cities_streetview\train\acer\streetlevel_4_2.jpg'
#img_path = r'C:\Users\talake2\Desktop\GSV_panoramic_images\GSV_imgs_ailanthus\r2048\_3DIEb8MnUP0LRYicY3dmQ.jpg'
img = imread(img_path)
img.shape # Google Street View Panorama: 8192, 16384, 3
#plt.imshow(img)

# Keep at most the first 8192 rows and 8192 columns (a square for a full-size panorama).
# NumPy clips slices that run past the array bounds, so a smaller image is left unchanged.
img_crop = img[0:8192, 0:8192, ...]

# Inference
model_results = tree_model(img_crop, size=640) #default size 640. Change for custom inference size

# Timings and detections recorded while trying different `size` values:
# Size 80 Speed: 1.0ms pre-process, 248.5ms inference, 40.1ms NMS per image at shape (1, 3, 64, 96), one tree detected (Ailanthus not detected)
# Size 160 Speed: 1.0ms pre-process, 123.8ms inference, 1.0ms NMS per image at shape (1, 3, 96, 160), four trees detected (Ailanthus conf 70)
# Size 320 Speed: 1.0ms pre-process, 11.0ms inference, 1.0ms NMS per image at shape (1, 3, 160, 320) six trees detected (Ailanthus conf 93)
# Size 640 Speed: 3.0ms pre-process, 10.0ms inference, 1.0ms NMS per image at shape (1, 3, 320, 640) 12 trees detected (Ailanthus conf 71)
# Size 768 Speed: 10.5ms pre-process, 163.3ms inference, 8.0ms NMS per image at shape (1, 3, 384, 768) 12 trees detected (Ailanthus not detected)
# Size 1280 Speed: 7.0ms pre-process, 15.0ms inference, 2.0ms NMS per image at shape (1, 3, 640, 1280) 18 trees detected (Ailanthus not detected)

# Examine results: print the summary, list the boxes as a DataFrame, then show the annotated image.
model_results.print()
model_results_df = model_results.pandas().xyxy[0]  # first (only) image in the batch
print(model_results_df)
model_results.show()



# Double Inference on Single Panoramic Image: Detect Trees with YOLO-Tree model & Classify with YOLO-Ailanthus model

In [None]:
# Two-stage inference on one Google Street View panorama:
#   1. detect trees in the full panorama with the tree model,
#   2. crop each detected tree and run the Ailanthus model on the crop.

# Dataset Output Directory: googlestreetview_ailanthus_images_labels_dec23

# Original image
img_path = r'C:\Users\talake2\Desktop\GSV_panoramic_images\GSV_imgs_ailanthus\r2048\_3DIEb8MnUP0LRYicY3dmQ.jpg'
img = imread(img_path)

# Stage 1: run the tree detector on the same array that is cropped below, so the
# predicted box coordinates line up with the pixels of `img`.
results = tree_model(img)

# Examine results. Show number of bounding boxes and details of an image.
results.print()
results_df = results.pandas().xyxy[0]

# Stage 2: classify every detected tree
for _, tree_box in results_df.iterrows():
    # Pixel coordinates of the detected tree (truncated to integers for slicing)
    xmin, ymin, xmax, ymax = (int(tree_box[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax'))

    # Crop the original image to the bounding box
    img_crop = img[ymin:ymax, xmin:xmax, ...]
    if img_crop.size == 0:
        # A degenerate box leaves nothing to run the second model on
        continue

    # Display the cropped image
    #plt.imshow(img_crop)
    #plt.show()

    # Inference on the cropped image using the Ailanthus detection model
    results_cropped = ailanthus_model(img_crop)

    results_cropped.show()

    # Examine results for the cropped image
    results_cropped.print()
    results_df_cropped = results_cropped.pandas().xyxy[0]

    # Display the results for the cropped image
    print(results_df_cropped)
    

# Double Inference on Directory Panoramic Images

First, Detect Trees with YOLO-Tree model. Then, Classify with YOLO-Ailanthus model

Ideas:

- Crop aspect ratio box around detected tree at 1.5x ratio (same ratio as Autoarborist data)? May help with resizing distortions?

- If Ailanthus is detected, apply image augmentations (image/bounding box horizontal shifts).

After augmentations, Save Predictions

In [None]:

# Two-stage pipeline over a folder of Google Street View panoramas:
#   stage 1 finds trees in the full panorama,
#   stage 2 looks for Ailanthus inside a padded crop around each tree.

folder_path = r'C:/Users/talake2/Desktop/Ailanthus_altissima/r2048'
save_dir_images = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/googlestreetview_ailanthus_images_labels_dec23/images'
save_dir_labels = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/googlestreetview_ailanthus_images_labels_dec23/labels'

for filename in os.listdir(folder_path):
    # Only .jpg panoramas are processed
    if not filename.endswith('.jpg'):
        continue

    img_path = os.path.join(folder_path, filename)
    img = imread(img_path)

    # Stage 1: tree detection on the whole panorama.
    # Columns of the prediction table: xmin, ymin, xmax, ymax, confidence, class, name
    results = tree_model(img)
    results_df = results.pandas().xyxy[0]
    print(f'Detected {results_df.shape[0]} trees in panoramic image')

    for i in range(len(results_df)):
        # Box of one predicted tree
        xmin, ymin, xmax, ymax = results_df.loc[i, ['xmin', 'ymin', 'xmax', 'ymax']]

        # Pad the box on every side, clamped to the image borders.
        # Idea: scale the padding with the size of the detected box instead of a fixed value?
        buffer_pixels = 200
        ymin_cropped = max(int(ymin) - buffer_pixels, 0)
        ymax_cropped = min(int(ymax) + buffer_pixels, img.shape[0])
        xmin_cropped = max(int(xmin) - buffer_pixels, 0)
        xmax_cropped = min(int(xmax) + buffer_pixels, img.shape[1])

        img_crop = img[ymin_cropped:ymax_cropped, xmin_cropped:xmax_cropped, ...]

        # Stage 2: Ailanthus detection on the padded crop
        print(f'Running Ailanthus Detection on Tree Number:{i}')
        results_cropped = ailanthus_model(img_crop)

        results_df_cropped = results_cropped.pandas().xyxy[0]
        print(results_df_cropped)

        # Only crops containing at least one class-1 (Ailanthus) prediction are of interest
        ailanthus_instances = results_df_cropped[results_df_cropped['class'] == 1]
        if ailanthus_instances.empty:
            continue

        # Boxes of ALL predictions in the crop (every class), as used by the save routine below
        xmin_values = results_df_cropped['xmin']
        ymin_values = results_df_cropped['ymin']
        xmax_values = results_df_cropped['xmax']
        ymax_values = results_df_cropped['ymax']

        # Display prediction on cropped image with Ailanthus
        results_cropped.show()

        # --- Disabled: save the crop and its YOLO-format labels ---
        # Create filestring to save predictions of Ailanthus images/labels
        #file_name = os.path.splitext(filename)[0]
        #image_save_path = os.path.join(save_dir_images, f'{file_name}_{i}.jpg')
        #txt_save_path = os.path.join(save_dir_labels, f'{file_name}_{i}.txt')

        # Resize image to match Autoarborist dataset, dimension (768 x 1152)
        #resized_img = cv2.resize(img_crop, (768, 1152))

        # Save resized image from Google Street View Pano
        #resized_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
        #cv2.imwrite(image_save_path, resized_img)

        #with open(txt_save_path, 'w') as txt_file:
        #    for j in range(len(xmin_values)):
        #        # Normalize bounding box coordinates
        #        class_id = results_df_cropped['class'].iloc[j]  # Class indices start with 0. 'Trees' class should be index 0, 'Ailanthus' class should be 1.
        #        center_x = ((xmin_values[j] + xmax_values[j]) / 2) / img_crop.shape[1]
        #        center_y = ((ymin_values[j] + ymax_values[j]) / 2) / img_crop.shape[0]
        #        width = (xmax_values[j] - xmin_values[j]) / img_crop.shape[1]
        #        height = (ymax_values[j] - ymin_values[j]) / img_crop.shape[0]
        #
        #        # Write bounding box information to the text file
        #        txt_file.write(f'{class_id} {center_x} {center_y} {width} {height}' + os.linesep)

                

In [None]:
# Imports for GradCAM (Explainable Object Detection) from: https://github.com/jacobgil/pytorch-grad-cam

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch    
import cv2
import numpy as np
import requests
import torchvision.transforms as transforms
from pytorch_grad_cam import EigenCAM, ScoreCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
from PIL import Image


# Experiments with EigenCAM and Explainable Object Detection Models


In [None]:
# Random colour palette, one row of 3 channel values per class id.
COLORS = np.random.uniform(0, 255, size=(80, 3))


def parse_detections(results, min_confidence=0.2):
    """Convert YOLOv5 predictions for the first image into drawable lists.

    Args:
        results: Object whose ``results.pandas().xyxy[0]`` is a DataFrame with
            the columns ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``confidence``,
            ``class`` and ``name``.
        min_confidence: Predictions with a confidence below this value are
            dropped. Defaults to 0.2.

    Returns:
        A ``(boxes, colors, names)`` tuple of equal-length lists: integer
        ``(xmin, ymin, xmax, ymax)`` tuples, the palette colour of each
        prediction's class, and the class names.
    """
    frame = results.pandas().xyxy[0]
    boxes, colors, names = [], [], []

    # Iterating over row records (rather than looking rows up by position in a
    # column dict) keeps this working when the frame's index is not 0..n-1.
    for row in frame.to_dict("records"):
        if row["confidence"] < min_confidence:
            continue

        box = (int(row["xmin"]), int(row["ymin"]), int(row["xmax"]), int(row["ymax"]))
        # Wrap around the palette so class ids beyond its length still get a colour
        color = COLORS[int(row["class"]) % len(COLORS)]

        boxes.append(box)
        colors.append(color)
        names.append(row["name"])
    return boxes, colors, names


def draw_detections(boxes, colors, names, img):
    """Draw every box and its class name onto ``img`` and return ``img``.

    ``boxes``, ``colors`` and ``names`` are walked in lockstep; each box is an
    ``(xmin, ymin, xmax, ymax)`` tuple. Drawing is done by OpenCV directly on
    the array that is passed in.
    """
    for (xmin, ymin, xmax, ymax), color, name in zip(boxes, colors, names):
        top_left = (xmin, ymin)
        bottom_right = (xmax, ymax)
        # Box outline, 5 px thick
        cv2.rectangle(img, top_left, bottom_right, color, 5)

        # Class name just above the top-left corner of the box
        text_origin = (xmin, ymin - 5)
        cv2.putText(img, name, text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                    lineType=cv2.LINE_AA)
    return img

# Positions in the detector's layer container (model.model.model.model) to visualise;
# negative values index from the end.
layers = [-2, -5, -8, -12, -15, -17, -19, -21] 

# Folder of images to explain with EigenCAM
folder_path = r'C:\Users\talake2\Desktop\auto_arborist_cvpr2022_v015\yolov5\datasets\autoarborist_googlestreetview_images_labels_7300ailanthus_22000trees_jan324\gradcam_images'
# Loop through each image in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.jpg'):
        img_path = os.path.join(folder_path, filename)

        img = imread(img_path)

        # Separate copy used for detection and as the plot background
        rgb_img = img.copy()
        #img = np.float32(img) / 255
        # Image -> tensor with a leading batch axis, as input for the CAM
        transform = transforms.ToTensor()
        tensor = transform(img).unsqueeze(0)

        # NOTE: `model`, `tensor` and `target_layers` stay bound after this loop and are
        # reused by the following cell.
        model = ailanthus_model
        results = model([rgb_img])
        print(results.pandas().xyxy[0])
        boxes, colors, names = parse_detections(results)
        detections = draw_detections(boxes, colors, names, rgb_img.copy())
        # NOTE(review): the PIL image built here is not assigned or shown, so the
        # annotated detections are never displayed - confirm whether that is intended.
        Image.fromarray(detections)
        # Display CAM Activation Layers
        for j in layers:
            target_layers = [model.model.model.model[j]]

            # One EigenCAM per target layer; [0, :, :] selects the map of the single input image
            cam = EigenCAM(model, target_layers)
            grayscale_cam = cam(tensor)[0, :, :]
            # Overlay the activation map (80% opacity) on the original image
            plt.imshow(rgb_img)
            plt.imshow(grayscale_cam, alpha=0.8)
            plt.show()

In [None]:
# Re-run EigenCAM on its own. This relies on `model`, `target_layers` and `tensor`
# left over from the EigenCAM loop cell, i.e. the last image and last layer it processed.
cam = EigenCAM(model, target_layers)

# [0, :, :] selects the activation map of the single input image
grayscale_cam = cam(tensor)[0, :, :]
plt.imshow(grayscale_cam)
plt.show()


# Inference on a directory of images

In [None]:
# Run the detector on every .jpg in a directory and save the annotated predictions

input_image_directory = r'C:/Users/talake2/Desktop/GSV_imgs_ailanthus/imgs'

for filename in os.listdir(input_image_directory):
    if not filename.endswith('.jpg'):
        continue

    # Full path of the image, then load it
    image_path = os.path.join(input_image_directory, filename)
    img = imread(image_path)

    # Keep the first 5406 rows only, i.e. drop the bottom part of the panorama
    img_resize = img[0:5406, ...]

    # `model` is whatever detector was bound to that name by an earlier cell
    results = model(img_resize)

    # Predictions of the first (only) image as a DataFrame
    results_df = results.pandas().xyxy[0]

    # Optional filter (disabled): only save when a prediction is confident enough
    #if not results_df.empty:
    #    if (results_df['confidence'] > 0.8).any():
    # Save the plotted image with results
    results.save()
        
    


In [None]:
# After inference, move output files to single directory


def move_jpegs(source_directory, destination_directory):
    """Move every .jpg/.jpeg file found under ``source_directory`` into ``destination_directory``.

    The source tree is walked recursively. When a file with the same name is
    already present in the destination (each run folder can hold an identically
    named image), the name of the file's parent folder is prepended - plus a
    counter if that is still taken - so no prediction is overwritten.

    Args:
        source_directory: Root folder to search.
        destination_directory: Folder that receives the files; created if missing.

    Returns:
        List of destination paths of the moved files.
    """
    os.makedirs(destination_directory, exist_ok=True)

    moved_files = []
    for root, _, files in os.walk(source_directory):
        for file in files:
            if not file.endswith(('.jpg', '.jpeg')):
                continue

            target_path = os.path.join(destination_directory, file)
            if os.path.exists(target_path):
                # Name collision: disambiguate instead of overwriting
                stem, extension = os.path.splitext(file)
                run_name = os.path.basename(root)
                target_path = os.path.join(destination_directory, f'{run_name}_{stem}{extension}')
                counter = 1
                while os.path.exists(target_path):
                    target_path = os.path.join(
                        destination_directory, f'{run_name}_{stem}_{counter}{extension}')
                    counter += 1

            shutil.move(os.path.join(root, file), target_path)
            moved_files.append(target_path)
    return moved_files


# Source directory containing 'exp', 'exp1', 'exp2', ... folders
source_directory = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/jupyter_notebooks/runs/detect'

# Destination directory where all JPEG files will be moved
destination_directory = r'C:/Users/talake2/Desktop/GSV_imgs_ailanthus/preds'

move_jpegs(source_directory, destination_directory)

print("JPEG files moved successfully to the destination directory.")



# Plotting Images and Bounding Boxes

In [None]:
# Plotting Images and Bounding Boxes

# Class names indexed by the class id stored in the label files.
# Order follows the convention used when the predictions are filtered and saved
# elsewhere in this notebook: class 0 is 'tree', class 1 is 'ailanthus'.
classes = ['tree', 'ailanthus']

# One random colour (three 0-255 channel values) per class
colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]

# Paths to the directories containing the images and their .txt label files
images_directory = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/googlestreetview_ailanthus_images_labels_dec23/images'
labels_directory = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/googlestreetview_ailanthus_images_labels_dec23/labels'

# Output directory for saving images with bounding boxes
output_directory = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/googlestreetview_ailanthus_images_labels_dec23/images-boxes'

def plot_one_box(x, image, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a text label, onto ``image``.

    Args:
        x: Box corners as ``(x1, y1, x2, y2)``; each value is converted with ``int``.
        image: Array to draw on; it is modified by the OpenCV calls.
        color: Box colour as three channel values; a random colour is used when omitted.
        label: Optional text drawn on a filled background at the top-left corner.
        line_thickness: Line width in pixels; derived from the image size when omitted.
    """
    # Line/font thickness: explicit value, else scaled with the mean of height and width
    if line_thickness:
        tl = line_thickness
    else:
        tl = round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1

    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(image, top_left, bottom_right, color, thickness=tl, lineType=cv2.LINE_AA)

    if not label:
        return

    tf = max(tl - 1, 1)  # font thickness
    font_scale = tl / 3
    text_width, text_height = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=tf)[0]
    # Filled rectangle sized to the text, sitting on top of the box's top-left corner
    background_corner = top_left[0] + text_width, top_left[1] - text_height - 3
    cv2.rectangle(image, top_left, background_corner, color, -1, cv2.LINE_AA)
    cv2.putText(image, label, (top_left[0], top_left[1] - 2), 0, font_scale,
                [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
        

# Draw every YOLO-format label onto its image and save the annotated copy.

# cv2.imwrite does not create missing folders, so make sure the output folder exists
os.makedirs(output_directory, exist_ok=True)

for filename in os.listdir(labels_directory):
    if not filename.endswith('.txt'):
        continue

    # The image shares the label file's name, with a .jpg extension
    image_filename = os.path.splitext(filename)[0] + '.jpg'

    source_image_file = os.path.join(images_directory, image_filename)
    source_label_file = os.path.join(labels_directory, filename)

    print(source_image_file)
    print(source_label_file)

    # cv2.imread returns None instead of raising when the file is missing or unreadable
    image = cv2.imread(source_image_file)
    if image is None:
        print(f'Skipping {filename}: could not read image {source_image_file}')
        continue

    # Define image shape
    height, width, channels = image.shape

    print(height, width, channels)

    # The context manager closes the label file once all boxes are drawn
    with open(source_label_file) as label_file:
        for line in label_file:
            box = line.split()
            # Skip empty lines
            if not box:
                continue

            print(box)
            class_idx = int(box[0])

            # Label values are fractions of the image size: centre x, centre y, width, height.
            # Convert them to pixel corner coordinates.
            x_center, y_center = float(box[1]) * width, float(box[2]) * height
            w, h = float(box[3]) * width, float(box[4]) * height
            x1 = round(x_center - w / 2)
            y1 = round(y_center - h / 2)
            x2 = round(x_center + w / 2)
            y2 = round(y_center + h / 2)

            plot_one_box([x1, y1, x2, y2], image, color=colors[class_idx],
                         label=classes[class_idx], line_thickness=None)

    # Save the image after processing all bounding boxes
    save_file_path = os.path.join(output_directory, image_filename)
    cv2.imwrite(save_file_path, image)





# Model False Positives/ Error Assessment 
Apply the YOLO model to detect a species across a set of images from Autoarborist that are classified to genus.

In [None]:
# False-positive assessment: run the detector on up to 1000 images of every genus
# folder and summarise how often, and how confidently, it predicts 'ailanthus' vs 'tree'.

# Define the base image directory
base_input_image_directory = r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/jpegs/all_cities_streetview/train'

# Get a list of genus names (folder names) in the specified directory
genus_names = [
    item for item in os.listdir(base_input_image_directory)
    if os.path.isdir(os.path.join(base_input_image_directory, item))
]

# Upper bound on the number of directory entries evaluated per genus
max_images_per_genus = 1000

# Column order of the summary table
summary_columns = [
    'Genus', 'Ailanthus Predictions', 'Tree Predictions',
    'Average Ailanthus Confidence', 'Average Tree Confidence',
    'Proportion Predicted Ailanthus:',
    'STD Ailanthus Confidence', 'STD Tree Confidence',
]

# Rows are collected in a list and turned into a DataFrame once at the end;
# DataFrame.append no longer exists in pandas 2.x.
summary_rows = []

for genus_name in genus_names:
    print(genus_name)

    # Construct the full directory path for the current genus
    input_image_directory = os.path.join(base_input_image_directory, genus_name)

    # Confidence of every prediction for this genus, per predicted class name
    ail_confidences = []
    tree_confidences = []

    for i, filename in enumerate(os.listdir(input_image_directory)):
        if i >= max_images_per_genus:
            break  # Stop after the first max_images_per_genus entries

        # Construct file path
        image_path = os.path.join(input_image_directory, filename)

        # `model` is whatever detector was bound to that name by an earlier cell
        results = model(image_path)

        # Predictions of the first (only) image as a DataFrame
        results_df = results.pandas().xyxy[0]

        ail_confidences.extend(
            results_df.loc[results_df['name'] == 'ailanthus', 'confidence'].tolist())
        tree_confidences.extend(
            results_df.loc[results_df['name'] == 'tree', 'confidence'].tolist())

    # One confidence value was collected per prediction, so the counts are the list lengths
    total_ail_predictions = len(ail_confidences)
    total_tree_predictions = len(tree_confidences)

    # Calculate summary statistics (0 when a class was never predicted)
    avg_ail_confidence = np.mean(ail_confidences) if ail_confidences else 0
    avg_tree_confidence = np.mean(tree_confidences) if tree_confidences else 0

    std_ail_confidence = np.std(ail_confidences) if ail_confidences else 0
    std_tree_confidence = np.std(tree_confidences) if tree_confidences else 0

    # The ratio is undefined without tree predictions; report NaN instead of raising ZeroDivisionError
    if total_tree_predictions:
        proportion_ailanthus = total_ail_predictions / total_tree_predictions
    else:
        proportion_ailanthus = np.nan

    summary_rows.append({
        'Genus': genus_name,
        'Ailanthus Predictions': total_ail_predictions,
        'Tree Predictions': total_tree_predictions,
        'Proportion Predicted Ailanthus:': proportion_ailanthus,
        'Average Ailanthus Confidence': avg_ail_confidence,
        'Average Tree Confidence': avg_tree_confidence,
        'STD Ailanthus Confidence': std_ail_confidence,
        'STD Tree Confidence': std_tree_confidence
    })

# Build and display the summary DataFrame
summary_df = pd.DataFrame(summary_rows, columns=summary_columns)
print(summary_df)


In [None]:
# Show the per-genus summary table as the cell's output
summary_df

In [None]:
# Print the per-genus summary and write it to a CSV file
print(summary_df)
summary_df.to_csv(r'C:/Users/talake2/Desktop/auto_arborist_cvpr2022_v015/model_classification_by_genus_full.csv')

# Diagnostics: Check if all training/testing images: label pairs are present.

Each image (e.g.,  000038a4-b9a1-4e11-9e92-e8a98a3ebdb7.jpg ) should have a unique, matching label (e.g., 000038a4-b9a1-4e11-9e92-e8a98a3ebdb7.txt)

If an image is missing a label, or if we have extra labels, delete these data.

In [None]:

import os

def remove_unpaired_files(image_folder, label_folder):
    """Delete images without a label file and label files without an image.

    An image ``<stem>.<ext>`` is kept only if ``<stem>.txt`` exists in
    ``label_folder``. A label is kept only if some file with the same stem
    exists in ``image_folder``; the image extension is not assumed, so
    ``.jpg`` and ``.jpeg`` images are both recognised.

    Args:
        image_folder: Directory containing the image files.
        label_folder: Directory containing the label files.

    Returns:
        A ``(removed_images, removed_labels)`` tuple with the names of the deleted files.
    """
    image_files = os.listdir(image_folder)
    label_files = os.listdir(label_folder)

    # Sets give constant-time membership tests on large datasets
    label_names = set(label_files)
    image_stems = {os.path.splitext(name)[0] for name in image_files}

    removed_images = []
    for image_file in image_files:
        # Construct the corresponding label file name
        label_file = os.path.splitext(image_file)[0] + '.txt'
        if label_file not in label_names:
            os.remove(os.path.join(image_folder, image_file))
            print(f"Removed {image_file} because corresponding label was not found.")
            removed_images.append(image_file)

    removed_labels = []
    for label_file in label_files:
        if os.path.splitext(label_file)[0] not in image_stems:
            os.remove(os.path.join(label_folder, label_file))
            print(f"Removed {label_file} because corresponding image was not found.")
            removed_labels.append(label_file)

    return removed_images, removed_labels


# (image folder, label folder) pairs to clean: training split, then testing split
dataset_folders = [
    (r'/mnt/c/users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/autoarborist/train/images',
     r'/mnt/c/users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/autoarborist/train/labels'),
    (r'/mnt/c/users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/autoarborist/test/images',
     r'/mnt/c/users/talake2/Desktop/auto_arborist_cvpr2022_v015/yolov5/datasets/autoarborist/test/labels'),
]

for image_folder, label_folder in dataset_folders:
    # Deleting is destructive, so a split is only processed when both of its folders are present
    if not (os.path.isdir(image_folder) and os.path.isdir(label_folder)):
        print(f"Skipping {image_folder}: image or label folder not found.")
        continue
    remove_unpaired_files(image_folder, label_folder)





In [None]:
# Given a directory of images, create text files matching that directory

import os

def create_empty_label_files(image_directory, labels_directory):
    """Create an empty ``<stem>.txt`` label file for every JPEG in ``image_directory``.

    Files ending in ``.jpg`` or ``.jpeg`` (any letter case) are treated as
    images. A label file that already exists is left untouched, so existing
    annotations are never truncated.

    Args:
        image_directory: Directory containing the images.
        labels_directory: Directory that receives the text files; created if missing.

    Returns:
        List of paths of the text files that were newly created.
    """
    os.makedirs(labels_directory, exist_ok=True)

    created = []
    for image_name in sorted(os.listdir(image_directory)):
        if not image_name.lower().endswith(('.jpg', '.jpeg')):
            continue

        # Same file name as the image, with a .txt extension
        file_name = os.path.splitext(image_name)[0]
        text_file_path = os.path.join(labels_directory, file_name + '.txt')

        try:
            # Mode 'x' creates the file and fails if it is already there
            with open(text_file_path, 'x'):
                pass
        except FileExistsError:
            print(f"Kept existing {text_file_path}")
            continue

        print(f"Created {text_file_path}")
        created.append(text_file_path)
    return created


# Path to the directory containing the JPEG images
image_directory = r'C:/Users/talake2/Downloads/img-20231030T191435Z-003/img'

# Path to the directory where text files will be saved
labels_directory = r'C:/Users/talake2/Downloads/img-20231030T191435Z-003/labels'

if os.path.isdir(image_directory):
    create_empty_label_files(image_directory, labels_directory)
    print("Text files creation completed.")
else:
    print(f"Image directory not found: {image_directory}")
