# Splitting Data into Train and Validation Sets
* This code defines a Python function called split_data that takes in a folder_path parameter, which represents the path to the folder containing the image files that need to be split into train and validation sets.

* Before the function is defined, the os and random modules are imported to allow for directory and file handling, as well as randomizing the order of the image files.

* The os.listdir function is used to retrieve the names of the entries in the folder_path directory, keeping only those that end with the .jpg file extension. These file names are then joined with folder_path using os.path.join to form the file paths; the paths are absolute only if folder_path itself is absolute.

* The random.shuffle method is called on the list of image file paths to randomly shuffle the order of the files.

* The split_index variable is set to 80% of the total number of image files using int(0.8 * len(image_files)).

* The train_files variable is assigned to a slice of the shuffled image file list from the beginning to the index of split_index, representing the 80% of files used for training.

* The valid_files variable is assigned to a slice of the shuffled image file list from the index of split_index to the end, representing the 20% of files used for validation.

* Two text files are then created and written to, using the built-in open function in write mode and the with statement to ensure the files are properly closed after writing. The train.txt file contains the file paths for the training images, while the val.txt file contains the file paths for the validation images. Both files are created in the current working directory, with one path per line.

* Finally, the function prints a message that indicates the total number of images in the directory, as well as the number of images allocated for training and validation.

* To use this function, simply call it and pass in the path to your image files directory, for example: `split_data('path/to/your/folder')`.

In [None]:
import os 
import random

def split_data(folder_path, train_ratio=0.8, seed=None):
    """Split the .jpg images in a folder into train and validation lists.

    The image paths are shuffled; the first ``train_ratio`` share is written
    to ``train.txt`` and the remainder to ``val.txt``. Both files are created
    in the current working directory with one path per line, and a summary
    line is printed.

    Args:
        folder_path: Directory to scan (non-recursively) for images. Each
            listed path is ``folder_path`` joined with the file name, so the
            paths are absolute only when ``folder_path`` is.
        train_ratio: Fraction of the images, between 0 and 1 inclusive, that
            goes to the training list. Defaults to 0.8.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f'train_ratio must be between 0 and 1, got {train_ratio!r}')

    # Sort first: os.listdir returns entries in arbitrary order, so a seeded
    # shuffle is only repeatable when it starts from a stable ordering.
    # Matching the extension case-insensitively keeps files such as
    # "IMG_1.JPG" from being silently dropped.
    image_files = [
        os.path.join(folder_path, name)
        for name in sorted(os.listdir(folder_path))
        if name.lower().endswith('.jpg')
    ]

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(image_files)

    split_index = int(train_ratio * len(image_files))
    train_files = image_files[:split_index]
    valid_files = image_files[split_index:]

    with open('train.txt', 'w') as train_f:
        train_f.write('\n'.join(train_files))
    with open('val.txt', 'w') as val_f:
        val_f.write('\n'.join(valid_files))

    print(f'Split {len(image_files)} images into {len(train_files)} train and {len(valid_files)} validation images.')
# Example call: replace the placeholder string with the path to the folder
# that contains the .jpg images before running this cell.
split_data('Your folder path')

# Create a YAML file for a YOLO model.

    Args:
        classes (list): A list of class names.
        anchors (list): A list of anchor boxes in the format (width, height).
        input_shape (tuple): The input shape of the model in the format (height, width).
        yaml_path (str, optional): The path to save the YAML file. Defaults to 'yolo.yaml'.

In [None]:
import os
import yaml

def create_yaml_file(classes, anchors, input_shape, yaml_path='yolo.yaml'):
    """Write a YOLO configuration dictionary to a YAML file.

    The 'train' and 'val' entries are fixed to 'train.txt' and 'val.txt';
    'nc' is the number of entries in ``classes``.

    Args:
        classes (list): A list of class names.
        anchors (list): A list of anchor boxes in the format (width, height).
        input_shape (tuple): The input shape of the model, stored under 'img_size'.
        yaml_path (str, optional): Where to save the YAML file. Defaults to 'yolo.yaml'.
    """
    config = {}
    config['train'] = 'train.txt'
    config['val'] = 'val.txt'
    config['nc'] = len(classes)
    config['names'] = classes
    config['anchors'] = anchors
    config['img_size'] = input_shape

    with open(yaml_path, 'w') as out:
        yaml.dump(config, out)



# Reads class names from a file.

    Args:
        file_path (str): The path to the class names file.

    Returns:
        list: A list of class names.

In [None]:
def read_class_names(file_path):
    """Read class names from a text file, one name per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so a newline at the end of the file does not add an empty class
    name to the result.

    Args:
        file_path (str): The path to the class names file.

    Returns:
        list: The non-empty class names, in file order.
    """
    with open(file_path, 'r') as file:
        stripped = (line.strip() for line in file)
        return [name for name in stripped if name]

# Generate anchor boxes for a YOLO model.

    Args:
        num_anchors (int): The number of anchor boxes to generate.
        image_shape (tuple): The shape of the input images in the format (height, width).
        grid_shape (tuple): The shape of the grid cells in the format (height, width).

    Returns:
        list: A list of anchor boxes in the format (width, height).

In [None]:
def generate_anchors(num_anchors, image_shape, grid_shape):
    """Build a list of anchor boxes sized to one grid cell.

    Every anchor is the same (width, height) pair: the image width divided by
    the grid width, and the image height divided by the grid height.

    Args:
        num_anchors (int): The number of anchor boxes to generate.
        image_shape (tuple): The input image shape in the format (height, width).
        grid_shape (tuple): The grid shape in the format (height, width).

    Returns:
        list: ``num_anchors`` anchor boxes in the format (width, height).
    """
    return [
        (image_shape[1] / grid_shape[1], image_shape[0] / grid_shape[0])
        for _ in range(num_anchors)
    ]

# Create a file with class names.

    Args:
        classes (list): A list of class names.
        file_path (str, optional): The path to save the class names file. Defaults to 'classes.txt'.
  

In [None]:
def create_class_names_file(classes, file_path='classes.txt'):
    """Save class names to a text file, one per line.

    The names are joined with newlines; no newline is written after the last
    name. An existing file at ``file_path`` is overwritten.

    Args:
        classes (list): A list of class names.
        file_path (str, optional): Where to save the file. Defaults to 'classes.txt'.
    """
    with open(file_path, mode='w') as out:
        joined = '\n'.join(classes)
        out.write(joined)

# Draws a bounding box on an image.

    Args:
        image (numpy.ndarray): The image to draw the bounding box on.
        box (list): A list of bounding box coordinates in the format [x, y, width, height].
        color (tuple, optional): The color of the bounding box. Defaults to (0, 255, 0).
        thickness (int, optional): The thickness of the bounding box. Defaults to 2.

    Returns:
        numpy.ndarray: The image with the bounding box drawn on it.

In [None]:
import cv2
import numpy as np

def draw_bounding_box(image, box, color=(0, 255, 0), thickness=2):
    """Draw the rectangle described by ``box`` on ``image``.

    Args:
        image (numpy.ndarray): The image to draw on.
        box (list): Bounding box coordinates in the format [x, y, width, height].
        color (tuple, optional): Rectangle color passed to cv2.rectangle. Defaults to (0, 255, 0).
        thickness (int, optional): Line thickness passed to cv2.rectangle. Defaults to 2.

    Returns:
        numpy.ndarray: The same ``image`` object that was passed in, after the
        rectangle has been drawn with cv2.rectangle.
    """
    left, top, width, height = box
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(image, top_left, bottom_right, color, thickness)
    return image

# Preprocesses an image for input into a YOLO model.

    Args:
        image (numpy.ndarray): The image to preprocess.
        input_shape (tuple): The input shape of the model in the format (height, width).

    Returns:
        numpy.ndarray: The preprocessed image.

In [None]:
import cv2
import numpy as np

def preprocess_image(image, input_shape):
    """Resize and scale an image for input into a YOLO model.

    Args:
        image (numpy.ndarray): The image to preprocess.
        input_shape (tuple): The model input size in the format (height, width).

    Returns:
        numpy.ndarray: The resized image divided by 255, with a new leading
        axis of length 1 added.
    """
    target_height, target_width = input_shape
    # cv2.resize takes its target size as (width, height), the reverse of the
    # (height, width) order used for input_shape; passing input_shape straight
    # through would transpose non-square targets.
    image = cv2.resize(image, (target_width, target_height))
    # Presumably the input is 8-bit, so this maps pixel values into [0, 1].
    image = image / 255.
    image = np.expand_dims(image, axis=0)
    return image

# Crop image on Region Of Interest

Reads an image from disk.

    Args:
        image_path (str): The path to the image file.

    Returns:
        numpy.ndarray: The image as a NumPy array.


In [None]:
def read_image(image_path):
    """Load an image from disk with cv2.imread.

    Args:
        image_path (str): The path to the image file.

    Returns:
        Whatever cv2.imread returns for ``image_path``, passed through
        unchanged.
        NOTE(review): cv2.imread is expected to return None rather than raise
        when the file cannot be read - confirm and check at call sites.
    """
    loaded = cv2.imread(image_path)
    return loaded

# Crops an image to the specified bounding box.

    Args:
        image (numpy.ndarray): The image to crop.
        box (list): A list of bounding box coordinates in the format [x, y, width, height].

    Returns:
        numpy.ndarray: The cropped image.


In [None]:
def crop_image(image, box):
    """Return the region of ``image`` covered by ``box``.

    Args:
        image (numpy.ndarray): The image to crop.
        box (list): Bounding box coordinates in the format [x, y, width, height].

    Returns:
        numpy.ndarray: Rows ``y`` to ``y + height`` and columns ``x`` to
        ``x + width`` of ``image``.
    """
    left, top, width, height = box
    rows = slice(top, top + height)
    cols = slice(left, left + width)
    return image[rows, cols]

# Draws a bounding box on an image.

    Args:
        image (numpy.ndarray): The image to draw the bounding box on.
        box (list): A list of bounding box coordinates in the format [x, y, width, height].
        color (tuple, optional): The color of the bounding box. Defaults to (0, 255, 0).
        thickness (int, optional): The thickness of the bounding box. Defaults to 2.

    Returns:
        numpy.ndarray: The image with the bounding box drawn on it.


In [None]:
def draw_bounding_box_on_image(image, box, color=(0, 255, 0), thickness=2):
    """Draw the rectangle described by ``box`` on ``image``.

    Args:
        image (numpy.ndarray): The image to draw on.
        box (list): Bounding box coordinates in the format [x, y, width, height].
        color (tuple, optional): Rectangle color passed to cv2.rectangle. Defaults to (0, 255, 0).
        thickness (int, optional): Line thickness passed to cv2.rectangle. Defaults to 2.

    Returns:
        numpy.ndarray: The same ``image`` object that was passed in, after the
        rectangle has been drawn with cv2.rectangle.
    """
    x0, y0, box_w, box_h = box
    x1 = x0 + box_w
    y1 = y0 + box_h
    cv2.rectangle(image, (x0, y0), (x1, y1), color, thickness)
    return image