# Ray Hyperparameter Tuning

In [None]:
from ultralytics import YOLO
from ray import tune

# Load the pretrained YOLOv8-large classification checkpoint.
model = YOLO("yolov8l-cls.pt")

# Hyperparameter search space explored by Ray Tune.
# Optimizer names are matched case-sensitively by the Ultralytics trainer
# ("SGD", "Adam", "AdamW", "RMSProp", ...); lowercase spellings such as
# "adam" / "sgd" are rejected with NotImplementedError, so use the exact names.
search_space = {
    "lr0": tune.uniform(1e-5, 1e-1),
    "imgsz": tune.choice([480, 512, 600, 640]),
    "optimizer": tune.choice(["Adam", "SGD", "AdamW", "RMSProp"]),
}

# Run Ray Tune on the model; the remaining keyword arguments are fixed
# training settings shared by every trial.
# NOTE(review): `data` points at a ".../val" directory — confirm this is the
# intended classification dataset root rather than a single split.
# NOTE(review): the run name says "yolov5l" while the weights are yolov8l —
# confirm the name is intentional.
result_grid = model.tune(
    data="datasets/crop-datasets/classify-generated/val",
    dropout=0.2,
    label_smoothing=0.1,
    space=search_space,
    batch=12,
    epochs=20,
    use_ray=True,
    name='yolov5l-cls',
)

## Mean and STD of mosquito data

In [9]:
import os
import cv2
import numpy as np
from tqdm import tqdm 
def calculate_mean_and_std(folder_path, extensions=(".jpeg",)):
    """Compute per-channel mean and standard deviation over a folder of images.

    The folder is walked recursively. Every file whose lower-cased name ends
    with one of ``extensions`` is read with ``cv2.imread`` and its pixels are
    accumulated into running sums, so only one image is held in memory at a time.

    Args:
        folder_path: Root directory to scan.
        extensions: Lower-case filename suffixes to include. Defaults to
            ``(".jpeg",)``.

    Returns:
        Tuple ``(mean, std)`` of two length-3 NumPy arrays on the raw pixel
        scale. Channels are in the order produced by ``cv2.imread`` (OpenCV
        documents this as BGR for colour images), not RGB.

    Raises:
        ValueError: If no readable image was found under ``folder_path``.
    """
    suffixes = tuple(extensions)

    # Running totals: pixel count, per-channel sum and per-channel sum of squares.
    total_pixels = 0
    pixel_sum = np.zeros(3)
    pixel_sum_squared = np.zeros(3)

    for root, _, files in os.walk(folder_path):
        for file in tqdm(files):
            if not file.lower().endswith(suffixes):
                continue

            image_path = os.path.join(root, file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None.
                tqdm.write(f"Skipping unreadable image: {image_path}")
                continue

            # Work in float so that squaring and summing cannot overflow uint8.
            image = image.astype(float)

            pixel_sum += image.sum(axis=(0, 1))
            pixel_sum_squared += np.square(image).sum(axis=(0, 1))
            total_pixels += image.shape[0] * image.shape[1]

    if total_pixels == 0:
        raise ValueError(
            f"No readable images with extensions {suffixes} found under {folder_path!r}"
        )

    mean = pixel_sum / total_pixels
    # Var[X] = E[X^2] - E[X]^2; clip at zero because rounding error can make a
    # (near-)constant channel come out slightly negative, which would give NaN.
    variance = pixel_sum_squared / total_pixels - mean ** 2
    std = np.sqrt(np.clip(variance, 0, None))

    return mean, std

# Root folder whose images are scanned (recursively) for the statistics
folder_path = "/home/saidinesh/Desktop/Projects/yolov5/datasets/classify-crop/train"

# Per-channel mean and standard deviation over every matching image under folder_path
mean, std = calculate_mean_and_std(folder_path)

# Print the raw (un-normalized) statistics, one array per line
# NOTE(review): images are read with cv2, so the channel order is presumably BGR — confirm before reuse
print(f"Mean: {mean}")
print(f"Standard Deviation: {std}")


0it [00:00, ?it/s]
100%|██████████| 622/622 [01:52<00:00,  5.53it/s]
100%|██████████| 47/47 [00:04<00:00, 11.73it/s]
100%|██████████| 4612/4612 [04:28<00:00, 17.17it/s]
100%|██████████| 429/429 [00:49<00:00,  8.72it/s]
100%|██████████| 84/84 [00:04<00:00, 19.39it/s]
100%|██████████| 4563/4563 [05:12<00:00, 14.58it/s]

Mean: [138.36356524 148.52860967 161.277854  ]
Standard Deviation: [53.20503549 49.51324015 48.71902218]





In [2]:
# Per-channel statistics measured on the 0-255 pixel scale, divided by 255
# to express them on the 0-1 scale.
Mean = [channel / 255 for channel in (138.36356524, 148.52860967, 161.277854)]
STD = [channel / 255 for channel in (53.20503549, 49.51324015, 48.71902218)]

In [10]:
# Raw per-channel statistics on the 0-255 pixel scale.
# NOTE(review): these match the cv2-based statistics printed earlier in this
# notebook, so the channel order is presumably BGR — confirm before using
# them in an RGB pipeline.
Mean = [138.36356524, 148.52860967, 161.277854]
Standard_Deviation = [53.20503549, 49.51324015, 48.71902218]

# Rescale both lists to the 0-1 range.
Mean = list(map(lambda value: value / 255, Mean))
Standard_Deviation = list(map(lambda value: value / 255, Standard_Deviation))

print(Mean)
print(Standard_Deviation)


[0.542602216627451, 0.5824651359607843, 0.6324621725490196]
[0.208647198, 0.1941695692156863, 0.19105498894117648]


## Cropped images dataset

In [8]:
import os
import shutil
import cv2
from tqdm import tqdm 
# Roots of the YOLO detection splits (each is expected to hold images/ and labels/)
yolo_dataset_path = '/home/saidinesh/Desktop/Projects/yolov5/datasets/train'
yolo_dataset_path_val='/home/saidinesh/Desktop/Projects/yolov5/datasets/val'

# Root of the image-classification dataset that receives the crops
image_classification_path = '/home/saidinesh/Desktop/Projects/yolov5/datasets/classify-crop'

# Class names; the position in this list is the YOLO class id
class_names = ['aegypti', 'albopictus', 'anopheles', 'culex', 'culiseta', 'japonicus-koreicus']

# Make sure <root>/<split>/images/<class> exists for both splits and every class
for class_name in class_names:
    for split_subdir in ('train/images', 'val/images'):
        class_dir = os.path.join(image_classification_path, split_subdir, class_name)
        os.makedirs(class_dir, exist_ok=True)

def _yolo_box_to_pixels(fields, image_width, image_height):
    """Convert a normalized YOLO label row to a pixel box clamped to the image.

    Args:
        fields: Split label row ``[class, x_center, y_center, width, height]``
            with the four box values normalized to the image size.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of integer pixel coordinates, clamped to
        ``[0, image_width]`` / ``[0, image_height]``.
    """
    x_center = float(fields[1]) * image_width
    y_center = float(fields[2]) * image_height
    width = float(fields[3]) * image_width
    height = float(fields[4]) * image_height

    # Clamp: a negative start index would make the numpy slice wrap around
    # from the far edge and yield an empty or wrong crop.
    x1 = max(0, int(x_center - width / 2))
    y1 = max(0, int(y_center - height / 2))
    x2 = min(image_width, int(x_center + width / 2))
    y2 = min(image_height, int(y_center + height / 2))
    return x1, y1, x2, y2


def crop_dataset(yolo_dataset_path=yolo_dataset_path):
    """Crop the first labelled object of every image into the classification dataset.

    Walks ``<yolo_dataset_path>/images``. For each image that has a matching
    ``<yolo_dataset_path>/labels/<stem>.txt`` file, the FIRST label row is
    used: the box is cropped and written as ``<stem>.jpeg`` under
    ``image_classification_path/<train|val>/images/<class name>``. Any further
    rows in the label file are ignored.

    Images are skipped (not fatal) when the label file is missing, the first
    row does not have exactly five fields, the class id is outside
    ``class_names``, the image cannot be read, or the clamped box is empty.

    Args:
        yolo_dataset_path: Root of a YOLO split containing ``images`` and
            ``labels`` sub-folders.
    """
    for root, _, files in os.walk(os.path.join(yolo_dataset_path, 'images')):
        # Routing depends only on the directory path, so decide once per directory.
        split_subdir = 'train/images' if 'train' in root else 'val/images'

        for file in tqdm(files):
            stem = os.path.splitext(file)[0]
            label_path = os.path.join(yolo_dataset_path, 'labels', stem + '.txt')
            if not os.path.exists(label_path):
                continue

            # Only the first row of the label file is considered.
            with open(label_path, 'r') as label_file:
                line = label_file.readline().strip().split()
            if len(line) != 5:  # YOLO format: class x_center y_center width height
                continue

            class_id = int(line[0])
            # Reject negative ids too: class_names[-1] would silently pick the last class.
            if not 0 <= class_id < len(class_names):
                continue

            image_path = os.path.join(root, file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None for files it cannot decode.
                tqdm.write(f"Skipping unreadable image: {image_path}")
                continue

            x1, y1, x2, y2 = _yolo_box_to_pixels(line, image.shape[1], image.shape[0])
            if x2 <= x1 or y2 <= y1:
                # Degenerate box: nothing to crop.
                continue

            cropped_object = image[y1:y2, x1:x2]

            destination_folder = os.path.join(
                image_classification_path, split_subdir, class_names[class_id]
            )
            os.makedirs(destination_folder, exist_ok=True)
            object_path = os.path.join(destination_folder, stem + '.jpeg')
            cv2.imwrite(object_path, cropped_object)

# Crop the train split first, then the validation split.
for split_root in (yolo_dataset_path, yolo_dataset_path_val):
    crop_dataset(split_root)

100%|██████████| 9840/9840 [09:55<00:00, 16.52it/s]


## Image classification dataset structure

In [5]:
import os
import shutil

# Dataset root and the class labels (list position = YOLO class index)
dataset_path = "/home/saidinesh/Desktop/Projects/yolov5/datasets/"
class_labels = ["aegypti", "albopictus", "anopheles", "culex", "culiseta", "japonicus-koreicus"]
class_count = len(class_labels)

# Image folders for both splits; annotations are read from the val labels folder
train_image_dir = os.path.join(dataset_path, "train/images/")
val_image_dir = os.path.join(dataset_path, "val/images/")
labels_dir = os.path.join(dataset_path, "val/labels/")

# Ensure one sub-folder per class exists inside each image folder
for label in class_labels:
    for image_dir in (train_image_dir, val_image_dir):
        os.makedirs(os.path.join(image_dir, label), exist_ok=True)

# Function to parse YOLO annotation files and copy images to the appropriate class folders
def process_annotation(annotation_file, image_path, output_dir):
    """Copy an image into the class folder named by its annotation's first row.

    The class index is the first whitespace-separated field of the first line
    of ``annotation_file``. The image is copied to
    ``output_dir/<class label>/<image basename>`` unless that file already
    exists. Nothing is copied when the annotation is empty, its first line is
    blank, or the class index falls outside ``class_labels``.

    Args:
        annotation_file: Path to the YOLO ``.txt`` annotation.
        image_path: Path to the image the annotation belongs to.
        output_dir: Directory that contains one sub-folder per class label.
    """
    with open(annotation_file, "r") as f:
        lines = f.readlines()

    if not lines:
        return

    fields = lines[0].strip().split()
    if not fields:
        return

    class_index = int(fields[0])
    if not 0 <= class_index < class_count:
        return

    target_path = os.path.join(
        output_dir, class_labels[class_index], os.path.basename(image_path)
    )
    # Never overwrite an image that was already placed in the class folder.
    if os.path.exists(target_path):
        return
    shutil.copy(image_path, target_path)

# # Process train images and annotations
# for root, _, files in os.walk(train_image_dir):
#     for file in files:
#         if file.endswith(".jpeg"):
#             image_path = os.path.join(root, file)
#             annotation_path = os.path.join(labels_dir, file.replace(".jpeg", ".txt"))
#             process_annotation(annotation_path, image_path, train_image_dir)

# Process validation images and annotations
for root, dirs, files in os.walk(val_image_dir):
    # The per-class output folders live inside val_image_dir itself; prune them
    # so a re-run does not walk over the copies it produced earlier.
    dirs[:] = [d for d in dirs if d not in class_labels]

    for file in files:
        if not file.endswith(".jpeg"):
            continue

        image_path = os.path.join(root, file)
        # Swap only the extension; str.replace would also rewrite ".jpeg"
        # occurring elsewhere in the file name.
        annotation_path = os.path.join(labels_dir, os.path.splitext(file)[0] + ".txt")

        # Images without a label file cannot be assigned a class: skip them
        # instead of failing the whole conversion.
        if not os.path.isfile(annotation_path):
            continue

        process_annotation(annotation_path, image_path, val_image_dir)

print("Dataset conversion completed.")


Dataset conversion completed.


# Validation set creation

In [5]:
import os
import random
import shutil
from tqdm import tqdm 
# Fixed seed so the same files are selected on every fresh run
random.seed(43)

# Define the path to your dataset folders
train_folder = '/home/saidinesh/Desktop/Projects/yolov5/datasets/crop-datasets/ensemble1/train/'
validation_folder = '/home/saidinesh/Desktop/Projects/yolov5/datasets/crop-datasets/ensemble1/val/'

# Define the percentage of data to move to the validation dataset
validation_percentage = 0.05

# NOTE: this cell MOVES files, so it is not idempotent — running it again
# moves a further share of whatever is left in the train folder.

# os.listdir() returns entries in arbitrary order; sorting makes the seeded
# sampling below select the same files regardless of filesystem ordering.
for class_name in tqdm(sorted(os.listdir(train_folder))):

    class_path = os.path.join(train_folder, class_name)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders are not classes.
        continue

    # Regular files (images) in the class folder, in a deterministic order
    files = sorted(
        entry for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    print(len(files))

    # Number of files to move; int() truncates, so very small classes may move none
    num_files_to_move = int(validation_percentage * len(files))

    # Randomly select files to move
    files_to_move = random.sample(files, num_files_to_move)

    # Move selected files to the validation dataset
    for file_name in files_to_move:
        file_path = os.path.join(class_path, file_name)
        destination_path = os.path.join(validation_folder, class_name, file_name)

        # Create the destination directory if it doesn't exist
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)

        # Move the file
        shutil.move(file_path, destination_path)

print(f"{validation_percentage * 100}% of the images have been moved to the validation dataset.")


 50%|█████     | 3/6 [00:00<00:00, 19.47it/s]

591
45
4381
407
80
4335


100%|██████████| 6/6 [00:00<00:00, 15.95it/s]

5.0% of the images have been moved to the validation dataset.





In [None]:

from ultralytics import RTDETR 
# Load RT-DETR from the last checkpoint written by the 'rt-detr-l-640' run
model = RTDETR('runs/detect/rt-detr-l-640/weights/last.pt')

# Train the model; `resume` is given the same checkpoint path the model was loaded from
results = model.train(
    data='datasets/dataset.yaml',
    amp=False,
    cache=True,
    batch=4,
    epochs=72,
    imgsz=640,
    resume='runs/detect/rt-detr-l-640/weights/last.pt',
)


# yolov5


In [None]:
# Train YOLOv5 with the models/yolo-tph.yaml config on datasets/dataset.yaml for 100 epochs, starting from yolov5s.pt
%cd /home/saidinesh/Desktop/Projects/yolov5/
!python train.py --img 640 --cfg models/yolo-tph.yaml --batch 2 --epochs 100 --data datasets/dataset.yaml --weights yolov5s.pt --cache --name "yolov5s-tph"

# predictions exploration

In [None]:
import torch
import torchvision

# Use the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Load the custom YOLOv5 checkpoint through torch.hub, place it on the chosen
# device, and switch it to evaluation mode
model = torch.hub.load('ultralytics/yolov5', 'custom', 'runs/train/baseline-yolov5s/weights/last.pt')
model.to(device)
model.eval()

In [None]:
!python detect.py --weights runs/train/baseline-yolov5s/weights/last.pt \
    --data datasets/dataset.yaml \
    --source datasets/val/images --save-txt --save-conf \
    --img 640 --half

In [None]:
import fiftyone as fo
name = "my-dataset"
dataset_dir = "datasets/"

# The splits to load
splits = ["train", "val"]

# Start from a clean slate: drop a previously persisted dataset of the same
# name. An explicit existence check replaces the former bare `except: pass`,
# which also hid unrelated failures (and KeyboardInterrupt).
if fo.dataset_exists(name):
    fo.delete_dataset(name)

# Import every split from the YOLOv5-format directory, tagging each sample
# with the split it came from
dataset = fo.Dataset(name)
for split in splits:
    dataset.add_dir(
        dataset_dir=dataset_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        split=split,
        tags=split,
    )

# Get some summary information about the dataset
print(dataset.info)
# `stats` is a method; printing it without calling it only shows the bound-method repr
print(dataset.stats())
session = fo.Session(dataset=dataset)

In [5]:
import fiftyone as fo
from PIL import Image
from torchvision.transforms import functional as func

name = "my-dataset"
dataset_dir = "datasets/"

# The splits to load
splits = ["train", "val"]

# Start from a clean slate: drop a previously persisted dataset of the same
# name. An explicit existence check replaces the former bare `except: pass`,
# which also hid unrelated failures (and KeyboardInterrupt).
if fo.dataset_exists(name):
    fo.delete_dataset(name)

dataset = fo.Dataset(name)
for split in splits:
    dataset.add_dir(
        dataset_dir=dataset_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        split=split,
        tags=split,
    )

# Get some summary information about the dataset
print('datasetInfo')
print(dataset.info)

# Get class list
classes = dataset.default_classes
test_view = dataset.match_tags("val")

# The recorded run of this cell iterated over 0 samples; make that case visible.
if len(test_view) == 0:
    print("Warning: no samples tagged 'val' were loaded - check dataset_dir and the working directory")

# Add predictions to samples
# NOTE: `model` must already be defined by the YOLOv5 loading cell of this notebook.
with fo.ProgressBar() as pb:
    for sample in pb(test_view):
        # Load image
        image = Image.open(sample.filepath)

        # Perform inference; one row per detection:
        # xmin, ymin, xmax, ymax, confidence, class index, class name
        preds = model(image)
        pred_rows = preds.pandas().xyxy[0]

        # PIL reports (width, height) directly, so there is no need to build
        # a tensor (and move it to the device) just to read the image size.
        w, h = image.size

        detections = []

        for row in pred_rows.values:
            x1, y1, x2, y2 = row[0], row[1], row[2], row[3]
            # FiftyOne boxes are [top-left-x, top-left-y, width, height], relative to the image size
            rel_box = [float(x1 / w), float(y1 / h), float((x2 - x1) / w), float((y2 - y1) / h)]

            detections.append(
                fo.Detection(
                    # Cast to built-in types so indexing and storage do not
                    # depend on the numpy/pandas scalar types of the row.
                    label=classes[int(row[5])],
                    bounding_box=rel_box,
                    confidence=float(row[4])
                )
            )

        # Save predictions to dataset
        sample["predictions"] = fo.Detections(detections=detections)
        sample.save()

print("Finished adding predictions to the test_view")

 100% |█████████████████████| 0/0 [46.9us elapsed, ? remaining, ? samples/s]  100% |█████████████████████| 0/0 [2.5ms elapsed, ? remaining, ? samples/s]  
 100% |█████████████████████| 0/0 [3.1ms elapsed, ? remaining, ? samples/s]  
datasetInfo
{}
 100% |█████████████████████| 0/0 [5.1ms elapsed, ? remaining, ? samples/s] 
Finished adding predictions to the test_view


In [12]:
# Display the `info` attribute of the validation view (the saved output of this cell is an empty dict)
test_view.info

{}

In [1]:
# Training CV model with kfold

In [None]:
%cd /home/saidinesh/Desktop/Projects/yolov5/
from ultralytics import YOLO
# Checkpoint from which every fold in the loop below is resumed
weights_path = 'runs/detect/yolo-v5s-base_fold_4/weights/last.pt'

# Metrics collected per fold index
results = {}

# range(4, 5) yields only k = 4, i.e. a single fold is (re)trained here
for k in range(4, 5):
    # Dataset YAML for this fold; note the split folders are numbered k + 1
    dataset_yaml = f'datasets/train/2023-08-30_5-Fold_Cross-val/split_{k+1}/split_{k+1}_dataset.yaml' #ds_yamls[k]
    run_name = f'yolo-v5s-base_fold_{k}'

    model = YOLO(weights_path, task='detect')
    model.train(
        data=dataset_yaml,
        name=run_name,
        device=0,
        batch=8,
        resume=True,
    )

    # Keep the fold's metrics for later analysis, then drop the model reference
    results[k] = model.metrics
    del model

In [None]:
# Print a fixed completion message
print('Completed Successfully')

In [None]:
import os
import shutil

# Define the paths to the val and train folders
val_folder = '/home/saidinesh/Desktop/Projects/yolov5/datasets/val/images'
train_folder = '/home/saidinesh/Desktop/Projects/yolov5/datasets/train/images'

# Get a list of filenames in both folders
val_files = os.listdir(val_folder)
train_files = os.listdir(train_folder)

# Find common filenames between the two folders
common_files = set(val_files) & set(train_files)

# Move common files from train to val folder (the train copy replaces the val copy).
# Sorted so the order of moves and log lines is the same on every run.
# NOTE(review): only images are moved; matching files in the labels folders are
# left untouched — confirm that is intended.
for filename in sorted(common_files):
    src_path = os.path.join(train_folder, filename)
    dest_path = os.path.join(val_folder, filename)

    # Only handle regular files. Sub-directories with the same name in both
    # folders (e.g. per-class folders) would otherwise be matched too, and
    # shutil.move() of a directory onto an existing directory nests the
    # source inside the destination.
    if not (os.path.isfile(src_path) and os.path.isfile(dest_path)):
        continue

    shutil.move(src_path, dest_path)
    print(f"Moved '{filename}' from train to val folder.")


print("Done.")
