In [None]:
import os
import cv2
from ultralytics import YOLO

# Load the default pretrained YOLOv8 model
model = YOLO("yolov8n.pt")  # Use the default YOLOv8n model

# Function to process a single image
def process_image(image_path, output_path, model):
    """Run detection on one image and save an annotated copy.

    Args:
        image_path: Path of the image read with ``cv2.imread``.
        output_path: Path the annotated image is written to with ``cv2.imwrite``.
        model: Object whose ``predict(frame)`` returns an iterable of results
            exposing ``boxes`` and ``names``.

    Returns:
        None. An image that cannot be read is reported and skipped, so the
        model is never called with ``None``.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        print("Failed to load image:", image_path)
        return

    # Run inference on the frame
    results = model.predict(frame)

    color = (0, 255, 0)  # Green color for bounding box
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            class_name = result.names[int(box.cls[0])]

            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # Save the processed image and report a failed write instead of ignoring it
    if not cv2.imwrite(output_path, frame):
        print("Failed to save image:", output_path)

# Process all images in the folder
def process_images_in_folder(input_folder, output_folder, model,
                             extensions=('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')):
    """Annotate every image file directly inside ``input_folder``.

    Args:
        input_folder: Directory whose entries are scanned (not recursive).
        output_folder: Directory that receives the annotated images under the
            same file names; created if missing.
        model: Detection model forwarded to ``process_image``.
        extensions: File-name suffixes (case-insensitive) treated as images.

    Sub-directories and files with other suffixes are skipped. This matters
    when ``output_folder`` lives inside ``input_folder``: it is created before
    the listing and would otherwise be handed to ``process_image`` as an image.
    """
    os.makedirs(output_folder, exist_ok=True)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Sorted for a reproducible processing order.
    for image_filename in sorted(os.listdir(input_folder)):
        input_image_path = os.path.join(input_folder, image_filename)
        if not os.path.isfile(input_image_path):
            continue
        if not image_filename.lower().endswith(suffixes):
            continue
        output_image_path = os.path.join(output_folder, image_filename)

        # Process each image
        process_image(input_image_path, output_image_path, model)

# Define input and output folder paths
# NOTE: the output folder is a sub-directory of the input folder, so a listing
# of the input folder also contains the 'detect' directory itself.
input_folder_path = "/content/drive/MyDrive/resize"  # Replace with your input folder path
output_folder_path = "/content/drive/MyDrive/resize/detect"  # Replace with your output folder path

# Process the images in the folder
# (uses the `model` loaded at the top of this cell)
process_images_in_folder(input_folder_path, output_folder_path, model)


## mount drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Packages

In [None]:
!pip install ultralytics==8.0.196


Collecting ultralytics==8.0.196
  Downloading ultralytics-8.0.196-py3-none-any.whl.metadata (31 kB)
Collecting thop>=0.1.1 (from ultralytics==8.0.196)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>

### stator


In [None]:
!unzip '/content/drive/MyDrive/stator2.zip'

Archive:  /content/drive/MyDrive/stator2.zip
   creating: images/
   creating: labels/
  inflating: classes.txt             
  inflating: notes.json              
  inflating: images/005d367c-stator_130.jpg  
  inflating: images/007abaaf-stator_9.jpg  
  inflating: images/021b1a0a-stator_259.jpg  
  inflating: images/02c72f8f-stator_18.jpg  
  inflating: images/02d25124-stator_189.jpg  
  inflating: images/05cc8b3c-stator_175.jpg  
  inflating: images/0625c6b8-stator_124.jpg  
  inflating: images/0660ff21-stator_171.jpg  
  inflating: images/07cc06a4-stator_53.jpg  
  inflating: images/07e43399-stator_290.jpg  
  inflating: images/07e7112d-stator_86.jpg  
  inflating: images/0859e02b-stator_255.jpg  
  inflating: images/086e8c4c-stator_46.jpg  
  inflating: images/09009d66-stator_161.jpg  
  inflating: images/0b688603-stator_233.jpg  
  inflating: images/0c947fb0-stator_22.jpg  
  inflating: images/0d12affd-stator_284.jpg  
  inflating: images/0ef3ccbf-stator_120.jpg  
  inflating: ima

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define folder paths (Adjust these paths as needed)
base_folder = 'stator'
# The source folders are absolute paths (presumably where the unzip cell
# extracted the archive). They are written out directly: joining an absolute
# path onto base_folder with os.path.join discards base_folder anyway.
images_folder = '/content/images'
labels_folder = '/content/labels'

# Create directories for training and validation sets
val_images_folder = os.path.join(base_folder, 'val', 'images')
val_labels_folder = os.path.join(base_folder, 'val', 'labels')
train_images_folder = os.path.join(base_folder, 'train', 'images')
train_labels_folder = os.path.join(base_folder, 'train', 'labels')

for split_folder in (val_images_folder, val_labels_folder, train_images_folder, train_labels_folder):
    os.makedirs(split_folder, exist_ok=True)

# Get list of image files and ensure corresponding labels exist
# (sorted so the split below is reproducible regardless of listing order)
image_files = sorted(f for f in os.listdir(images_folder) if f.endswith('.jpg'))
label_files = sorted(f for f in os.listdir(labels_folder) if f.endswith('.txt'))

# Ensure that the images and labels files match
image_names = {os.path.splitext(f)[0] for f in image_files}
label_names = {os.path.splitext(f)[0] for f in label_files}

if image_names != label_names:
    images_without_label = sorted(image_names - label_names)
    labels_without_image = sorted(label_names - image_names)
    # Name a few offenders so the mismatch can actually be located.
    raise ValueError(
        "The image and label files do not match. "
        f"Images without labels (first 10): {images_without_label[:10]}; "
        f"labels without images (first 10): {labels_without_image[:10]}"
    )

# Split files into train and validation sets (80/20, fixed seed)
train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)

# Copy files to the appropriate directories
def move_files(file_names, src_image_folder, src_label_folder, dst_image_folder, dst_label_folder):
    """Copy each image and its same-named ``.txt`` label into a split folder.

    Despite the name, files are copied with ``shutil.copy``; the sources are
    left in place.

    Args:
        file_names: Image file names (with extension) located in ``src_image_folder``.
        src_image_folder: Directory holding the source images.
        src_label_folder: Directory holding the source label files.
        dst_image_folder: Existing directory that receives the images.
        dst_label_folder: Existing directory that receives the labels.

    A missing label is reported with a warning and the image is still copied.
    The label is looked up on disk in ``src_label_folder`` rather than in a
    module-level list, so the function does not depend on notebook state.
    """
    for file_name in file_names:
        base_name = os.path.splitext(file_name)[0]
        # Copy image
        shutil.copy(os.path.join(src_image_folder, file_name), os.path.join(dst_image_folder, file_name))
        # Copy corresponding label
        label_file = base_name + '.txt'
        src_label_path = os.path.join(src_label_folder, label_file)
        if os.path.isfile(src_label_path):
            shutil.copy(src_label_path, os.path.join(dst_label_folder, label_file))
        else:
            print(f"Warning: No corresponding label file for {file_name}")

move_files(val_files, images_folder, labels_folder, val_images_folder, val_labels_folder)
move_files(train_files, images_folder, labels_folder, train_images_folder, train_labels_folder)

print("Files have been successfully organized into training and validation sets.")


Files have been successfully organized into training and validation sets.


In [None]:
from ultralytics import YOLO

# Load a model: build from YAML and transfer the pretrained weights.
# Only one construction is needed; building a YAML-only or .pt-only model
# first would just be overwritten by this assignment.
model = YOLO("yolov8n.yaml").load("yolov8n.pt")

# Train the model
results = model.train(data="/content/stator/data.yaml", optimizer="Adam", save=True, batch=2, lr0 = 0.01 , epochs=250,  imgsz=640, name = "002" , patience = 20)


                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

In [None]:
!yolo predict model=//content/runs/detect/02/weights/best.pt source='/content/episode_6.mp4' save=True


In [None]:
!cp -r /content/your_folder /content/drive/My\ Drive/your_folder_copy


## preprocessing

Rainy or stormy weather effects

IR effects


In [None]:
!pip install opencv-python-headless



In [None]:
# in directory conversion

import cv2
import numpy as np
import os
from google.colab.patches import cv2_imshow

def simulate_detailed_infrared(image_path, output_path):
    """Write a grayscale, edge-enhanced, corner-blurred copy of an image.

    Args:
        image_path: Path of the image to read.
        output_path: Path the processed image is written to.

    Returns:
        None. A message is printed for an unreadable input, a failed write,
        or a successful save.
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        print("Failed to load image:", image_path)
        return
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

    # Light blur, then Laplacian edge response converted back to 8-bit
    blur = cv2.GaussianBlur(img, (3, 3), 0)
    laplacian = cv2.Laplacian(blur, cv2.CV_16S, ksize=3)
    laplacian = cv2.convertScaleAbs(laplacian)

    # Blend the grayscale image with the edges; a low factor keeps edges subtle
    alpha = 0.3
    enhanced_img = cv2.addWeighted(img, 1 - alpha, laplacian, alpha, 0)

    # Mask is 255 everywhere except inside the centered ellipse, so only the
    # corners take the heavily blurred pixels.
    mask = np.zeros_like(enhanced_img, dtype='uint8')
    height, width = mask.shape[:2]
    cv2.rectangle(mask, (0, 0), (width, height), 255, -1)
    cv2.ellipse(mask, (width // 2, height // 2), (width // 2, height // 2), 0, 0, 360, 0, -1)
    smoothed_corners = cv2.GaussianBlur(enhanced_img, (21, 21), 30)
    enhanced_img = np.where(mask, smoothed_corners, enhanced_img)

    # Adjust contrast and brightness slightly
    contrast_enhanced_img = cv2.convertScaleAbs(enhanced_img, alpha=1.2, beta=10)

    # Save the processed image; only report success if the write succeeded
    if not cv2.imwrite(output_path, contrast_enhanced_img):
        print("Failed to save image:", output_path)
        return
    print("Processed and saved:", output_path)

def process_directory(directory_path, output_directory):
    """Apply the infrared simulation to every image directly in a directory.

    Args:
        directory_path: Directory whose entries are scanned (not recursive).
        output_directory: Directory that receives the processed images under
            the same file names; created when it does not exist yet.

    Only names ending in .png, .jpg or .jpeg (case-insensitive) are processed.
    """
    # Make sure the destination exists before anything is written.
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    image_suffixes = ('.png', '.jpg', '.jpeg')
    for entry in os.listdir(directory_path):
        if not entry.lower().endswith(image_suffixes):
            continue  # not an image file
        simulate_detailed_infrared(
            os.path.join(directory_path, entry),
            os.path.join(output_directory, entry),
        )

# Specify the directory containing the images and the directory to save processed images
directory_path = '/content/drive/MyDrive/nvyyolo/hyp'
output_directory = '/content/drive/MyDrive/nvyyolo/hyp/infrared'
process_directory(directory_path, output_directory)


In [None]:
import cv2
import numpy as np
import os

def simulate_detailed_infrared(image_path, output_path):
    """Write a grayscale, edge-enhanced, corner-blurred copy of an image.

    Same pipeline as the earlier cell, plus a floor on dark pixels and
    slightly different contrast/brightness settings.

    Args:
        image_path: Path of the image to read.
        output_path: Path the processed image is written to.

    Returns:
        None. A message is printed for an unreadable input, a failed write,
        or a successful save.
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        print("Failed to load image:", image_path)
        return

    original_shape = img.shape  # Store original dimensions

    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

    # Light blur, then Laplacian edge response converted back to 8-bit
    blur = cv2.GaussianBlur(img, (3, 3), 0)
    laplacian = cv2.Laplacian(blur, cv2.CV_16S, ksize=3)
    laplacian = cv2.convertScaleAbs(laplacian)

    # Blend the grayscale image with the edges; a low factor keeps edges subtle
    alpha = 0.3
    enhanced_img = cv2.addWeighted(img, 1 - alpha, laplacian, alpha, 0)

    # Prevent complete blacks by raising every pixel below the floor to it
    min_intensity = 50  # Adjust this value to set how dark the darkest parts can be
    enhanced_img[enhanced_img < min_intensity] = min_intensity

    # Mask is 255 everywhere except inside the centered ellipse, so only the
    # corners take the heavily blurred pixels.
    mask = np.zeros_like(enhanced_img, dtype='uint8')
    height, width = mask.shape[:2]
    cv2.rectangle(mask, (0, 0), (width, height), 255, -1)
    cv2.ellipse(mask, (width // 2, height // 2), (width // 2, height // 2), 0, 0, 360, 0, -1)
    smoothed_corners = cv2.GaussianBlur(enhanced_img, (21, 21), 30)
    enhanced_img = np.where(mask, smoothed_corners, enhanced_img)

    # Adjust contrast and brightness
    contrast_enhanced_img = cv2.convertScaleAbs(enhanced_img, alpha=1.1, beta=20)

    # Ensure the output image has the same width/height as the original
    contrast_enhanced_img = cv2.resize(contrast_enhanced_img, (original_shape[1], original_shape[0]))

    # Save the processed image; only report success if the write succeeded
    if not cv2.imwrite(output_path, contrast_enhanced_img):
        print("Failed to save image:", output_path)
        return
    print("Processed and saved:", output_path)

def process_directory(directory_path):
    """Apply the infrared simulation to every image directly in a directory.

    Results are written to an ``infrared`` sub-folder of ``directory_path``
    (created when it does not exist yet), keeping the original file names.
    Only names ending in .png, .jpg or .jpeg (case-insensitive) are processed.
    """
    target_dir = os.path.join(directory_path, 'infrared')  # sub-folder for the outputs
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    image_suffixes = ('.png', '.jpg', '.jpeg')
    for entry in os.listdir(directory_path):
        if not entry.lower().endswith(image_suffixes):
            continue  # not an image file
        simulate_detailed_infrared(
            os.path.join(directory_path, entry),
            os.path.join(target_dir, entry),
        )

# Specify the directory containing the images
directory_path = '/content/drive/MyDrive/nvyyolo/data13/val/images'
process_directory(directory_path)


video processing

In [None]:
import cv2
import numpy as np

def simulate_detailed_infrared(frame):
    """Return an infrared-style version of one BGR video frame.

    Args:
        frame: Frame as delivered by ``cv2.VideoCapture.read`` (assumed BGR).

    Returns:
        The processed frame converted back to 3-channel BGR so it can be
        passed to a video writer.
    """
    # Assume frame is already in BGR format as it comes from a video
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    laplacian = cv2.Laplacian(blur, cv2.CV_16S, ksize=3)
    laplacian = cv2.convertScaleAbs(laplacian)
    enhanced_img = cv2.addWeighted(gray, 1 - 0.3, laplacian, 0.3, 0)
    enhanced_img[enhanced_img < 50] = 50  # floor on dark pixels

    # The mask must start at 255 and have the centered ellipse cleared to 0.
    # Starting from zeros leaves the whole mask at 0, and the blurred corners
    # would never be selected by np.where below.
    mask = np.full_like(enhanced_img, 255, dtype='uint8')
    height, width = mask.shape[:2]
    cv2.ellipse(mask, (width // 2, height // 2), (width // 2, height // 2), 0, 0, 360, 0, -1)
    smoothed_corners = cv2.GaussianBlur(enhanced_img, (21, 21), 30)
    enhanced_img = np.where(mask, smoothed_corners, enhanced_img)

    contrast_enhanced_img = cv2.convertScaleAbs(enhanced_img, alpha=1.1, beta=20)
    # Convert back to BGR for video writer compatibility
    final_img = cv2.cvtColor(contrast_enhanced_img, cv2.COLOR_GRAY2BGR)
    return final_img

def process_video(video_path, output_video_path):
    """Apply ``simulate_detailed_infrared`` to every frame of a video.

    Args:
        video_path: Path of the input video.
        output_video_path: Path of the mp4v-encoded output video.

    The output keeps the input's frame size and frame rate; when the input
    reports no usable frame rate, 30 fps is used. Both video handles are
    released even if processing raises.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error opening video stream or file")
        cap.release()
        return

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not (fps > 0):  # also catches 0.0 and NaN
        fps = 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    try:
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_video_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            processed_frame = simulate_detailed_infrared(frame)
            out.write(processed_frame)
    finally:
        # Properly close video files to make sure all frames are saved.
        cap.release()
        out.release()
    print("Video processing complete. Check the output directory for the modified video.")



# Example usage
video_path = '/content/drive/MyDrive/final_demo.mp4'
output_video_path = '/content/drive/MyDrive/final_demoIR.mp4'
process_video(video_path, output_video_path)


Video processing complete. Check the output directory for the modified video.


## training


In [None]:
from ultralytics import YOLO

# Load a model: build from YAML and transfer the pretrained weights.
# Only one construction is needed; building a YAML-only or .pt-only model
# first would just be overwritten by this assignment.
model = YOLO("yolov8n.yaml").load("yolov8n.pt")

# Train the model
results = model.train(data="/content/ir/data.yaml", epochs=70,  imgsz=640)

for infrared dataset

In [None]:
from ultralytics import YOLO

# Load a model: build from YAML and transfer the pretrained weights.
# Only one construction is needed; building a YAML-only or .pt-only model
# first would just be overwritten by this assignment.
model = YOLO("yolov8n.yaml").load("yolov8n.pt")

# Train the model
results = model.train(data="/content/drive/MyDrive/nvyyolo/data13/data.yaml", epochs=200,  imgsz=640,optimizer="SGD",weight_decay= 0.0005, save=True, batch=8, lr0 = 0.0001 , name = "fast" , patience = 15)

In [None]:
!cp -r '/content/runs' '/content/drive/MyDrive/'

## predictions

In [None]:
!cp -r '/content/drive/MyDrive/demo/ship_testing_video/storm/Storm - HD stock video - Getty Images.mp4' '/content/drive/MyDrive/test'

In [None]:
!yolo detect predict model=/content/drive/MyDrive/nvyyolo/model/weights/best.pt source='/content/drive/MyDrive/test/Storm - HD stock video - Getty Images.mp4'

In [None]:
!cp -r '/content/runs/' '/content/drive/MyDrive/nvyyolo/model/IR-model/'

for rainy dataset

## Adding details

### Simple model

In [None]:
import os
from ultralytics import YOLO
import cv2

# Load a pretrained YOLOv8n model
model = YOLO("/content/drive/MyDrive/nvyyolo/model/detect/fast/weights/best.pt")

# Dictionary with additional details for each class
# Keys are looked up with the class name reported by the model
# (result.names[...]); each value holds display strings for weight and size.
# NOTE(review): 'yatch' looks like a misspelling of 'yacht', but the key
# presumably has to match the trained model's class name — verify before renaming.
additional_details = {
    'buoy': {'weight': '100KG', 'size': '1 meter'},
    'container': {'weight': '24000KG', 'size': '6 meters'},
    'container ship': {'weight': '1000000KG', 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'size': '20 meters'}
}

# Define lighter colors for each class
# These tuples are passed unchanged to cv2.rectangle / cv2.putText.
# NOTE(review): OpenCV drawing calls take colors in BGR order — confirm these
# values were chosen as BGR rather than RGB.
class_colors = {
    'buoy': (255, 102, 102),
    'container': (102, 255, 102),
    'container ship': (255, 0, 0),
    'ferry boat': (255, 255, 102),
    'lifeboat': (128, 0, 128),
    'lighthouse': (255, 165, 0),
    'passenger ferries': (204, 153, 255),
    'sailboat': (255, 255, 153),
    'unknown': (231, 84, 128),
    'yatch': (102, 255, 255)
}

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Draw the most confident box of each detected class plus its details.

    Args:
        frame: Image to draw on; modified in place.
        results: Iterable of detection results exposing ``boxes`` and ``names``.
        details: Mapping ``class name -> {'weight': ..., 'size': ...}``. The
            mapping passed by the caller is used, not a module-level global.

    Returns:
        The same ``frame`` object, annotated.
    """
    # Keep only the highest-confidence detection per class name.
    best_per_class = {}
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            conf = float(box.conf[0])  # plain float instead of a tensor element
            class_name = result.names[int(box.cls[0])]

            previous = best_per_class.get(class_name)
            if previous is not None and previous[0] > conf:
                continue
            best_per_class[class_name] = (conf, x1, y1, x2, y2)

    # Draw the highest confidence boxes
    for class_name, (conf, x1, y1, x2, y2) in best_per_class.items():
        color = class_colors.get(class_name, (255, 255, 255))
        display_name = "boat" if class_name == "ferry boat" else class_name
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, display_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        if class_name in details:
            detail_text_weight = f"Weight: {details[class_name]['weight']}"
            detail_text_size = f"Size: {details[class_name]['size']}"
            cv2.putText(frame, detail_text_weight, (x1, y2 + 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            cv2.putText(frame, detail_text_size, (x1, y2 + 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video.
        output_path: Path of the mp4v-encoded output video.
        model: Object whose ``predict(frame)`` returns detection results.
        details: Per-class detail mapping forwarded to ``add_details_to_frame``.

    The output keeps the input's frame size and frame rate (30 fps is used
    only when the input reports none), so playback speed is preserved. Both
    video handles are released even if processing raises.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open input video file {video_path}")
        cap.release()
        return

    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not (fps > 0):  # also catches 0.0 and NaN
        fps = 30.0
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    try:
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        cap.release()
        out.release()

# Process all videos in the input folder
def process_videos_in_folder(input_folder, output_folder, model, details):
    """Annotate every ``.mp4`` file directly inside ``input_folder``.

    Args:
        input_folder: Directory whose entries are scanned (not recursive).
        output_folder: Directory that receives the annotated videos under the
            same file names; created if missing.
        model: Detection model forwarded to ``process_video``.
        details: Per-class detail mapping forwarded to ``process_video``.

    The extension check is case-insensitive, so ``CLIP.MP4`` is processed too.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Sorted for a reproducible processing order.
    for video_file in sorted(os.listdir(input_folder)):
        if not video_file.lower().endswith(".mp4"):
            continue
        input_video_path = os.path.join(input_folder, video_file)
        output_video_path = os.path.join(output_folder, video_file)
        process_video(input_video_path, output_video_path, model, details)

# Define input and output video folder paths
input_folder_path = "/content/drive/MyDrive/nvyyolo/test_ship_videos/infrared_type"
output_folder_path = "/content/drive/MyDrive/demo/ship_testing_video/infrared"

# Process all videos in the folder
process_videos_in_folder(input_folder_path, output_folder_path, model, additional_details)


Adding details according to confidence


In [None]:
# IR model weights: /content/drive/MyDrive/nvyyolo/model/IR-model/detect/fast/weights/best.pt

IR model

In [None]:
import os
from ultralytics import YOLO
import cv2

# Load a pretrained YOLOv8n model
model = YOLO("/content/drive/MyDrive/nvyyolo/model/IR-model/detect/fast/weights/best.pt")

# Dictionary with additional details for each class
# Keys are looked up with the class name reported by the model
# (result.names[...]); each value holds display strings for weight and size.
# NOTE(review): 'yatch' looks like a misspelling of 'yacht', but the key
# presumably has to match the trained model's class name — verify before renaming.
additional_details = {
    'buoy': {'weight': '100KG', 'size': '1 meter'},
    'container': {'weight': '24000KG', 'size': '6 meters'},
    'cruise ship': {'weight': '1000000KG', 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'size': '20 meters'}
}

# Define lighter colors for each class
# These tuples are passed unchanged to cv2.rectangle / cv2.putText.
# NOTE(review): OpenCV drawing calls take colors in BGR order — confirm these
# values were chosen as BGR rather than RGB.
class_colors = {
    'buoy': (255, 102, 102),
    'container': (102, 255, 102),
    'cruise ship': (255, 0, 0),
    'ferry boat': (255, 255, 102),
    'lifeboat': (128, 0, 128),
    'lighthouse': (255, 165, 0),
    'passenger ferries': (204, 153, 255),
    'sailboat': (255, 255, 153),
    'unknown': (231, 84, 128),
    'yatch': (102, 255, 255)
}

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Draw the most confident box of each class when it reaches 50% confidence.

    Args:
        frame: Image to draw on; modified in place.
        results: Iterable of detection results exposing ``boxes`` and ``names``.
        details: Mapping ``class name -> {'weight': ..., 'size': ...}``. The
            mapping passed by the caller is used, not a module-level global.

    Returns:
        The same ``frame`` object, annotated.
    """
    # Keep only the highest-confidence detection per class name.
    best_per_class = {}
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            conf = float(box.conf[0])  # plain float instead of a tensor element
            class_name = result.names[int(box.cls[0])]

            previous = best_per_class.get(class_name)
            if previous is not None and previous[0] > conf:
                continue
            best_per_class[class_name] = (conf, x1, y1, x2, y2)

    # Draw the highest confidence boxes if confidence is 50% or higher
    for class_name, (conf, x1, y1, x2, y2) in best_per_class.items():
        if conf < 0.5:
            continue

        color = class_colors.get(class_name, (255, 255, 255))
        display_name = "boat" if class_name == "ferry boat" else class_name
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, display_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        if class_name in details:
            detail_text_weight = f"Weight: {details[class_name]['weight']}"
            detail_text_size = f"Size: {details[class_name]['size']}"
            cv2.putText(frame, detail_text_weight, (x1, y2 + 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            cv2.putText(frame, detail_text_size, (x1, y2 + 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video.
        output_path: Path of the mp4v-encoded output video.
        model: Object whose ``predict(frame)`` returns detection results.
        details: Per-class detail mapping forwarded to ``add_details_to_frame``.

    The output keeps the input's frame size and frame rate (30 fps is used
    only when the input reports none), so playback speed is preserved. Both
    video handles are released even if processing raises.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open input video file {video_path}")
        cap.release()
        return

    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not (fps > 0):  # also catches 0.0 and NaN
        fps = 30.0
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    try:
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        cap.release()
        out.release()

# Process all videos in the input folder
def process_videos_in_folder(input_folder, output_folder, model, details):
    """Annotate every ``.mp4`` file directly inside ``input_folder``.

    Args:
        input_folder: Directory whose entries are scanned (not recursive).
        output_folder: Directory that receives the annotated videos under the
            same file names; created if missing.
        model: Detection model forwarded to ``process_video``.
        details: Per-class detail mapping forwarded to ``process_video``.

    The extension check is case-insensitive, so ``CLIP.MP4`` is processed too.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Sorted for a reproducible processing order.
    for video_file in sorted(os.listdir(input_folder)):
        if not video_file.lower().endswith(".mp4"):
            continue
        input_video_path = os.path.join(input_folder, video_file)
        output_video_path = os.path.join(output_folder, video_file)
        process_video(input_video_path, output_video_path, model, details)

# Define input and output video paths.
# NOTE: despite the "folder" variable names, both values are single .mp4
# files, so the video is processed directly. The folder helper would call
# os.listdir on the input file and create a directory named like the output.
input_folder_path = "/content/IR.com).mp4"
output_folder_path = "/content/video3.mp4"

# Process the single video
process_video(input_folder_path, output_folder_path, model, additional_details)


rainy model

In [None]:
!yolo detect predict model=/content/runs/detect/train2/weights/best.pt source='col.mp4'


Ultralytics YOLOv8.0.196 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs

video 1/1 (1/1143) /content/col.mp4: 512x640 2 container ships, 94.0ms
video 1/1 (2/1143) /content/col.mp4: 512x640 2 container ships, 6.8ms
video 1/1 (3/1143) /content/col.mp4: 512x640 2 container ships, 6.8ms
video 1/1 (4/1143) /content/col.mp4: 512x640 2 container ships, 9.4ms
video 1/1 (5/1143) /content/col.mp4: 512x640 2 container ships, 8.3ms
video 1/1 (6/1143) /content/col.mp4: 512x640 2 container ships, 7.1ms
video 1/1 (7/1143) /content/col.mp4: 512x640 2 container ships, 7.8ms
video 1/1 (8/1143) /content/col.mp4: 512x640 2 container ships, 7.3ms
video 1/1 (9/1143) /content/col.mp4: 512x640 2 container ships, 6.2ms
video 1/1 (10/1143) /content/col.mp4: 512x640 2 container ships, 6.2ms
video 1/1 (11/1143) /content/col.mp4: 512x640 2 container ships, 6.4ms
video 1/1 (12/1143) /content/col.mp4: 512x640 2 container

## main latest details

In [None]:
#sailboat
import os
import cv2
from ultralytics import YOLO

# Load a pretrained YOLOv8n model
model = YOLO("/content/drive/MyDrive/nvyyolo/model/detect/fast/weights/best.pt")

# Dictionary with additional details for each class
# 'weight' and 'size' are display strings; 'height' is a number (labelled as
# meters when drawn) that feeds the distance estimate, or the string 'N/A'
# when no height is known.
# NOTE(review): 'yatch' looks like a misspelling of 'yacht', but the key
# presumably has to match the model's class name — verify before renaming.
additional_details = {
    'buoy': {'weight': '100KG', 'height': 1.0, 'size': '1 meter'},
    'container': {'weight': '24000KG', 'height': 2.5, 'size': '6 meters'},
    'cruise ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'height': 15.0, 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'height': 2.5, 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'height': 30.0, 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'height': 20.0, 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'height': 4.0, 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'height': 10.0, 'size': '20 meters'},
    'boat': {'weight': '2000KG', 'height': 5.0, 'size': '15 meters'}
}

# Focal length of the camera (in pixels)
# Used as: distance = real_height * FOCAL_LENGTH / bounding-box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Annotate "boat" and "sailboat" detections with details and distance.

    Args:
        frame: Image to draw on; modified in place.
        results: Iterable of detection results exposing ``boxes`` and ``names``.
        details: Mapping ``class name -> {'weight', 'height', 'size'}``, where
            ``height`` is a number or the string ``'N/A'``.

    Returns:
        The same ``frame`` object, annotated.

    Detections below 0.4 confidence and every class other than "boat" and
    "sailboat" (including "airplane") are ignored. Distance is estimated as
    ``height * FOCAL_LENGTH / box height in pixels`` and shown with one
    decimal; it is "N/A" when the height is unknown or the box has no height.
    """
    # Box colors for the only two classes that are drawn.
    drawable_colors = {"boat": (255, 153, 51), "sailboat": (255, 255, 153)}
    text_color = (255, 255, 255)

    for result in results:
        for box in result.boxes:
            # Skip low-confidence detections
            if float(box.conf[0]) < 0.4:
                continue

            class_name = result.names[int(box.cls[0])]
            color = drawable_colors.get(class_name)
            if color is None:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            if class_name not in details:
                continue
            info = details[class_name]

            height_in_pixels = y2 - y1
            real_world_height = info['height']
            # Guard against a degenerate box: int truncation can make y2 == y1.
            if isinstance(real_world_height, (int, float)) and height_in_pixels > 0:
                distance = f"{(real_world_height * FOCAL_LENGTH) / height_in_pixels:.1f}"
            else:
                distance = 'N/A'

            detail_lines = [
                f"Object name: {class_name}",
                f"Weight: {info['weight']}",
                f"Height: {info['height']} meters",
                f"Distance from the cam: {distance} meters",
                f"Size: {info['size']}",
            ]
            # One text line every 25 px below the box.
            for row, text in enumerate(detail_lines, start=1):
                cv2.putText(frame, text, (x1, y2 + 25 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video, opened with cv2.VideoCapture.
        output_path: Path of the annotated video to write (mp4v codec).
        model: Object exposing predict(frame); its output is handed to
            add_details_to_frame.
        details: Per-class details mapping forwarded to add_details_to_frame.

    Returns:
        None. If the input or output file cannot be opened, an error is
        printed and no frames are processed.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Could not open input video file {video_path}")
            return

        # Reuse the source frame rate so playback speed is preserved; fall back
        # to 30 FPS when the container reports no usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always free the capture and writer, including on early return or error.
        cap.release()
        if out is not None:
            out.release()

# Source clip and destination of the annotated copy (both under /content)
input_video_path = "/content/sail(online_video_cutter).mp4"
output_video_path = "/content/sail_pre.mp4"  # Replace with your output video path

# Annotate the single video above using the model and details defined earlier in this cell
process_video(input_video_path, output_video_path, model, additional_details)


In [None]:
import os
import cv2
from ultralytics import YOLO

# Load detection weights from a checkpoint stored on Google Drive
# NOTE(review): the path points at a `best.pt` training output rather than the stock yolov8n weights — confirm which model this is.
model = YOLO("/content/drive/MyDrive/nvyyolo/model/weights/best.pt")

# Dictionary with additional details for each class.
# 'height' is numeric (or 'N/A'), is drawn with a "meters" suffix and feeds the distance
# estimate in add_details_to_frame; 'weight' and 'size' are display-only strings.
# NOTE(review): 'yatch' looks like a misspelling of "yacht" — keys must match the model's class names, so verify before renaming.
additional_details = {
    'buoy': {'weight': '100KG', 'height': 1.0, 'size': '1 meter'},
    'container': {'weight': '24000KG', 'height': 2.5, 'size': '6 meters'},
    'cruise ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'height': 15.0, 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'height': 2.5, 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'height': 30.0, 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'height': 20.0, 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'height': 4.0, 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'height': 10.0, 'size': '20 meters'},
    'boat': {'weight': '2000KG', 'height': 5.0, 'size': '15 meters'}
}

# Focal length of the camera (in pixels).
# add_details_to_frame computes distance = height * FOCAL_LENGTH / box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Draw a box, label and detail lines for every confident "boat" detection.

    Args:
        frame: Image handed to cv2.rectangle / cv2.putText; it is drawn on in place.
        results: Iterable of prediction results, each exposing `boxes`
            (items with `xyxy`, `conf`, `cls`) and a `names` index-to-label mapping.
        details: Mapping of class name to a dict with 'weight', 'height' and
            'size' entries; 'height' may be the string 'N/A'.

    Returns:
        The same frame object, after annotation.
    """
    boat_color = (255, 153, 51)  # Color for boat class
    text_color = (255, 255, 255)

    for result in results:
        for box in result.boxes:
            # Drop detections below the 0.4 confidence threshold.
            if box.conf[0] < 0.4:
                continue

            class_name = result.names[int(box.cls[0])]
            # Only "boat" is annotated; every other class (e.g. "airplane") is skipped.
            if class_name != "boat":
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cv2.rectangle(frame, (x1, y1), (x2, y2), boat_color, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, boat_color, 2)

            if class_name not in details:
                continue

            info = details[class_name]
            height_in_pixels = y2 - y1
            real_world_height = info['height']
            # distance = real height * focal length / pixel height. Integer
            # truncation can collapse a thin box to zero pixels, so report
            # 'N/A' instead of dividing by zero.
            if real_world_height == 'N/A' or height_in_pixels <= 0:
                distance = 'N/A'
            else:
                distance = (real_world_height * FOCAL_LENGTH) / height_in_pixels

            detail_lines = [
                f"Object name: {class_name}",
                f"Weight: {info['weight']}",
                f"Height: {info['height']} meters",
                f"Distance from the cam: {distance} meters",
                f"Size: {info['size']}",
            ]
            # Stack the lines below the box, 25 px apart.
            for row, text in enumerate(detail_lines, start=1):
                cv2.putText(frame, text, (x1, y2 + 25 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video, opened with cv2.VideoCapture.
        output_path: Path of the annotated video to write (mp4v codec).
        model: Object exposing predict(frame); its output is handed to
            add_details_to_frame.
        details: Per-class details mapping forwarded to add_details_to_frame.

    Returns:
        None. If the input or output file cannot be opened, an error is
        printed and no frames are processed.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Could not open input video file {video_path}")
            return

        # Reuse the source frame rate so playback speed is preserved; fall back
        # to 30 FPS when the container reports no usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always free the capture and writer, including on early return or error.
        cap.release()
        if out is not None:
            out.release()

# Source clip and destination of the annotated copy (both under /content)
input_video_path = "/content/baot.mp4"  # Replace with your input video path
output_video_path = "/content/baot_pred.mp4"  # Replace with your output video path

# Annotate the single video above using the model and details defined in this cell
process_video(input_video_path, output_video_path, model, additional_details)


In [None]:
    # NOTE(review): stray, indented palette assignment with no enclosing function in this cell.
    # It looks like a leftover copy of the `colors` list used inside add_details_to_frame — confirm it can be removed.
    colors = [(0, 255, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255), (0, 255, 255)]


In [None]:
import cv2
from ultralytics import YOLO

# Load detection weights from a checkpoint stored on Google Drive
# NOTE(review): the path points at a `runs/detect/train2` training output rather than the stock yolov8n weights — confirm.
model = YOLO("/content/drive/MyDrive/detection-model/runs/detect/train2/weights/best.pt")

# Dictionary with additional details for each class.
# 'height' is numeric (or 'N/A') and feeds the distance estimate in add_details_to_frame;
# 'weight' and 'size' are display-only strings.
additional_details = {
    'container ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
}

# Focal length of the camera (in pixels).
# add_details_to_frame computes distance = height * FOCAL_LENGTH / box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Outline confident "container ship" detections and print their details.

    Each annotated ship takes the next color from a fixed six-entry palette
    (wrapping around). The first ship's text block is placed above its box,
    later ones below.

    Args:
        frame: Image handed to cv2.rectangle / cv2.putText; it is drawn on in place.
        results: Iterable of prediction results, each exposing `boxes`
            (items with `xyxy`, `conf`, `cls`) and a `names` index-to-label mapping.
        details: Mapping of class name to a dict with 'weight', 'height' and
            'size' entries; 'height' may be the string 'N/A'.

    Returns:
        The same frame object, after annotation.
    """
    colors = [(0, 255, 255), (255, 255, 0), (255, 0, 0), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
    text_size = 0.5
    line_spacing = 15
    drawn_objects = 0  # container ships outlined so far; also selects the palette entry

    for result in results:
        for box in result.boxes:
            # Drop detections below the 0.4 confidence threshold.
            if box.conf[0] < 0.4:
                continue

            class_name = result.names[int(box.cls[0])]
            # Only the "container ship" class is annotated.
            if class_name != "container ship":
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            color = colors[drawn_objects % len(colors)]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

            if class_name in details:
                info = details[class_name]
                height_in_pixels = y2 - y1
                real_world_height = info['height']
                # distance = real height * focal length / pixel height, rounded
                # to three decimals. A box truncated to zero pixels would divide
                # by zero, so it is reported as 'N/A'.
                if real_world_height == 'N/A' or height_in_pixels <= 0:
                    distance = 'N/A'
                else:
                    distance = round((real_world_height * FOCAL_LENGTH) / height_in_pixels, 3)

                # First annotated ship: text above the box; later ones: below it.
                text_y_position = y1 - 55 if drawn_objects == 0 else y2 + 5

                detail_lines = [
                    f"Object name: {class_name}",
                    f"Weight: {info['weight']}",
                    f"Height: {info['height']} meters",
                    f"Distance from the cam: {distance} meters",
                    f"Size: {info['size']}",
                ]
                for row, text in enumerate(detail_lines):
                    cv2.putText(frame, text, (x1, text_y_position + line_spacing * row), cv2.FONT_HERSHEY_SIMPLEX, text_size, color, 1)

            drawn_objects += 1

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video, opened with cv2.VideoCapture.
        output_path: Path of the annotated video to write (mp4v codec).
        model: Object exposing predict(frame); its output is handed to
            add_details_to_frame.
        details: Per-class details mapping forwarded to add_details_to_frame.

    Returns:
        None. If the input or output file cannot be opened, an error is
        printed and no frames are processed.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Could not open input video file {video_path}")
            return

        # Reuse the source frame rate so playback speed is preserved; fall back
        # to 30 FPS when the container reports no usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always free the capture and writer, including on early return or error.
        cap.release()
        if out is not None:
            out.release()

# Source clip under /content; the annotated copy is written to Google Drive
input_video_path = "/content/col.mp4"  # Replace with your input video path
output_video_path = "/content/drive/MyDrive/detection-model/video.mp4"  # Replace with your output video path

# Annotate the single video above using the model and details defined in this cell
process_video(input_video_path, output_video_path, model, additional_details)



0: 512x640 2 container ships, 11.5ms
Speed: 2.5ms preprocess, 11.5ms inference, 1.9ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 9.4ms
Speed: 3.2ms preprocess, 9.4ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 9.6ms
Speed: 3.0ms preprocess, 9.6ms inference, 4.1ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.2ms
Speed: 4.6ms preprocess, 6.2ms inference, 1.5ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.2ms
Speed: 3.0ms preprocess, 6.2ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 7.0ms
Speed: 2.9ms preprocess, 7.0ms inference, 1.4ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 8.2ms
Speed: 3.0ms preprocess, 8.2ms

In [None]:
import cv2
from ultralytics import YOLO

# Load detection weights from a checkpoint stored on Google Drive
# NOTE(review): the path points at a `runs/detect/train2` training output rather than the stock yolov8n weights — confirm.
model = YOLO("/content/drive/MyDrive/detection-model/runs/detect/train2/weights/best.pt")

# Dictionary with additional details for each class.
# 'height' is numeric (or 'N/A') and feeds the distance estimate in add_details_to_frame;
# 'weight' and 'size' are display-only strings.
additional_details = {
    'container ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
}

# Focal length of the camera (in pixels).
# add_details_to_frame computes distance = height * FOCAL_LENGTH / box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Outline confident "container ship" detections and print their details.

    The first annotated ship gets its text starting at the box's top-left
    corner; later ships get theirs starting 200 px left of the box's right edge.

    Args:
        frame: Image handed to cv2.rectangle / cv2.putText; it is drawn on in place.
        results: Iterable of prediction results, each exposing `boxes`
            (items with `xyxy`, `conf`, `cls`) and a `names` index-to-label mapping.
        details: Mapping of class name to a dict with 'weight', 'height' and
            'size' entries; 'height' may be the string 'N/A'.

    Returns:
        The same frame object, after annotation.
    """
    # First ship uses (255, 255, 0); every later palette slot is (0, 255, 0).
    colors = [(255, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0)]
    text_size = 0.6
    line_spacing = 15
    drawn_objects = 0  # container ships outlined so far; also selects the palette entry

    for result in results:
        for box in result.boxes:
            # Drop detections below the 0.4 confidence threshold.
            if box.conf[0] < 0.4:
                continue

            class_name = result.names[int(box.cls[0])]
            # Only the "container ship" class is annotated.
            if class_name != "container ship":
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            color = colors[drawn_objects % len(colors)]
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

            if class_name in details:
                info = details[class_name]
                height_in_pixels = y2 - y1
                real_world_height = info['height']
                # distance = real height * focal length / pixel height, rounded
                # to three decimals. A box truncated to zero pixels would divide
                # by zero, so it is reported as 'N/A'.
                if real_world_height == 'N/A' or height_in_pixels <= 0:
                    distance = 'N/A'
                else:
                    distance = round((real_world_height * FOCAL_LENGTH) / height_in_pixels, 3)

                # First ship: anchor at the left edge; later ships: 200 px in from the right edge.
                text_x_position = x1 if drawn_objects == 0 else x2 - 200
                text_y_position = y1

                detail_lines = [
                    f"Weight: {info['weight']}",
                    f"Height: {info['height']} meters",
                    f"Distance from the cam: {distance} meters",
                    f"Size: {info['size']}",
                ]
                for row, text in enumerate(detail_lines):
                    cv2.putText(frame, text, (text_x_position, text_y_position + line_spacing * row), cv2.FONT_HERSHEY_SIMPLEX, text_size, color, 1)

            drawn_objects += 1

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video, opened with cv2.VideoCapture.
        output_path: Path of the annotated video to write (mp4v codec).
        model: Object exposing predict(frame); its output is handed to
            add_details_to_frame.
        details: Per-class details mapping forwarded to add_details_to_frame.

    Returns:
        None. If the input or output file cannot be opened, an error is
        printed and no frames are processed.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Could not open input video file {video_path}")
            return

        # Reuse the source frame rate so playback speed is preserved; fall back
        # to 30 FPS when the container reports no usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always free the capture and writer, including on early return or error.
        cap.release()
        if out is not None:
            out.release()

# Source clip and destination of the annotated copy (both under /content)
input_video_path = "/content/col.mp4"  # Replace with your input video path
output_video_path = "/content/video2.mp4"  # Replace with your output video path

# Annotate the single video above using the model and details defined in this cell
process_video(input_video_path, output_video_path, model, additional_details)

# import cv2
# from ultralytics import YOLO

# # Load a pretrained YOLOv8n model
# model = YOLO("/content/runs/detect/train3/weights/best.pt")

# # Dictionary with additional details for each class
# additional_details = {
#     'container ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
#     'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
# }

# # Focal length of the camera (in pixels)
# FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# # Function to add additional details to a frame
# def add_details_to_frame(frame, results, details):
#     for result in results:
#         boxes = result.boxes
#         for box in boxes:
#             x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
#             conf = box.conf[0]
#             class_idx = int(box.cls[0])
#             class_name = result.names[class_idx]

#             # Filter by highest confidence if the object has been detected already
#             if conf < 0.4:
#                 continue

#             # Annotate any detected object as "container ship"
#             if class_name == "container ship":
#                 color = (0, 255, 255)  # Color for container ship class
#                 display_name = "container ship"
#                 cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#                 cv2.putText(frame, display_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

#                 if class_name in details:
#                     height_in_pixels = y2 - y1
#                     real_world_height = details[class_name]['height']
#                     distance = (real_world_height * FOCAL_LENGTH) / height_in_pixels if real_world_height != 'N/A' else 'N/A'

#                     # Reduce the size of text to half
#                     text_size = 0.4

#                     detail_text_name = f"Object name: {class_name}"
#                     detail_text_weight = f"Weight: {details[class_name]['weight']}"
#                     detail_text_height = f"Height: {details[class_name]['height']} meters"
#                     detail_text_distance = f"Distance from the cam: {distance} meters"
#                     detail_text_size = f"Size: {details[class_name]['size']}"
#                     cv2.putText(frame, detail_text_name, (x1, y2 + 25), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 1)
#                     cv2.putText(frame, detail_text_weight, (x1, y2 + 50), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 1)
#                     cv2.putText(frame, detail_text_height, (x1, y2 + 75), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 1)
#                     cv2.putText(frame, detail_text_distance, (x1, y2 + 100), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 1)
#                     cv2.putText(frame, detail_text_size, (x1, y2 + 125), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 1)

#     return frame

# # Process video
# def process_video(video_path, output_path, model, details):
#     cap = cv2.VideoCapture(video_path)
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
#     out = cv2.VideoWriter(output_path, fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))

#     if not out.isOpened():
#         print(f"Error: Could not open output video file {output_path}")
#         return

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         # Run inference on the frame
#         results = model.predict(frame)

#         # Add details to the frame
#         frame = add_details_to_frame(frame, results, details)

#         # Write the frame to the output video
#         out.write(frame)

#     cap.release()
#     out.release()

# # Define input and output video paths
# input_video_path = "/content/col.mp4"  # Replace with your input video path
# output_video_path = "/content/video2.mp4"  # Replace with your output video path

# # Process the single video
# process_video(input_video_path, output_video_path, model, additional_details)



0: 512x640 2 container ships, 10.8ms
Speed: 3.1ms preprocess, 10.8ms inference, 3.1ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 9.9ms
Speed: 3.1ms preprocess, 9.9ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 10.5ms
Speed: 3.1ms preprocess, 10.5ms inference, 2.1ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.2ms
Speed: 2.6ms preprocess, 6.2ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.6ms
Speed: 2.8ms preprocess, 6.6ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.5ms
Speed: 2.9ms preprocess, 6.5ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.6ms
Speed: 3.5ms preprocess, 6.6ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.8ms
Speed: 2.2ms preprocess, 6.8

auto distance

In [None]:
import os
from ultralytics import YOLO
import cv2

# Load a pretrained YOLOv8n model
model = YOLO("yolov8n.pt")

# Dictionary with additional details for each class.
# 'height' is numeric (or 'N/A'), is drawn with a "meters" suffix and feeds the distance
# estimate in add_details_to_frame; 'weight' and 'size' are display-only strings.
# NOTE(review): 'yatch' looks like a misspelling of "yacht" — keys must match the model's class names, so verify before renaming.
additional_details = {
    'buoy': {'weight': '100KG', 'height': 1.0, 'size': '1 meter'},
    'container': {'weight': '24000KG', 'height': 2.5, 'size': '6 meters'},
    'cruise ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'height': 15.0, 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'height': 2.5, 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'height': 30.0, 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'height': 20.0, 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'height': 4.0, 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'height': 10.0, 'size': '20 meters'}
}

# Define lighter colors for each class.
# add_details_to_frame looks colors up by class name and falls back to white (255, 255, 255)
# for names missing here; the tuples go straight to cv2 drawing calls.
class_colors = {
    'buoy': (255, 102, 102),
    'container': (102, 255, 102),
    'cruise ship': (255, 0, 0),
    'ferry boat': (255, 255, 102),
    'lifeboat': (128, 0, 128),
    'lighthouse': (255, 165, 0),
    'passenger ferries': (204, 153, 255),
    'sailboat': (255, 255, 153),
    'unknown': (231, 84, 128),
    'yatch': (102, 255, 255)
}

# Focal length of the camera (in pixels).
# add_details_to_frame computes distance = height * FOCAL_LENGTH / box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Annotate the most confident detection of each class on a frame.

    For every class name only the highest-confidence box is kept (a later box
    with equal confidence replaces an earlier one). Kept boxes with confidence
    of at least 0.4 are outlined, labelled and, when the class has an entry in
    `details`, followed by five detail lines below the box.

    Args:
        frame: Image handed to cv2.rectangle / cv2.putText; it is drawn on in place.
        results: Iterable of prediction results, each exposing `boxes`
            (items with `xyxy`, `conf`, `cls`) and a `names` index-to-label mapping.
        details: Mapping of class name to a dict with 'weight', 'height' and
            'size' entries; 'height' may be the string 'N/A'.

    Returns:
        The same frame object, after annotation.
    """
    # Keep only the highest-confidence box per class name.
    best_by_class = {}
    for result in results:
        for box in result.boxes:
            conf = box.conf[0]
            class_name = result.names[int(box.cls[0])]
            previous = best_by_class.get(class_name)
            if previous is not None and previous[0] > conf:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            best_by_class[class_name] = (conf, x1, y1, x2, y2)

    text_color = (255, 255, 255)

    # Draw the highest confidence boxes if confidence is 40% or higher
    for class_name, (conf, x1, y1, x2, y2) in best_by_class.items():
        if conf < 0.4:
            continue

        color = class_colors.get(class_name, (255, 255, 255))
        # "ferry boat" is shown as plain "boat" on the box label only.
        display_name = "boat" if class_name == "ferry boat" else class_name
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, display_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        # Use the mapping passed by the caller, not the module-level global.
        if class_name not in details:
            continue

        info = details[class_name]
        height_in_pixels = y2 - y1
        real_world_height = info['height']
        # distance = real height * focal length / pixel height. A box truncated
        # to zero pixels would divide by zero, so it is reported as 'N/A'.
        if real_world_height == 'N/A' or height_in_pixels <= 0:
            distance = 'N/A'
        else:
            distance = (real_world_height * FOCAL_LENGTH) / height_in_pixels

        detail_lines = [
            f"Object name: {class_name}",
            f"Weight: {info['weight']}",
            f"Height: {info['height']} meters",
            f"Distance from the cam: {distance} meters",
            f"Size: {info['size']}",
        ]
        # Stack the lines below the box, 25 px apart.
        for row, text in enumerate(detail_lines, start=1):
            cv2.putText(frame, text, (x1, y2 + 25 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path of the input video, opened with cv2.VideoCapture.
        output_path: Path of the annotated video to write (mp4v codec).
        model: Object exposing predict(frame); its output is handed to
            add_details_to_frame.
        details: Per-class details mapping forwarded to add_details_to_frame.

    Returns:
        None. If the input or output file cannot be opened, an error is
        printed and no frames are processed.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Could not open input video file {video_path}")
            return

        # Reuse the source frame rate so playback speed is preserved; fall back
        # to 30 FPS when the container reports no usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        # Report an unusable writer instead of silently dropping every frame.
        if not out.isOpened():
            print(f"Error: Could not open output video file {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always free the capture and writer, including on early return or error.
        cap.release()
        if out is not None:
            out.release()

# Process all videos in the input folder
def process_videos_in_folder(input_folder, output_folder, model, details):
    """Run process_video on every ".mp4" entry of a folder.

    Args:
        input_folder: Directory whose entries are listed with os.listdir.
        output_folder: Directory for the annotated videos; created when it
            does not exist yet.
        model: Detection model forwarded to process_video.
        details: Per-class details mapping forwarded to process_video.

    Each output keeps the file name of its input. Entries whose name does not
    end with ".mp4" (case-sensitive) are ignored.
    """
    output_missing = not os.path.exists(output_folder)
    if output_missing:
        os.makedirs(output_folder)

    mp4_names = [entry for entry in os.listdir(input_folder) if entry.endswith(".mp4")]
    for entry in mp4_names:
        source = os.path.join(input_folder, entry)
        target = os.path.join(output_folder, entry)
        process_video(source, target, model, details)

# Input folder of .mp4 files and the folder that receives the annotated copies.
# The output folder sits inside the input folder; process_videos_in_folder only picks up
# entries ending in ".mp4", so the "pred4" directory itself is not processed.
input_folder_path = "/content/drive/MyDrive/test"
output_folder_path = "/content/drive/MyDrive/test/pred4/"

# Process all videos in the folder
process_videos_in_folder(input_folder_path, output_folder_path, model, additional_details)


### simple model
/content/drive/MyDrive/nvyyolo/model/weights/best.pt
### rainy model
/content/drive/MyDrive/nvyyolo/model/detect/fast/weights/best.pt
### IR model

In [None]:
import os
import cv2
from ultralytics import YOLO

# Load detection weights from a local training run
# NOTE(review): the path points at a `runs/detect/train3` training output rather than the stock yolov8n weights — confirm.
model = YOLO("/content/runs/detect/train3/weights/best.pt")

# Dictionary with additional details for each class.
# 'height' is numeric (or 'N/A'), is drawn with a "meters" suffix and feeds the distance
# estimate in add_details_to_frame; 'weight' and 'size' are display-only strings.
# NOTE(review): the key 'container ships' is plural while other cells use 'container ship',
# and 'yatch' looks like a misspelling of "yacht" — verify both against the model's class names.
additional_details = {
    'buoy': {'weight': '100KG', 'height': 1.0, 'size': '1 meter'},
    'container': {'weight': '24000KG', 'height': 2.5, 'size': '6 meters'},
    'container ships': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'height': 15.0, 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'height': 2.5, 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'height': 30.0, 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'height': 20.0, 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'height': 4.0, 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'height': 10.0, 'size': '20 meters'},
    'boat': {'weight': '2000KG', 'height': 5.0, 'size': '15 meters'}
}

# Focal length of the camera (in pixels).
# add_details_to_frame computes distance = height * FOCAL_LENGTH / box height in pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Draw boxes, labels and detail lines for confident "boat" and "sailboat" detections.

    Args:
        frame: Image handed to cv2.rectangle / cv2.putText; it is drawn on in place.
        results: Iterable of prediction results, each exposing `boxes`
            (items with `xyxy`, `conf`, `cls`) and a `names` index-to-label mapping.
        details: Mapping of class name to a dict with 'weight', 'height' and
            'size' entries; 'height' may be the string 'N/A'.

    Returns:
        The same frame object, after annotation.
    """
    # Classes that get annotated, with their box/label color. Any other class
    # (e.g. "airplane") is skipped.
    box_colors = {
        "boat": (255, 153, 51),
        "sailboat": (255, 255, 153),
    }
    text_color = (255, 255, 255)

    for result in results:
        for box in result.boxes:
            # Drop detections below the 0.4 confidence threshold.
            if box.conf[0] < 0.4:
                continue

            class_name = result.names[int(box.cls[0])]
            color = box_colors.get(class_name)
            if color is None:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            if class_name not in details:
                continue

            info = details[class_name]
            height_in_pixels = y2 - y1
            real_world_height = info['height']
            # distance = real height * focal length / pixel height. Integer
            # truncation can collapse a thin box to zero pixels, so report
            # 'N/A' instead of dividing by zero.
            if real_world_height == 'N/A' or height_in_pixels <= 0:
                distance = 'N/A'
            else:
                distance = (real_world_height * FOCAL_LENGTH) / height_in_pixels

            detail_lines = [
                f"Object name: {class_name}",
                f"Weight: {info['weight']}",
                f"Height: {info['height']} meters",
                f"Distance from the cam: {distance} meters",
                f"Size: {info['size']}",
            ]
            # Stack the lines below the box, 25 px apart.
            for row, text in enumerate(detail_lines, start=1):
                cv2.putText(frame, text, (x1, y2 + 25 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Each frame is passed to ``model.predict`` and then to
    ``add_details_to_frame`` before being appended to the output file.

    :param video_path: Path of the video to read.
    :param output_path: Path of the annotated video to create (mp4v codec).
    :param model: Object exposing ``predict(frame)``; the results are handed
        to ``add_details_to_frame`` unchanged.
    :param details: Per-class details forwarded to ``add_details_to_frame``.
    :return: None. Problems opening either file are reported with ``print``.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this check a bad input path surfaced later as a misleading
        # "could not open output" message (the writer got a 0x0 frame size).
        print(f"Error: Could not open input video file {video_path}")
        cap.release()
        return

    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )

    # Keep the source frame rate so playback speed is preserved; fall back to
    # the previous fixed 30 fps when the container does not report a usable one.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    if not out.isOpened():
        print(f"Error: Could not open output video file {output_path}")
        cap.release()
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always release both handles so the output file is finalised even
        # when inference or drawing raises part-way through.
        cap.release()
        out.release()

# Define input and output video paths
input_video_path = "/content/col.mp4"  # Replace with your input video path
output_video_path = "/content/video3.mp4"  # Replace with your output video path

# Process the single video
process_video(input_video_path, output_video_path, model, additional_details)



0: 512x640 2 container ships, 12.1ms
Speed: 3.2ms preprocess, 12.1ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 11.3ms
Speed: 3.5ms preprocess, 11.3ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 11.2ms
Speed: 3.4ms preprocess, 11.2ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.5ms
Speed: 3.8ms preprocess, 6.5ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.5ms
Speed: 2.4ms preprocess, 6.5ms inference, 1.9ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.7ms
Speed: 3.9ms preprocess, 6.7ms inference, 2.3ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 6.6ms
Speed: 2.3ms preprocess, 6.6ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 2 container ships, 10.0ms
Speed: 3.5ms preprocess, 

In [None]:
import os
from ultralytics import YOLO
import cv2

# Load a pretrained YOLOv8n model
model = YOLO("yolov8n.pt")

# Dictionary with additional details for each class.
# Keys are compared against the class names reported by the model, so they
# must match those names exactly. 'height' is a number (used as metres in the
# distance estimate) or the string 'N/A'; 'weight' and 'size' are display text.
# NOTE(review): 'yatch' looks like a misspelling of 'yacht' — only rename it if
# the model's class list uses the corrected spelling.
# NOTE(review): this cell loads the stock yolov8n.pt and its log only shows
# 'boat' detections — confirm the other keys can actually be produced.
additional_details = {
    'buoy': {'weight': '100KG', 'height': 1.0, 'size': '1 meter'},
    'container': {'weight': '24000KG', 'height': 2.5, 'size': '6 meters'},
    'cruise ship': {'weight': '1000000KG', 'height': 30.0, 'size': '300 meters'},
    'ferry boat': {'weight': '50000KG', 'height': 15.0, 'size': '50 meters'},
    'lifeboat': {'weight': '2000KG', 'height': 2.5, 'size': '8 meters'},
    'lighthouse': {'weight': '500000KG', 'height': 30.0, 'size': '30 meters'},
    'passenger ferries': {'weight': '70000KG', 'height': 20.0, 'size': '70 meters'},
    'sailboat': {'weight': '300KG', 'height': 4.0, 'size': '3 meters'},
    'unknown': {'weight': 'N/A', 'height': 'N/A', 'size': 'N/A'},
    'yatch': {'weight': '10000KG', 'height': 10.0, 'size': '20 meters'},
    'boat': {'weight': '2000KG', 'height': 5.0, 'size': '15 meters'}  # Added boat class
}

# Box/label colour for each class; classes missing here fall back to white
# in add_details_to_frame.
# NOTE(review): the tuples are passed straight to cv2.rectangle / cv2.putText,
# which read them as BGR on frames decoded by OpenCV — confirm they were not
# picked as RGB values (e.g. (255, 0, 0) would render blue, not red).
class_colors = {
    'buoy': (255, 102, 102),
    'container': (102, 255, 102),
    'cruise ship': (255, 0, 0),
    'ferry boat': (255, 255, 102),
    'lifeboat': (128, 0, 128),
    'lighthouse': (255, 165, 0),
    'passenger ferries': (204, 153, 255),
    'sailboat': (255, 255, 153),
    'unknown': (231, 84, 128),
    'yatch': (102, 255, 255),
    'boat': (255, 153, 51)  # Added color for boat class
}

# Focal length of the camera (in pixels), used by the distance estimate
# distance = real_height * FOCAL_LENGTH / box_height_in_pixels.
FOCAL_LENGTH = 800  # Example value, replace with your camera's focal length

# Function to add additional details to a frame
def add_details_to_frame(frame, results, details):
    """Draw the best detection of each class, plus its details, onto a frame.

    For every class name only the detection with the highest confidence is
    kept; it is drawn when that confidence is at least 0.4. If the class has
    an entry in ``details``, five lines of text (name, weight, height,
    estimated distance, size) are written below the box.

    :param frame: Image to draw on; it is modified in place and returned.
    :param results: Iterable of result objects exposing ``boxes`` and ``names``;
        each box exposes ``xyxy``, ``conf`` and ``cls``.
    :param details: Mapping of class name to a dict with 'weight', 'height'
        and 'size'. ``None`` falls back to the module-level
        ``additional_details`` (the table this function used before it
        honoured the argument).
    :return: The same ``frame`` object.
    """
    if details is None:
        details = additional_details

    # Keep only the highest-confidence detection per class name.
    best_by_class = {}
    for result in results:
        for box in result.boxes:
            # float() gives a plain number to compare and store
            # (presumably box.conf[0] is a 0-d tensor — TODO confirm).
            conf = float(box.conf[0])
            class_name = result.names[int(box.cls[0])]

            previous = best_by_class.get(class_name)
            if previous is not None and previous[0] > conf:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            best_by_class[class_name] = (conf, x1, y1, x2, y2)

    white = (255, 255, 255)

    # Draw the highest confidence boxes if confidence is 40% or higher
    for class_name, (conf, x1, y1, x2, y2) in best_by_class.items():
        if conf < 0.4:
            continue

        color = class_colors.get(class_name, white)
        display_name = "boat" if class_name == "ferry boat" else class_name
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, display_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

        if class_name not in details:
            continue
        info = details[class_name]

        height_in_pixels = y2 - y1
        real_world_height = info['height']
        # Coordinates are truncated to ints, so a very thin box can have a
        # pixel height of 0; report 'N/A' instead of dividing by zero.
        if real_world_height == 'N/A' or height_in_pixels <= 0:
            distance = 'N/A'
        else:
            distance = f"{(real_world_height * FOCAL_LENGTH) / height_in_pixels:.2f}"

        detail_lines = (
            f"Object name: {class_name}",
            f"Weight: {info['weight']}",
            f"Height: {info['height']} meters",
            f"Distance from the cam: {distance} meters",
            f"Size: {info['size']}",
        )
        # Stack the lines 25 px apart, starting 25 px below the box.
        for line_number, text in enumerate(detail_lines, start=1):
            cv2.putText(frame, text, (x1, y2 + 25 * line_number), cv2.FONT_HERSHEY_SIMPLEX, 0.6, white, 2)

    return frame

# Process video
def process_video(video_path, output_path, model, details):
    """Run detection on every frame of a video and write an annotated copy.

    Each frame is passed to ``model.predict`` and then to
    ``add_details_to_frame`` before being appended to the output file.

    :param video_path: Path of the video to read.
    :param output_path: Path of the annotated video to create (mp4v codec).
    :param model: Object exposing ``predict(frame)``; the results are handed
        to ``add_details_to_frame`` unchanged.
    :param details: Per-class details forwarded to ``add_details_to_frame``.
    :return: None. Problems opening either file are reported with ``print``.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this check a bad input path surfaced later as a misleading
        # "could not open output" message (the writer got a 0x0 frame size).
        print(f"Error: Could not open input video file {video_path}")
        cap.release()
        return

    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )

    # Keep the source frame rate so playback speed is preserved; fall back to
    # the previous fixed 30 fps when the container does not report a usable one.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    if not out.isOpened():
        print(f"Error: Could not open output video file {output_path}")
        cap.release()
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(frame)

            # Add details to the frame
            frame = add_details_to_frame(frame, results, details)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always release both handles so the output file is finalised even
        # when inference or drawing raises part-way through.
        cap.release()
        out.release()

# Define input and output video paths
# NOTE(review): the input filename contains a stray ")" ("vid.com).mp4") —
# confirm it matches the uploaded file exactly; the log below shows the run
# did read frames, so the name may be correct as-is.
input_video_path = "/content/vid.com).mp4"
output_video_path = "/content/video2.mp4"

# Annotate the single video using the per-class details table defined above
process_video(input_video_path, output_video_path, model, additional_details)


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 73.1MB/s]

0: 384x640 1 boat, 313.3ms
Speed: 23.7ms preprocess, 313.3ms inference, 35.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 146.9ms
Speed: 4.4ms preprocess, 146.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 154.2ms
Speed: 5.1ms preprocess, 154.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 144.9ms
Speed: 2.7ms preprocess, 144.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 142.4ms
Speed: 2.6ms preprocess, 142.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 145.9ms
Speed: 4.4ms preprocess, 145.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 162.5ms
Sp

# Segmentation

## distribution

In [None]:
%cd '/content/drive/MyDrive/foods'

/content/drive/MyDrive/foods


In [None]:
!gdown '1Y3XrvmkPUxptNYkP0ko15czjvJ25O4yZ'

Downloading...
From (original): https://drive.google.com/uc?id=1Y3XrvmkPUxptNYkP0ko15czjvJ25O4yZ
From (redirected): https://drive.google.com/uc?id=1Y3XrvmkPUxptNYkP0ko15czjvJ25O4yZ&confirm=t&uuid=1e354692-48e9-4ef1-8972-45fd2ab31e65
To: /content/food_dataset.zip
100% 518M/518M [00:14<00:00, 35.2MB/s]


In [None]:
!unzip '/content/food_dataset.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: food_dataset/labels/969fc653-coffee_111.txt  
  inflating: food_dataset/labels/96a01634-mozzarella_cheese_97.txt  
  inflating: food_dataset/labels/96a2316d-cocktail_102.txt  
  inflating: food_dataset/labels/96b2ab83-trail_mix_48.txt  
  inflating: food_dataset/labels/96b7b2c4-cucumber_214.txt  
  inflating: food_dataset/labels/96b8482f-grapes_178.txt  
  inflating: food_dataset/labels/96beb9cd-elderberry_5.txt  
  inflating: food_dataset/labels/96c56f9e-buckwheat_202.txt  
  inflating: food_dataset/labels/96c5b519-lentil_217.txt  
  inflating: food_dataset/labels/96c61fde-smoothie_206.txt  
  inflating: food_dataset/labels/96cb7827-almonds_124.txt  
  inflating: food_dataset/labels/96ccbff4-cake_63.txt  
  inflating: food_dataset/labels/96dad520-carrot_56.txt  
  inflating: food_dataset/labels/96dc8051-mozzarella_cheese_203.txt  
  inflating: food_dataset/labels/96e310c9-orange_172.txt  
  inflating: food_d

Reduce the dataset to a fixed-size subset (5,000 training, 1,300 validation and 300 test images).

In [None]:
import os
import shutil
import random
from pathlib import Path

# Define paths
source_images = Path("/content/food_dataset/images")
source_labels = Path("/content/food_dataset/labels")
destination = Path("/content/drive/MyDrive/data/dataset1")

# Define the number of files for each split
num_train = 5000
num_val = 1300
num_test = 300

# Fixed seed so the same images land in the same split on every run
SPLIT_SEED = 101

# Ensure the destination directories exist
for split in ['train', 'val', 'test']:
    (destination / split / 'images').mkdir(parents=True, exist_ok=True)
    (destination / split / 'labels').mkdir(parents=True, exist_ok=True)

# Get all image files; sorted first because glob order depends on the
# filesystem, which would otherwise defeat the seed below
image_files = sorted(source_images.glob("*.jpg"))

# Shuffle with a private seeded generator (leaves the global RNG untouched)
random.Random(SPLIT_SEED).shuffle(image_files)

# Check if we have enough images
total_files = len(image_files)
required_files = num_train + num_val + num_test
if total_files < required_files:
    raise ValueError(f"Not enough images. Available: {total_files}, Required: {required_files}")

# Select the number of files for each split (consecutive, non-overlapping slices)
train_files = image_files[:num_train]
val_files = image_files[num_train:num_train + num_val]
test_files = image_files[num_train + num_val:required_files]

def copy_files(files, split):
    """Copy images and their matching label files into one split directory.

    Each image goes to ``destination/<split>/images``. A label named
    ``<image stem>.txt`` is looked up in ``source_labels`` and, when present,
    copied to ``destination/<split>/labels``; images without a label are
    still copied.

    :param files: Iterable of image ``Path`` objects.
    :param split: Name of the split sub-directory ('train', 'val' or 'test').
    """
    for src_image in files:
        split_root = destination / split
        shutil.copy(src_image, split_root / 'images' / src_image.name)

        src_label = source_labels / f"{src_image.stem}.txt"
        if not src_label.exists():
            # No annotation for this image: nothing more to copy.
            continue
        shutil.copy(src_label, split_root / 'labels' / src_label.name)

# Copy the files to their respective directories
copy_files(train_files, 'train')
copy_files(val_files, 'val')
copy_files(test_files, 'test')

print("Dataset organized successfully.")


Dataset organized successfully.


distribution of the data

In [None]:
import os
import shutil
from random import seed, shuffle

def split_data(base_dir, train_ratio=0.7, val_ratio=0.2):
    """
    Splits the data into training, validation, and test sets.

    Images are moved out of ``<base_dir>/images`` into
    ``<base_dir>/<split>/images``; the label ``<image stem>.txt`` is moved
    from ``<base_dir>/labels`` to ``<base_dir>/<split>/labels`` when it
    exists. Whatever is left after the train and validation shares goes to
    the test split.

    :param base_dir: The base directory where 'images' and 'labels' folders are located.
    :param train_ratio: The ratio of data to be used for training (default is 0.7).
    :param val_ratio: The ratio of data to be used for validation (default is 0.2).
    :raises ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so float sums such as 0.7 + 0.3 are not rejected.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"Invalid split ratios: train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    seed(101)  # For reproducible random shuffling
    images_dir = os.path.join(base_dir, 'images')
    labels_dir = os.path.join(base_dir, 'labels')

    # Create directories for the splits
    split_names = ['train', 'val', 'test']
    split_dirs = {
        name: {'images': os.path.join(base_dir, name, 'images'),
               'labels': os.path.join(base_dir, name, 'labels')}
        for name in split_names
    }
    for dirs in split_dirs.values():
        os.makedirs(dirs['images'], exist_ok=True)
        os.makedirs(dirs['labels'], exist_ok=True)

    # List all images (any extension case) in sorted order: os.listdir order is
    # arbitrary, so without sorting the seeded shuffle is still not repeatable.
    images = sorted(
        f for f in os.listdir(images_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    shuffle(images)

    # Calculate split indices
    num_images = len(images)
    num_train = int(num_images * train_ratio)
    num_val = int(num_images * val_ratio)

    train_images = images[:num_train]
    val_images = images[num_train:num_train + num_val]
    test_images = images[num_train + num_val:]

    # Move the images and labels
    missing_labels = 0
    for split, image_files in zip(split_names, [train_images, val_images, test_images]):
        for image_file in image_files:
            shutil.move(os.path.join(images_dir, image_file), split_dirs[split]['images'])

            # Swap only the final extension; str.replace on the extension text
            # also rewrote matches inside the name (e.g. 'png_1.png').
            label_file = os.path.splitext(image_file)[0] + '.txt'  # Assuming label extension is .txt
            label_path = os.path.join(labels_dir, label_file)
            if os.path.exists(label_path):
                shutil.move(label_path, split_dirs[split]['labels'])
            else:
                # Do not abort half-way through the move because of one
                # unlabeled image; count it and report at the end.
                missing_labels += 1

    print(f"Moved {len(train_images)} to training, {len(val_images)} to validation, and {len(test_images)} to test.")
    if missing_labels:
        print(f"Warning: {missing_labels} image(s) had no matching label file.")

# Example usage:
base_dir = '/content/drive/MyDrive/foods/food_dataset'
split_data(base_dir)


Moved 8546 to training, 2441 to validation, and 1222 to test.


## Training

In [None]:
%cd /content/drive/MyDrive/data/results

/content/drive/MyDrive/data/results


In [None]:
!yolo detect train data='/content/drive/MyDrive/data/dataset1/data.yaml' model=yolov8n-seg.pt epochs=100 imgsz=640 val= True optimizer="AdamW" weight_decay= 0.0001 save=True batch=4 lr0 = 1E-3 name = "f3" patience = 8


New https://pypi.org/project/ultralytics/8.2.58 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.196 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=/content/drive/MyDrive/data/dataset1/data.yaml, epochs=100, patience=8, batch=4, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=f3, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None,

In [None]:
!yolo detect predict model=/content/drive/MyDrive/foods/dataset/runs/segment/train2/weights/best.pt source='/content/drive/MyDrive/foods/dataset/test'

In [None]:
import os
from ultralytics import YOLO
import cv2

# Load the custom YOLOv8 weights (best.pt) stored on Drive
model = YOLO("/content/drive/MyDrive/foods/best.pt")

# Process image
def process_image(image_path, output_path, model):
    """Run detection on one image and save a copy with labelled boxes.

    Every box returned by the model is drawn in white with its class name
    above it. Files that cannot be read are skipped with a warning.

    :param image_path: Path of the image to read.
    :param output_path: Path where the annotated image is written.
    :param model: Object exposing ``predict(frame)`` whose results expose
        ``boxes`` and ``names``.
    :return: None.
    """
    # Read the image; cv2.imread signals failure by returning None rather
    # than raising, so check before handing the frame to the model.
    frame = cv2.imread(image_path)
    if frame is None:
        print(f"Warning: could not read image {image_path}; skipping.")
        return

    # Run inference on the frame
    results = model.predict(frame)

    # Draw every detected box with its class name
    color = (255, 255, 255)
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            class_name = result.names[int(box.cls[0])]

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # Save the output image; imwrite reports failure through its return value
    if not cv2.imwrite(output_path, frame):
        print(f"Warning: could not write image {output_path}")

# Process all images in the input folder
def process_images_in_folder(input_folder, output_folder, model):
    """Annotate every PNG/JPEG image found directly inside a folder.

    The output folder is created when it does not exist. Each matching file
    is passed to ``process_image`` and written under the same file name in
    ``output_folder``; other files are ignored.

    :param input_folder: Directory to scan (not recursive).
    :param output_folder: Directory that receives the annotated images.
    :param model: Model object forwarded to ``process_image``.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Extension match is case-insensitive.
    selected = [
        name for name in os.listdir(input_folder)
        if name.lower().endswith(image_suffixes)
    ]
    for name in selected:
        process_image(
            os.path.join(input_folder, name),
            os.path.join(output_folder, name),
            model,
        )

# Define input and output image folder paths
input_folder_path = "/content/drive/MyDrive/foods/food_dataset/images"
output_folder_path = "/content/drive/MyDrive/foods/results/test1"

# Process all images in the folder
process_images_in_folder(input_folder_path, output_folder_path, model)


In [None]:
import os
from ultralytics import YOLO
import cv2

# Load a pretrained YOLOv8 model
model = YOLO("/content/drive/MyDrive/foods/best.pt")

# Process and save image with bounding boxes
def process_and_save_image(result, output_path):
    """Draw one result's boxes onto its source image and write it to disk.

    The drawing is done directly on ``result.orig_img``, so that array is
    modified as a side effect.

    :param result: Result object exposing ``orig_img``, ``boxes`` and ``names``.
    :param output_path: Destination path passed to ``cv2.imwrite``.
    """
    canvas = result.orig_img
    white = (255, 255, 255)

    for detection in result.boxes:
        left, top, right, bottom = (int(v) for v in detection.xyxy[0].tolist())
        label = result.names[int(detection.cls[0])]

        # Box outline, then the class name just above its top-left corner
        cv2.rectangle(canvas, (left, top), (right, bottom), white, 2)
        cv2.putText(canvas, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, white, 2)

    cv2.imwrite(output_path, canvas)

# Define paths: folder of source images and folder for the annotated copies
input_folder_path = "/content/drive/MyDrive/foods/food_dataset/images"
output_folder_path = "/content/drive/MyDrive/foods/results/test"

# Create the output folder on first run
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Run inference on the whole folder.
# NOTE(review): with stream=True the call presumably returns a lazy iterator,
# so inference happens as the loop below consumes it — confirm in the
# Ultralytics predict documentation.
results = model(input_folder_path, stream=True)

# Process and save each image under its original file name.
# The output keeps the source extension (the log below shows .webp inputs),
# so cv2.imwrite must support that format.
for result in results:
    image_filename = os.path.basename(result.path)
    output_image_path = os.path.join(output_folder_path, image_filename)
    process_and_save_image(result, output_image_path)

print(f"Detection results saved in {output_folder_path}")



image 1/2 /content/drive/MyDrive/foods/food_dataset/images/25f1b305-nuts_walnuts_walnuts.webp: 640x640 1 class_3, 1 class_60, 465.6ms
image 2/2 /content/drive/MyDrive/foods/food_dataset/images/f46d6e68-nuts_walnuts_walnuts_1.webp: 640x640 5 class_60s, 436.3ms
Speed: 4.7ms preprocess, 450.9ms inference, 22.7ms postprocess per image at shape (1, 3, 640, 640)


Detection results saved in /content/drive/MyDrive/foods/results/test


In [None]:
from ultralytics import YOLO

# Load the custom YOLOv8 weights (best.pt) stored on Drive
model = YOLO("/content/drive/MyDrive/foods/best.pt")

# Define path to the image file
source = "/content/drive/MyDrive/foods/food_dataset/images/f46d6e68-nuts_walnuts_walnuts_1.webp"

# Run inference on the source
results = model(source)  # list of Results objects


image 1/1 /content/drive/MyDrive/foods/food_dataset/images/f46d6e68-nuts_walnuts_walnuts_1.webp: 640x640 5 class_60s, 323.9ms
Speed: 2.5ms preprocess, 323.9ms inference, 18.8ms postprocess per image at shape (1, 3, 640, 640)


In [None]:
%cd '/content/'

/content


In [None]:
!yolo detect predict model=/content/drive/MyDrive/foods/best.pt source='/content/drive/MyDrive/foods/food_dataset/images/f46d6e68-nuts_walnuts_walnuts_1.webp'

Ultralytics YOLOv8.0.196 🚀 Python-3.10.12 torch-2.3.0+cu121 CPU (Intel Xeon 2.20GHz)
YOLOv8n-seg summary (fused): 195 layers, 3286426 parameters, 0 gradients, 12.1 GFLOPs

image 1/1 /content/drive/MyDrive/foods/food_dataset/images/f46d6e68-nuts_walnuts_walnuts_1.webp: 640x640 5 class_60s, 395.3ms
Speed: 31.2ms preprocess, 395.3ms inference, 34.8ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/segment/predict[0m
💡 Learn more at https://docs.ultralytics.com/modes/predict
