In [3]:
# Move image files out of the annotation folder into a separate directory

import os
import shutil

def move_image_files(source_dir, destination_dir):
    """
    Move image files from source directory to destination directory.

    Only regular files located directly inside ``source_dir`` are considered
    (no recursion). Sub-directories are never moved, even when their name
    happens to end with an image extension. Matching is done on the file name
    suffix, case-insensitively.

    Args:
    source_dir (str): Path to the source directory containing files
    destination_dir (str): Path to the destination directory for image files
        (created if it does not exist)

    Returns:
    int: Number of files that were moved successfully
    """
    # Common image file extensions, compared against the lower-cased file name.
    # A tuple lets str.endswith test every suffix in a single call.
    image_extensions = (
        '.jpg', '.jpeg', '.png', '.gif',
        '.bmp', '.tiff', '.webp', '.svg',
        '.raw', '.heic', '.avif',
    )

    # Create destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)

    # Counter for moved files
    moved_files_count = 0

    for filename in os.listdir(source_dir):
        if not filename.lower().endswith(image_extensions):
            continue

        source_path = os.path.join(source_dir, filename)

        # os.listdir also returns directories; a folder called "x.png" is not an image
        if not os.path.isfile(source_path):
            continue

        destination_path = os.path.join(destination_dir, filename)

        # Best effort: report a failed move and keep going with the next file
        try:
            shutil.move(source_path, destination_path)
            moved_files_count += 1
            print(f"Moved: {filename}")
        except Exception as e:
            print(f"Error moving {filename}: {e}")

    print(f"\nTotal image files moved: {moved_files_count}")
    return moved_files_count

def main():
    """Move every image from the hard-coded training folder into the image folder."""
    # Example locations -- replace both with your own source / destination paths
    train_dir = "C:\\Users\\User2\\Desktop\\train"
    images_out_dir = "C:\\Users\\User2\\Desktop\\cowImagesFromRobo"

    move_image_files(train_dir, images_out_dir)


if __name__ == "__main__":
    main()


Moved: Event20240705045915003-mp4_frame_0000_jpg.rf.019487fc28d743d7c6eac5c203a5d225.jpg
Moved: Event20240705045915003-mp4_frame_0060_jpg.rf.4ab8c4a8d086dd7efe2ea6c2763bebdc.jpg
Moved: Event20240705045915003-mp4_frame_0120_jpg.rf.38c8e9664fb36e0681cf88f4360a71c5.jpg
Moved: Event20240705045915003-mp4_frame_0180_jpg.rf.5df47f5d067f5abfa1c4b8ac95f138b9.jpg
Moved: Event20240705045915003-mp4_frame_0240_jpg.rf.488a066a20ecc15e71aa12925ed6b1a0.jpg
Moved: Event20240705045915003-mp4_frame_0300_jpg.rf.33667a7403e74f467bb0809fbbc4a24e.jpg
Moved: Event20240705045915003-mp4_frame_0360_jpg.rf.8d4c1f63ba943414d4f25915b816b548.jpg
Moved: Event20240705045915003-mp4_frame_0420_jpg.rf.c07f33d4f9ebaae2ddd721a1fb7291e7.jpg
Moved: Event20240705045915003-mp4_frame_0480_jpg.rf.2e305abfdb2d7702e3328e3524462471.jpg
Moved: Event20240705045915003-mp4_frame_0540_jpg.rf.233a0d9a374738818eee818722b0dd8b.jpg
Moved: Event20240705045915003-mp4_frame_0600_jpg.rf.3618c7e4462dface7185e3f37fa437b8.jpg
Moved: Event202407050

In [5]:
# Move XML annotation files (and any remaining JPG images) out of the annotation folder

import os
import shutil

def move_files_by_extension(source_dir, destination_dir, file_extension):
    """
    Move files with a specific extension from source directory to destination directory.

    Only regular files located directly inside ``source_dir`` are considered
    (no recursion). Directories whose name ends with the extension are left
    untouched. The extension is compared case-insensitively against the end of
    the file name.

    Args:
    source_dir (str): Path to the source directory containing files
    destination_dir (str): Path to the destination directory for specific file types
        (created if it does not exist)
    file_extension (str): File extension to move (e.g., '.xml', '.jpg')

    Returns:
    int: Number of files that were moved successfully
    """
    # Create destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)

    # Lower-case once instead of on every iteration
    wanted_suffix = file_extension.lower()

    # Counter for moved files
    moved_files_count = 0

    for filename in os.listdir(source_dir):
        if not filename.lower().endswith(wanted_suffix):
            continue

        source_path = os.path.join(source_dir, filename)

        # os.listdir also returns directories; only regular files are moved
        if not os.path.isfile(source_path):
            continue

        destination_path = os.path.join(destination_dir, filename)

        # Best effort: report a failed move and keep going with the next file
        try:
            shutil.move(source_path, destination_path)
            moved_files_count += 1
            print(f"Moved: {filename}")
        except Exception as e:
            print(f"Error moving {filename}: {e}")

    print(f"\nTotal {file_extension} files moved: {moved_files_count}")
    return moved_files_count

def main():
    """Sort the annotation export: XML files go to one folder, JPG files to another."""
    # Example location -- replace with your source directory path
    annotations_dir = "C:\\Users\\User2\\Desktop\\annotations\\Annotations"

    # (destination folder, extension) pairs, processed in this order.
    # Replace the destination folders with your own paths.
    jobs = [
        ("C:\\Users\\User2\\Desktop\\cowAnnotationsFromRobo", '.xml'),
        ("C:\\Users\\User2\\Desktop\\imagesFromRobo", '.jpg'),
    ]
    for target_dir, extension in jobs:
        move_files_by_extension(annotations_dir, target_dir, extension)


if __name__ == "__main__":
    main()


Moved: Event20240705045915003-mp4_frame_0000_jpg.rf.019487fc28d743d7c6eac5c203a5d225.xml
Moved: Event20240705045915003-mp4_frame_0060_jpg.rf.4ab8c4a8d086dd7efe2ea6c2763bebdc.xml
Moved: Event20240705045915003-mp4_frame_0120_jpg.rf.38c8e9664fb36e0681cf88f4360a71c5.xml
Moved: Event20240705045915003-mp4_frame_0180_jpg.rf.5df47f5d067f5abfa1c4b8ac95f138b9.xml
Moved: Event20240705045915003-mp4_frame_0240_jpg.rf.488a066a20ecc15e71aa12925ed6b1a0.xml
Moved: Event20240705045915003-mp4_frame_0300_jpg.rf.33667a7403e74f467bb0809fbbc4a24e.xml
Moved: Event20240705045915003-mp4_frame_0360_jpg.rf.8d4c1f63ba943414d4f25915b816b548.xml
Moved: Event20240705045915003-mp4_frame_0420_jpg.rf.c07f33d4f9ebaae2ddd721a1fb7291e7.xml
Moved: Event20240705045915003-mp4_frame_0480_jpg.rf.2e305abfdb2d7702e3328e3524462471.xml
Moved: Event20240705045915003-mp4_frame_0540_jpg.rf.233a0d9a374738818eee818722b0dd8b.xml
Moved: Event20240705045915003-mp4_frame_0600_jpg.rf.3618c7e4462dface7185e3f37fa437b8.xml
Moved: Event202407050

In [3]:
# Cropping Cow Objects

import os
import xml.etree.ElementTree as ET
from PIL import Image

def crop_images_from_annotations(images_dir, annotations_dir, output_dir):
    """
    Crop images based on XML annotations.

    Every ``.xml`` file in ``annotations_dir`` is parsed. For each ``object``
    element that has a ``bndbox`` child, the box (xmin, ymin, xmax, ymax) is
    clamped to the image, cut out and saved to ``output_dir`` as
    ``<image stem>_<object name>_<xmin>_<ymin>_<xmax>_<ymax>.jpg``.
    The image name is taken from the ``filename`` element; when that is missing
    or empty, the annotation's own name with a ``.jpg`` extension is used.

    A malformed or empty box only skips that object; any other error skips the
    rest of that annotation file. In both cases a message is printed and
    processing continues.

    Args:
    images_dir (str): Directory containing JPG images
    annotations_dir (str): Directory containing XML annotation files
    output_dir (str): Directory to save cropped images (created if missing)

    Returns:
    int: Number of cropped images written
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Counter for cropped images
    cropped_images_count = 0

    for annotation_filename in os.listdir(annotations_dir):
        if not annotation_filename.lower().endswith('.xml'):
            continue

        annotation_path = os.path.join(annotations_dir, annotation_filename)

        try:
            root = ET.parse(annotation_path).getroot()

            # Prefer the name recorded in the annotation, fall back to the XML's own stem
            image_filename = root.findtext('.//filename')
            if not image_filename:
                image_filename = os.path.splitext(annotation_filename)[0] + '.jpg'

            image_path = os.path.join(images_dir, image_filename)
            if not os.path.exists(image_path):
                print(f"Image not found: {image_path}")
                continue

            image_stem = os.path.splitext(image_filename)[0]

            with Image.open(image_path) as img:
                img_w, img_h = img.size

                for obj in root.findall('.//object'):
                    bndbox = obj.find('bndbox')
                    if bndbox is None:
                        continue

                    # findtext returns None for a missing tag -> TypeError in float();
                    # non-numeric text -> ValueError. Either way only this object is skipped.
                    try:
                        xmin, ymin, xmax, ymax = (
                            int(float(bndbox.findtext(tag)))
                            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                        )
                    except (TypeError, ValueError) as e:
                        print(f"Skipping malformed box in {annotation_filename}: {e}")
                        continue

                    # Keep the box inside the image so the crop is not padded
                    xmin, ymin = max(0, xmin), max(0, ymin)
                    xmax, ymax = min(img_w, xmax), min(img_h, ymax)
                    if xmax <= xmin or ymax <= ymin:
                        print(f"Skipping empty box in {annotation_filename}: "
                              f"({xmin}, {ymin}, {xmax}, {ymax})")
                        continue

                    # Get object class/name for filename
                    obj_name = obj.findtext('name', 'unknown')

                    cropped_img = img.crop((xmin, ymin, xmax, ymax))

                    # JPEG cannot store alpha / palette modes; convert those before saving
                    if cropped_img.mode not in ('RGB', 'L', 'CMYK'):
                        cropped_img = cropped_img.convert('RGB')

                    output_filename = f"{image_stem}_{obj_name}_{xmin}_{ymin}_{xmax}_{ymax}.jpg"
                    output_path = os.path.join(output_dir, output_filename)

                    cropped_img.save(output_path)
                    cropped_images_count += 1
                    print(f"Cropped: {output_filename}")

        except Exception as e:
            print(f"Error processing {annotation_filename}: {e}")

    print(f"\nTotal images cropped: {cropped_images_count}")
    return cropped_images_count


def main():
    """Run the XML-based cropping on the hard-coded test-split folders."""
    # Replace with path to folder containing JPG files
    jpg_dir = "C:\\Users\\User2\\Downloads\\Latest_Cow Headbutting and Drinking_dataset\\test\\images"
    # Replace with path to folder containing XML files
    xml_dir = "C:\\Users\\User2\\Downloads\\Latest_Cow Headbutting and Drinking_dataset\\test\\labels"
    # Replace with path to save cropped images
    crops_dir = "C:\\Users\\User2\\Desktop\\test_cow_objects"

    # Run the cropping process
    crop_images_from_annotations(jpg_dir, xml_dir, crops_dir)


if __name__ == "__main__":
    main()


Total images cropped: 0


In [29]:
#Cropping cow object from dataset downloaded in yoloV8 format

import os
from PIL import Image

def crop_yolo_objects(images_dir, labels_dir, output_dir, class_id_to_crop):
    """
    Crop cow objects from YOLOv8 annotations (format: class x_center y_center width height).

    For every ``.txt`` file in ``labels_dir`` the image with the same stem is
    looked up in ``images_dir`` (``.jpg``, then ``.jpeg``, then ``.png``). Each
    label line whose class equals ``class_id_to_crop`` is converted from
    normalized coordinates to a pixel box, clamped to the image, cropped and
    saved as ``<stem>_cow_<line index>.jpg``. Lines that do not have exactly
    five numeric fields, and boxes that are empty after clamping, are skipped.

    Args:
    images_dir (str): Path to folder with .jpg or .png images
    labels_dir (str): Path to folder with corresponding .txt annotations
    output_dir (str): Where to save cropped images (created if missing)
    class_id_to_crop (int): Class ID to extract (e.g., 2 for "cow")

    Returns:
    int: Number of cropped images written
    """
    os.makedirs(output_dir, exist_ok=True)
    count = 0

    for label_file in os.listdir(labels_dir):
        if not label_file.endswith(".txt"):
            continue

        label_path = os.path.join(labels_dir, label_file)
        stem = os.path.splitext(label_file)[0]

        # First existing candidate wins; .jpg is tried first as before
        candidates = (os.path.join(images_dir, stem + ext) for ext in (".jpg", ".jpeg", ".png"))
        image_path = next((path for path in candidates if os.path.exists(path)), None)
        if image_path is None:
            print(f"⚠️ Image not found for {label_file}")
            continue

        with open(label_path, 'r') as f:
            lines = f.readlines()

        with Image.open(image_path) as img:
            img_w, img_h = img.size

            for idx, line in enumerate(lines):
                parts = line.strip().split()
                if len(parts) != 5:
                    continue  # skip malformed lines

                try:
                    class_id, x_center, y_center, width, height = map(float, parts)
                except ValueError:
                    continue  # non-numeric field: malformed as well

                if int(class_id) != class_id_to_crop:
                    continue

                # Convert normalized YOLO to pixel box coordinates
                x_center *= img_w
                y_center *= img_h
                width *= img_w
                height *= img_h

                # Clamp to the image so boxes touching the border are not padded
                x1 = max(0, int(x_center - width / 2))
                y1 = max(0, int(y_center - height / 2))
                x2 = min(img_w, int(x_center + width / 2))
                y2 = min(img_h, int(y_center + height / 2))

                if x2 <= x1 or y2 <= y1:
                    continue  # nothing left to crop

                cropped = img.crop((x1, y1, x2, y2))

                # JPEG cannot store alpha / palette modes (e.g. from PNG sources)
                if cropped.mode not in ("RGB", "L", "CMYK"):
                    cropped = cropped.convert("RGB")

                output_filename = f"{stem}_cow_{idx}.jpg"
                cropped.save(os.path.join(output_dir, output_filename))
                count += 1

                print(f"✅ Cropped cow: {output_filename}")

    print(f"\n🎉 Done! Total cows cropped: {count}")
    return count

# Example usage: crop a single class from the "valid" split of the YOLO export.
# The names assigned below become notebook-level globals when the cell runs.
if __name__ == "__main__":
    images_folder = "C:\\Users\\User2\\Downloads\\Latest_Cow Headbutting and Drinking_dataset\\valid\\images"   # Replace with path to folder containing the images
    labels_folder = "C:\\Users\\User2\\Downloads\\Latest_Cow Headbutting and Drinking_dataset\\valid\\labels" # Replace with path to folder containing the YOLO .txt label files
    output_folder = "C:\\Users\\User2\\Desktop\\valid_cow_objects"  # Replace with path to save cropped images
    cow_class_id = 1  # Replace with correct ID from data.yaml

    crop_yolo_objects(images_folder, labels_folder, output_folder, cow_class_id)


✅ Cropped cow: Event20240705045915003_mp4_frame_0120_jpg.rf.9a4a6a40532b3e86cdd60ddd2ca88b07_cow_1.jpg
✅ Cropped cow: Event20240705045915003_mp4_frame_1560_jpg.rf.a5ffd5e9448d19a9680abe10f3ac0a5c_cow_0.jpg
✅ Cropped cow: Event20240705045915003_mp4_frame_2760_jpg.rf.ccc87578b1bb25f6c993346b29f44fd5_cow_1.jpg
✅ Cropped cow: Event20240705050311006_mp4_frame_1440_jpg.rf.ff49427188a641501a2f0c2d34c1fc9b_cow_1.jpg
✅ Cropped cow: Event20240705050311006_mp4_frame_1680_jpg.rf.7ada21b2e9a79cc9a02e7c8b8ce7a025_cow_1.jpg
✅ Cropped cow: Event20240705050415003_mp4_frame_0000_jpg.rf.83252c1cadd1e083e3dc65aee7f70a08_cow_0.jpg
✅ Cropped cow: Event20240705050811002_mp4_frame_0540_jpg.rf.7c881b3289d6b47a4d9353bacea0294d_cow_0.jpg
✅ Cropped cow: Event20240705050811002_mp4_frame_1500_jpg.rf.4f5e30d6be3e8e9edd6bec14b0909936_cow_0.jpg
✅ Cropped cow: Event20240705051415003_mp4_frame_0360_jpg.rf.6ccedf43fb1d602f32d065887db01382_cow_0.jpg
✅ Cropped cow: Event20240705051415003_mp4_frame_0360_jpg.rf.6ccedf43fb1d6

In [17]:
# Cropping all classes: brush, cow, cow_head, water tub

import os
from PIL import Image

def crop_yolo_objects(images_dir, labels_dir, output_dir, selected_classes=(0, 1, 2, 3)):
    """
    Crop and save objects from images based on YOLOv8 annotations for selected classes.

    For every ``.txt`` file in ``labels_dir`` the image with the same stem
    (``.jpg`` preferred, otherwise ``.png``) is opened. Each label line
    ``class x_center y_center width height`` (normalized) whose class is in
    ``selected_classes`` is converted to a pixel box, clamped to the image and
    saved as ``<stem>_class<id>_<line index>.jpg``. Lines that are malformed,
    or whose box is empty after clamping, are skipped individually so the
    remaining labels of the same image are still processed.

    Args:
        images_dir (str): Directory containing image files (JPG/PNG).
        labels_dir (str): Directory containing .txt label files (YOLO format).
        output_dir (str): Directory to save cropped object images (created if missing).
        selected_classes (list | tuple | set): Class indices to crop.
            Example: [1, 2] for 'cow' and 'cow_head'.

    Returns:
        int: Number of cropped objects written.
    """
    os.makedirs(output_dir, exist_ok=True)
    cropped_count = 0

    for filename in os.listdir(labels_dir):
        if not filename.endswith(".txt"):
            continue

        label_path = os.path.join(labels_dir, filename)
        image_base = os.path.splitext(filename)[0]
        image_path_jpg = os.path.join(images_dir, image_base + ".jpg")
        image_path_png = os.path.join(images_dir, image_base + ".png")

        image_path = image_path_jpg if os.path.exists(image_path_jpg) else image_path_png
        if not os.path.exists(image_path):
            print(f"⚠️ Image file not found for: {filename}")
            continue

        try:
            with Image.open(image_path) as img, open(label_path, 'r') as file:
                width, height = img.size

                for idx, line in enumerate(file):
                    parts = line.strip().split()
                    if len(parts) != 5:
                        continue

                    try:
                        # Some exporters write the class as "1.0"; int("1.0") would raise
                        class_id = int(float(parts[0]))
                        x_center, y_center, w, h = map(float, parts[1:])
                    except ValueError:
                        continue  # non-numeric field: skip just this line

                    if class_id not in selected_classes:
                        continue

                    x1 = int((x_center - w / 2) * width)
                    y1 = int((y_center - h / 2) * height)
                    x2 = int((x_center + w / 2) * width)
                    y2 = int((y_center + h / 2) * height)

                    # Clamp to the image bounds
                    x1, y1 = max(0, x1), max(0, y1)
                    x2, y2 = min(width, x2), min(height, y2)

                    # A zero-area or inverted box cannot be cropped / saved
                    if x2 <= x1 or y2 <= y1:
                        continue

                    cropped = img.crop((x1, y1, x2, y2))

                    # JPEG cannot store alpha / palette modes (e.g. from PNG sources)
                    if cropped.mode not in ("RGB", "L", "CMYK"):
                        cropped = cropped.convert("RGB")

                    class_name = f"class{class_id}"
                    output_filename = f"{image_base}_{class_name}_{idx}.jpg"
                    output_path = os.path.join(output_dir, output_filename)
                    cropped.save(output_path)
                    cropped_count += 1

        except Exception as e:
            print(f"❌ Error processing {image_path}: {e}")

    print(f"\n✅ Cropping complete. Total cropped objects: {cropped_count}")
    return cropped_count


# Example usage: crop every selected class from the "valid" split.
# The names assigned below become notebook-level globals when the cell runs.
if __name__ == "__main__":
    # ✅ EDIT THESE PATHS
    images_dir = r"C:\Users\User2\Downloads\Latest_Cow Headbutting and Drinking_dataset\valid\images"
    labels_dir = r"C:\Users\User2\Downloads\Latest_Cow Headbutting and Drinking_dataset\valid\labels"
    output_dir = r"C:\Users\User2\Desktop\headbutt_drinking_brushing_cropped_dataset\valid"
    # ✅ EDIT CLASS INDICES (e.g., 0: brush, 1: cow, 2: cow_head, 3: watertub)
    selected_classes = [0, 1, 2, 3]  # All four classes; use e.g. [1, 2] for only cow and cow_head

    crop_yolo_objects(images_dir, labels_dir, output_dir, selected_classes)



✅ Cropping complete. Total cropped objects: 1021


In [7]:
pip install --force-reinstall tensorflow streamlit numba pywavelets contourpy


Collecting tensorflow
  Using cached tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting numba
  Downloading numba-0.61.2-cp312-cp312-win_amd64.whl.metadata (2.9 kB)
Collecting pywavelets
  Downloading pywavelets-1.8.0-cp312-cp312-win_amd64.whl.metadata (9.0 kB)
Collecting contourpy
  Downloading contourpy-1.3.2-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using

  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mdit-py-plugins 0.3.0 requires markdown-it-py<3.0.0,>=1.0.0, but you have markdown-it-py 3.0.0 which is incompatible.
torchaudio 2.4.1 requires torch==2.4.1, but you have torch 2.6.0 which is incompatible.


In [15]:
!pip install matplotlib==3.10.0


Collecting matplotlib==3.10.0
  Using cached matplotlib-3.10.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Using cached matplotlib-3.10.0-cp312-cp312-win_amd64.whl (8.0 MB)
Installing collected packages: matplotlib
Successfully installed matplotlib-3.10.0


In [17]:
!pip install numpy==1.26.4



Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.1.3
    Uninstalling numpy-2.1.3:
      Successfully uninstalled numpy-2.1.3
Successfully installed numpy-1.26.4


In [9]:
pip install notebook ipykernel ipympl matplotlib --upgrade


Collecting notebook
  Downloading notebook-7.4.0-py3-none-any.whl.metadata (10 kB)
Collecting ipykernel
  Using cached ipykernel-6.29.5-py3-none-any.whl.metadata (6.3 kB)
Collecting ipympl
  Downloading ipympl-0.9.7-py3-none-any.whl.metadata (8.7 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from notebook)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collecting jupyterlab<4.5,>=4.4.0rc0 (from notebook)
  Downloading jupyterlab-4.4.0-py3-none-any.whl.metadata (16 kB)
Collecting matplotlib-inline>=0.1 (from ipykernel)
  Using cached matplotlib_inline-0.1.7-py3-none-any.whl.metadata (3.9 kB)
Collecting fqdn (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook)
  Downloading fqdn-1.5.1-py3-none-any.whl.metadata (1.4 kB)
Collecting isoduration (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<

  You can safely remove it manually.


In [7]:
pip install --upgrade threadpoolctl scikit-learn


Collecting threadpoolctl
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------  11.0/11.1 MB 173.4 MB/s eta 0:00:01
   ---------------------------------------- 11.1/11.1 MB 31.5 MB/s eta 0:00:00
Installing collected packages: threadpoolctl, scikit-learn
  Attempting uninstall: threadpoolctl
    Found existing installation: threadpoolctl 2.2.0
    Uninstalling threadpoolctl-2.2.0:
      Successfully uninstalled threadpoolctl-2.2.0
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.4.2
    Uninstalling scikit-learn-1.4.2:
      Successfully uninstalled scikit-learn-1.4.2
Successfully installed scikit-learn-

  You can safely remove it manually.


In [9]:
# Clustering Images

import os

# Cap the native thread pools (OpenMP / OpenBLAS / MKL) at a single thread.
# These variables are normally read when the native numeric libraries are
# loaded, so they must be set BEFORE cv2 / numpy / scikit-learn are imported.
# If those packages were already imported in this kernel, restart it first.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

import shutil

import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from skimage.feature import local_binary_pattern
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


# Parameters for LBP (Local Binary Pattern); passed to
# local_binary_pattern(gray, N_POINTS, RADIUS, METHOD) in extract_features.
RADIUS = 3
N_POINTS = 24
METHOD = 'uniform'
HIST_BINS = 64  # Color histogram bins (per channel, over the range [0, 256))

def extract_features(image_path):
    """
    Build a texture + color descriptor for one image.

    The image is read with ``cv2.imread``. Its grayscale, histogram-equalized
    version is turned into a Local Binary Pattern map (using the module-level
    ``N_POINTS``, ``RADIUS`` and ``METHOD``) whose values are counted into
    ``N_POINTS + 2`` bins. A ``HIST_BINS``-bin histogram is computed for each
    channel returned by ``cv2.split``. Both parts are concatenated and divided
    by the total LBP count (plus a small epsilon).

    Args:
    image_path (str): Path of the image file to describe

    Returns:
    numpy.ndarray | None: 1-D feature vector, or None when the image cannot be
    read or any processing step raises (a message is printed in both cases)
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        print(f"❌ Failed to read image: {image_path}")
        return None

    try:
        # Texture part: LBP codes of the contrast-equalized grayscale image
        equalized = cv2.equalizeHist(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY))
        texture_map = local_binary_pattern(equalized, N_POINTS, RADIUS, METHOD)
        lbp_hist, _ = np.histogram(
            texture_map.ravel(),
            bins=np.arange(0, N_POINTS + 3),
            range=(0, N_POINTS + 2),
        )

        # Color part: one histogram per channel, appended value by value
        color_values = []
        for channel in cv2.split(bgr):
            channel_hist = cv2.calcHist([channel], [0], None, [HIST_BINS], [0, 256])
            color_values.extend(channel_hist.flatten())

        # Join both parts and scale by the number of LBP samples
        descriptor = np.concatenate([lbp_hist, np.array(color_values)]).flatten()
        scale = lbp_hist.sum() + 1e-6
        return descriptor / scale
    except Exception as e:
        print(f"❌ Error processing {image_path}: {e}")
        return None

def organize_images(clusters, image_paths, output_dir):
    """
    Copy each image into ``<output_dir>/cluster_<id>/`` according to its label.

    ``clusters`` and ``image_paths`` are walked in parallel; the folder for a
    label is created on demand and the original files are left in place.
    """
    for label, src_path in zip(clusters, image_paths):
        target_dir = os.path.join(output_dir, f"cluster_{label}")
        os.makedirs(target_dir, exist_ok=True)

        file_name = os.path.basename(src_path)
        shutil.copy(src_path, os.path.join(target_dir, file_name))

def clear_directory(path):
    """
    Leave ``path`` as an existing, empty directory.

    Anything already at ``path`` is removed with ``shutil.rmtree`` (including
    all of its contents) before the directory is created again.
    """
    already_present = os.path.exists(path)
    if already_present:
        shutil.rmtree(path)

    os.makedirs(path)

def main(image_dir=r"C:\Users\User2\Desktop\headbutt_cropped_cow_objects\cow_objects",
         output_dir=r"C:\Users\User2\Desktop\headbutt_cropped_cow_objects\cow_clusters",
         n_clusters=80):
    """
    Cluster the images in ``image_dir`` and copy them into per-cluster folders.

    Pipeline: extract a feature vector per image (``extract_features``),
    standardize, reduce with PCA keeping 95% of the variance, run KMeans and
    copy every image to ``<output_dir>/cluster_<id>/``.

    ``output_dir`` is wiped (``clear_directory``) only after clustering has
    succeeded, so a missing input folder or a failed run does not destroy the
    results of a previous one.

    Args:
    image_dir (str): Folder with the .jpg / .png images to cluster
    output_dir (str): Folder that receives the ``cluster_<id>`` sub-folders;
        its previous content is deleted
    n_clusters (int): Requested number of clusters; reduced to the number of
        usable images when there are fewer images than clusters
    """
    # Fail fast, before anything is deleted
    if not os.path.isdir(image_dir):
        print(f"⚠️ Image directory does not exist: {image_dir}")
        return

    image_paths = [
        os.path.join(image_dir, f) for f in os.listdir(image_dir)
        if f.lower().endswith(('.jpg', '.png'))
    ]

    print(f"🔍 Found {len(image_paths)} images.")

    # Keep paths and features aligned: unreadable images are dropped from both
    valid_image_paths = []
    features = []
    for path in image_paths:
        feat = extract_features(path)
        if feat is not None:
            valid_image_paths.append(path)
            features.append(feat)

    if not features:
        print("⚠️ No valid images with features found. Exiting.")
        return

    # Scaling / PCA variance estimates are not meaningful for a single sample
    if len(features) < 2:
        print("⚠️ At least two valid images are needed for clustering. Exiting.")
        return

    # Normalize features
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)

    # PCA (Optional for dimensionality reduction)
    pca = PCA(n_components=0.95)  # Keep 95% variance
    reduced = pca.fit_transform(scaled)

    print(f"📉 Reduced to {reduced.shape[1]} features using PCA.")

    # KMeans cannot form more clusters than there are samples
    k = min(n_clusters, len(features))
    if k < n_clusters:
        print(f"⚠️ Only {len(features)} valid images; using {k} clusters instead of {n_clusters}.")

    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42)
    clusters = kmeans.fit_predict(reduced)

    # Replace the previous result only now that a new one is available
    clear_directory(output_dir)
    organize_images(clusters, valid_image_paths, output_dir)

    print(f"✅ Clustering complete. Images saved to {output_dir}")

if __name__ == "__main__":
    main()


# import cv2
# import os
# import numpy as np
# import shutil
# from sklearn.cluster import KMeans
# from sklearn.preprocessing import StandardScaler
# from scipy.spatial import distance
# from skimage.feature import local_binary_pattern
# from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt
# # matplotlib.use("TkAgg")  # or "TkAgg" if you're in a GUI environment


# # Parameters for LBP
# RADIUS = 3
# N_POINTS = 24
# METHOD = 'uniform'
# HIST_BINS = 64  # for color histogram

# def extract_features(image_path):
#     image = cv2.imread(image_path)

#     # Check if image is read correctly
#     if image is None:
#         print(f"Failed to read image: {image_path}")
#         return []

#     # Preprocessing: Histogram Equalization
#     img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     img_gray = cv2.equalizeHist(img_gray)
#     #local binary pattern
#     lbp = local_binary_pattern(img_gray, N_POINTS, RADIUS, METHOD)
#     lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, N_POINTS + 3), range=(0, N_POINTS + 2))

#     # Color histograms
#     chans = cv2.split(image)
#     colors = ("b", "g", "r")
#     features = []

#     for (chan, color) in zip(chans, colors):
#         hist = cv2.calcHist([chan], [0], None, [HIST_BINS], [0, 256])
#         features.extend(hist.flatten())  # Flatten the histogram here

#     return np.concatenate([lbp_hist, np.asarray(features)]).flatten() / (lbp_hist.sum() + 1e-6)

# def organize_images(clusters, image_paths, output_dir):#organizes images into clusters
#     for cluster_id, image_path in zip(clusters, image_paths):
#         cluster_path = os.path.join(output_dir, f"cluster_{cluster_id}")
#         os.makedirs(cluster_path, exist_ok=True)
#         image_name = os.path.basename(image_path)
#         #os.rename(image_path, os.path.join(cluster_path, image_name))
#         shutil.copy(image_path, os.path.join(cluster_path, image_name))
# def clear_directory(dir_path):
#     if os.path.exists(dir_path):
#         shutil.rmtree(dir_path)
#     os.makedirs(dir_path)
# def visualize_pca_variance(pca):#This function is intended for visualizing the explained variance of PCA components but is currently commented out.
#     explained_var_ratio = pca.explained_variance_ratio_
#     plt.figure(figsize=(10, 5))
#     plt.bar(range(len(explained_var_ratio)), explained_var_ratio, alpha=0.5, align='center', label='individual explained variance')
#     plt.ylabel('Explained variance ratio')
#     plt.xlabel('Principal components')
#     plt.legend(loc='best')
#     plt.tight_layout()
#     plt.show()

# def main():
#     image_dir = "C:\\Users\\User2\\Desktop\\headbutt_cropped_cow_objects\\cow_objects"
#     output_dir = "C:\\Users\\User2\\Desktop\\headbutt_cropped_cow_objects\\cow_clusters"

#     #clear_directory(output_dir)
#     # Check if the image directory exists
#     if not os.path.exists(image_dir):
#         print(f"Image directory does not exist: {image_dir}")
#         return

#     image_paths = [os.path.join(image_dir, fname) for fname in os.listdir(image_dir) if fname.endswith(('.png', '.jpg'))]

#     print("Image Paths:", image_paths)

#     features = [extract_features(img_path) for img_path in image_paths]
#     features = [f for f in features if len(f) != 0]

#     if not features:
#         print("No valid features extracted. Exiting.")
#         return
#     # Standardization
#     scaler = StandardScaler()
#     scaled_features = scaler.fit_transform(features)
#     #PCA (Principal Component Analysis)
#     pca = PCA()
#     reduced_features = pca.fit_transform(scaled_features)

#     # Visualize explained variance of PCA components
#     #visualize_pca_variance(pca)

#     # Use KMeans++ initialization
#     kmeans = KMeans(n_clusters=150, init='k-means++', random_state=42)
#     clusters = kmeans.fit_predict(reduced_features)

#     organize_images(clusters, image_paths, output_dir)


#     """pca = PCA(n_components=25)  # changed PCA components
#     reduced_features = pca.fit_transform(features)

#     kmeans = KMeans(n_clusters=5, random_state=42)  # clustering into more groups
#     clusters = kmeans.fit_predict(reduced_features)

#     organize_images(clusters, image_paths, output_dir)"""

# if __name__ == "__main__":
#     main()

🔍 Found 2141 images.
📉 Reduced to 27 features using PCA.
✅ Clustering complete. Images saved to C:\Users\User2\Desktop\headbutt_cropped_cow_objects\cow_clusters


In [11]:
#Splitting Dataset into Train, Test, Validate

import os
import shutil
import random

def _copy_subset(file_names, source_dir, target_dir):
    """Copy the named files from source_dir into target_dir (created if missing)."""
    os.makedirs(target_dir, exist_ok=True)
    for file_name in file_names:
        shutil.copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))


def split_folder_contents(folder_path, output_base, ratios=(0.6, 0.2, 0.2), seed=None):
    """
    Split the files of every sub-folder into train / test / val copies.

    For each directory ``<folder_path>/<name>`` the contained files are
    shuffled and copied to ``<output_base>/train/<name>``,
    ``<output_base>/test/<name>`` and ``<output_base>/val/<name>``.
    Originals are left in place.

    Sizes: train gets ``int(n * ratios[0])`` files, test gets
    ``max(1, int(n * ratios[1]))`` of what is left, and val receives the
    remainder (``ratios[2]`` is not used in the computation). For very small
    folders train and/or val can therefore be empty.

    Existing files in ``output_base`` are not removed. Re-running with a
    different shuffle can leave the same file in several subsets; pass a fixed
    ``seed`` (or clear the output first) to make re-runs reproduce the same split.

    Args:
    folder_path (str): Folder whose sub-folders are split
    output_base (str): Folder that receives the train / test / val trees
    ratios (tuple): (train, test, val) fractions
    seed (int | None): Seed for a private random generator; None keeps the
        previous behaviour of shuffling with the global ``random`` state

    Returns:
    dict: ``{subfolder: (n_train, n_test, n_val)}`` for every processed sub-folder
    """
    # A private generator keeps a seeded run from disturbing the global random state
    rng = random.Random(seed) if seed is not None else random
    summary = {}

    # Sorted so the result does not depend on the directory listing order
    for subfolder in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(full_path):
            continue

        files = sorted(
            f for f in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, f))
        )
        rng.shuffle(files)  # Shuffle for random split

        n = len(files)
        train_end = int(n * ratios[0])
        # Reserve at least one slot for test; the slice is simply empty if no files remain
        test_end = train_end + max(1, int(n * ratios[1]))

        subsets = {
            'train': files[:train_end],
            'test': files[train_end:test_end],
            'val': files[test_end:],
        }

        for subset_name, subset_files in subsets.items():
            _copy_subset(subset_files, full_path, os.path.join(output_base, subset_name, subfolder))

        counts = (len(subsets['train']), len(subsets['test']), len(subsets['val']))
        summary[subfolder] = counts
        print(f"Processed {subfolder}: {counts[0]} train, {counts[1]} test, {counts[2]} val")

    return summary

# Usage

# Folder that contains one sub-folder per cluster (the output of the clustering cell)
source_folder = "C:\\Users\\User2\\Desktop\\headbutt_cropped_cow_objects\\cow_clusters"  # Replace with your folder path
# Folder that will receive the train/ test/ val/ trees
output_folder = "C:\\Users\\User2\\Desktop\\headbutt_cropped_cow_objects\\splitted_cow_clusters"  # Replace with your output folder path
# Runs with the default ratios (0.6, 0.2, 0.2)
split_folder_contents(source_folder, output_folder)

Processed cluster_0: 23 train, 7 test, 9 val
Processed cluster_1: 13 train, 4 test, 6 val
Processed cluster_10: 18 train, 6 test, 7 val
Processed cluster_11: 10 train, 3 test, 4 val
Processed cluster_12: 21 train, 7 test, 7 val
Processed cluster_13: 16 train, 5 test, 6 val
Processed cluster_14: 15 train, 5 test, 6 val
Processed cluster_15: 24 train, 8 test, 9 val
Processed cluster_16: 21 train, 7 test, 7 val
Processed cluster_17: 14 train, 4 test, 6 val
Processed cluster_18: 13 train, 4 test, 5 val
Processed cluster_19: 18 train, 6 test, 7 val
Processed cluster_2: 19 train, 6 test, 7 val
Processed cluster_20: 33 train, 11 test, 12 val
Processed cluster_21: 16 train, 5 test, 6 val
Processed cluster_22: 6 train, 2 test, 2 val
Processed cluster_23: 36 train, 12 test, 13 val
Processed cluster_24: 19 train, 6 test, 8 val
Processed cluster_25: 21 train, 7 test, 8 val
Processed cluster_26: 13 train, 4 test, 6 val
Processed cluster_27: 13 train, 4 test, 5 val
Processed cluster_28: 12 train, 4 