In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
!pip install scikit-learn
from sklearn.preprocessing import StandardScaler
# Module-level scaler: fitted on the training matrix and passed to
# detect_defect by the script section below the function definitions.
scaler = StandardScaler()

def load_and_preprocess_image(dataset_paths, scaler):
    """Load one image as grayscale and return it as a standardized feature row.

    The image is resized to 100x100 and standardized using the mean and
    standard deviation of its own pixels, then returned as a single row.

    Args:
        dataset_paths (str): Path to a single image file. (The parameter name
            is kept for backward compatibility; it is not a list.)
        scaler (sklearn.preprocessing.StandardScaler): Scaler used to
            standardize the pixels. It is re-fitted on every call, so any
            earlier fit on this object is discarded.

    Returns:
        np.ndarray | None: Array of shape (1, 10000) holding the standardized
        pixels, or None if OpenCV could not read the file.
    """
    image = cv2.imread(dataset_paths, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        return None

    pixels = cv2.resize(image, (100, 100)).astype(np.float64)

    # Fit on the pixels as a single column (one sample per pixel). Fitting on
    # the flattened image as ONE sample of 10000 features gives every feature
    # zero variance, which makes transform() return all zeros for any image.
    standardized = scaler.fit_transform(pixels.reshape(-1, 1))

    # Callers index the result as a one-row 2-D array.
    return standardized.reshape(1, -1)

def load_and_preprocess_dataset(dataset_paths):
    """Preprocess every image in ``dataset_paths`` and stack the results.

    Each path is handed to ``load_and_preprocess_image`` together with one
    scaler created inside this function. Paths that fail to load are
    reported on stdout and skipped.

    Args:
        dataset_paths (list): Paths to the image files.

    Returns:
        np.ndarray: ``np.array`` built from the per-image results, in input
        order (one entry per image that loaded).

    Raises:
        ValueError: If no image could be loaded.
    """
    shared_scaler = StandardScaler()  # one scaler instance reused for every image
    features = []

    for path in dataset_paths:
        processed = load_and_preprocess_image(path, shared_scaler)
        if processed is None:
            print(f"Error: Skipping image {path}")  # Log specific errors (optional)
            continue
        features.append(processed)

    print(f"{len(features)} images successfully preprocessed.")

    if not features:
        raise ValueError("Dataset is empty. Please ensure images are loaded correctly.")

    return np.array(features)

def apply_kmeans(dataset, k):
    """Fit a KMeans model with ``k`` clusters on ``dataset``.

    Args:
        dataset (np.ndarray): The preprocessed dataset features.
        k (int): Number of clusters.

    Returns:
        sklearn.cluster.KMeans: The fitted model.
    """
    # random_state is pinned to 0, as before, so the seed is fixed.
    model = KMeans(random_state=0, n_clusters=k)
    # fit() returns the estimator itself, so the fitted model can be
    # returned directly.
    return model.fit(dataset)

def detect_defect(dataset_paths, kmeans_model, scaler):
    """Run defect detection on one image path or a collection of paths.

    Args:
        dataset_paths (list | tuple | str): Either a list/tuple of image
            paths, or a single image path. Anything that is not a list or
            tuple is treated as one path.
        kmeans_model (sklearn.cluster.KMeans): The trained KMeans model.
        scaler (sklearn.preprocessing.StandardScaler): The scaler used for
            preprocessing; forwarded unchanged to ``detect_single_defect``.

    Returns:
        list: One message per path, in input order. A single path yields a
        one-element list; an empty collection yields an empty list.
    """
    # Tuples are accepted alongside lists so that a tuple of paths is not
    # mistaken for a single path and passed whole to the image loader.
    if isinstance(dataset_paths, (list, tuple)):
        paths = dataset_paths
    else:
        paths = [dataset_paths]

    return [detect_single_defect(path, kmeans_model, scaler) for path in paths]

def detect_single_defect(dataset_paths, kmeans_model, scaler):
    """Classify a single image with the trained KMeans model.

    Args:
        dataset_paths (str): Path to a single image file. (The parameter name
            is kept for backward compatibility; it is not a list.)
        kmeans_model (sklearn.cluster.KMeans): Trained KMeans model.
        scaler (sklearn.preprocessing.StandardScaler): Scaler forwarded to
            ``load_and_preprocess_image``.

    Returns:
        str: ``"Image: <path>, Defect: <label>"`` on success, or an error
        message if the image could not be loaded.
    """
    image = load_and_preprocess_image(dataset_paths, scaler)
    if image is None:
        return f"Error: Image {dataset_paths} not loaded or is empty"

    # The loader returns a one-row 2-D array; take that row and present it to
    # predict() as a single sample.
    features = image[0].reshape((1, -1))
    cluster = int(kmeans_model.predict(features)[0])

    # NOTE(review): KMeans numbers its clusters arbitrarily, so this mapping
    # from cluster index to label is a placeholder -- confirm which cluster
    # corresponds to which condition after training.
    labels = ['Defect Type 1', 'Defect Type 2', 'No Defect']
    if 0 <= cluster < len(labels):
        label = labels[cluster]
    else:
        # A model with more clusters than labels would otherwise raise
        # IndexError here; report the raw cluster index instead.
        label = f"Cluster {cluster}"

    return f"Image: {dataset_paths}, Defect: {label}"
# Define the paths to your dataset images
dataset_paths = ['Picture1.jpg', 'Picture2.jpg', 'Picture3.jpg']  # Update with actual dataset paths

# Load and preprocess the dataset, then flatten it to one row per image.
# 10000 is the 100 * 100 pixel count produced by the resize in
# load_and_preprocess_image.
dataset = load_and_preprocess_dataset(dataset_paths)
dataset = dataset.reshape(-1, 10000)
# NOTE(review): load_and_preprocess_image fits the scaler it is given again
# for every image it loads, so this fit is replaced as soon as detection
# starts -- confirm whether it is still needed.
scaler.fit(dataset)

# Applying KMeans Clustering on preprocessed dataset
# NOTE(review): k=2 here, while detect_single_defect maps cluster indices onto
# three label strings -- confirm the intended number of clusters.
kmeans_model = apply_kmeans(dataset, 2)  # Assuming 2 clusters for defective and non-defective

# Define the paths to your test images
test_image_paths = ['Picture4.jpg','Picture2.jpg']  # Update with actual test image paths

# Test the defect detection on each test image. Each path is passed on its
# own (a str, not a list), so detect_defect returns a one-element list per call.
for path in test_image_paths:
    defect_message = detect_defect(path, kmeans_model, scaler)
    print(f"{path}: {defect_message}")


3 images successfully preprocessed.
Picture4.jpg: ['Image: Picture4.jpg, Defect: Defect Type 1']
Picture2.jpg: ['Image: Picture2.jpg, Defect: Defect Type 1']


  kmeans.fit(dataset)
