In [1]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

2025-11-29 14:11:01.933884: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-29 14:11:01.950666: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-29 14:11:01.969989: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8473] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-29 14:11:01.976364: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1471] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-29 14:11:01.991762: I tensorflow/core/platform/cpu_feature_guar

In [2]:
import kagglehub

# Fetch the latest version of the casting-product dataset through kagglehub.
# The returned value is used below as the root directory of the dataset files.
path = kagglehub.dataset_download(
    "ravirajsinh45/real-life-industrial-dataset-of-casting-product"
)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ravirajsinh45/real-life-industrial-dataset-of-casting-product?dataset_version_number=2...


100%|██████████| 100M/100M [00:09<00:00, 11.6MB/s] 

Extracting files...





In [3]:
# Train/test image folders inside the downloaded dataset. Each one is expected
# to contain one sub-folder per class (def_front / ok_front).
data_root = os.path.join(path, 'casting_data', 'casting_data')
dataset = os.path.join(data_root, 'train')
testset = os.path.join(data_root, 'test')

In [4]:
# Instead of trying to save the model, create a class that takes inputs from the user and provides outputs

class DefectDetector:
    """Defect classifier: frozen MobileNetV2 embeddings + a small dense head.

    Constructing an instance trains the model immediately. Images are read
    from disk with OpenCV, converted to grayscale, sharpened, resized and fed
    through an ImageNet-pretrained MobileNetV2 (global-average pooled, 1280
    features). Only the dense classifier on top of those embeddings is trained.

    Attributes set by ``__init__``:
        class_indices: mapping of class-folder name -> integer label.
        feature_extractor: the frozen MobileNetV2 model.
        classifier: the trained dense classifier.
    """

    # Side length (pixels) of the square input the MobileNetV2 backbone is
    # built for. Used for resizing and for the backbone's input shape so the
    # two can never drift apart.
    IMG_SIZE = 224
    # Number of images preprocessed and embedded at a time, so the full
    # dataset never has to sit in memory as one float32 array.
    EMBED_CHUNK = 256
    # Seed for the one-off shuffle applied before Keras carves out its
    # validation split (keeps runs reproducible).
    SHUFFLE_SEED = 42

    def __init__(self, train_path):
        """Build and train the detector from a directory of labelled images.

        Args:
            train_path: directory containing one sub-folder per class
                (e.g. one for defective parts and one for good parts).
        """
        # The generator is used only to enumerate files and labels
        # (``filepaths`` / ``classes``); pixels are always loaded through
        # OpenCV in ``preprocess_defect_image``. Augmentation or rescaling
        # configured here would therefore never be applied, so none is set.
        # NOTE(review): the 10% of files excluded by subset='training' is never
        # used anywhere; consider dropping validation_split here, since
        # ``_train_model`` already holds out its own validation data.
        train_datagen = ImageDataGenerator(validation_split=0.1)

        train_data = train_datagen.flow_from_directory(
            train_path,
            target_size=(self.IMG_SIZE, self.IMG_SIZE),
            batch_size=1,
            class_mode='sparse',
            subset='training',
            shuffle=False
        )

        num_classes = len(train_data.class_indices)
        self.class_indices = train_data.class_indices

        self._create_model(num_classes)
        self._train_model(train_data)

    # Preprocessing
    def preprocess_defect_image(self, path):
        """Load an image and emphasise surface detail.

        Args:
            path: path of the image file to read.

        Returns:
            A 3-channel uint8 image whose channels all hold the sharpened
            grayscale version of the input.

        Raises:
            ValueError: if OpenCV cannot read the file.
        """
        # 1. Load image
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"Cannot load image: {path}")

        # 2. Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # 3. CLAHE contrast enhancement is intentionally skipped to keep the
        #    pipeline simple.

        # 4. Sharpen to highlight cracks/dents
        kernel = np.array([
            [0, -1,  0],
            [-1, 5, -1],
            [0, -1,  0]
        ])
        sharpened = cv2.filter2D(gray, -1, kernel)

        # 5. Convert back to 3 channels (the backbone expects 3 channels)
        return cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)

    def extract_embeddings_from_generator(self, generator):
        """Compute backbone embeddings for every file listed by a generator.

        Args:
            generator: object exposing ``filepaths`` (list of image paths) and
                ``classes`` (integer label per path), as produced by
                ``flow_from_directory``.

        Returns:
            Tuple ``(embeddings, labels)`` in the same order as
            ``generator.filepaths``.

        Raises:
            ValueError: if the generator lists no images, or an image cannot
                be read.
        """
        file_paths = generator.filepaths
        if len(file_paths) == 0:
            raise ValueError("No images found: cannot extract embeddings")

        # Preprocess and embed in chunks so only EMBED_CHUNK images are held
        # in memory at once instead of the whole dataset.
        chunks = []
        for start in range(0, len(file_paths), self.EMBED_CHUNK):
            batch = np.stack([
                self.preprocess_for_model(self.preprocess_defect_image(p))
                for p in file_paths[start:start + self.EMBED_CHUNK]
            ])
            chunks.append(
                self.feature_extractor.predict(batch, batch_size=32, verbose=0)
            )

        embeddings = np.concatenate(chunks, axis=0)
        labels = np.array(generator.classes)
        return embeddings, labels

    # Preprocess, resize, normalize
    def preprocess_for_model(self, img):
        """Resize an OpenCV image and apply MobileNetV2 input preprocessing.

        Args:
            img: 3-channel image array. Images produced by
                ``preprocess_defect_image`` have identical channels, so channel
                order does not matter for them.

        Returns:
            float32 array of shape (IMG_SIZE, IMG_SIZE, 3) ready for the
            feature extractor.
        """
        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
        img = tf.keras.applications.mobilenet_v2.preprocess_input(img.astype(np.float32))
        return img

    def _create_model(self, num_classes):
        """Create the frozen backbone and the trainable classifier head.

        Args:
            num_classes: number of output classes of the classifier.
        """
        self.feature_extractor = MobileNetV2(
            input_shape=(self.IMG_SIZE, self.IMG_SIZE, 3),
            pooling='avg',
            include_top=False,
            weights='imagenet'
        )
        self.feature_extractor.trainable = False  # freeze for fast training

        self.classifier = models.Sequential([
            layers.Input(shape=(1280,)),
            layers.Dense(256, activation="relu"),
            layers.Dropout(0.3),
            layers.Dense(num_classes, activation="softmax")
        ])
        self.classifier.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"]
        )

    def _train_model(self, train_data):
        """Train the classifier head on embeddings of the training images.

        Args:
            train_data: generator listing the training files and labels.
        """
        train_embeddings, train_labels = self.extract_embeddings_from_generator(train_data)
        train_embeddings = np.asarray(train_embeddings)
        train_labels = np.asarray(train_labels)

        # Keras takes the validation split from the *last* samples, before any
        # shuffling. The generator lists files class by class, so without this
        # shuffle the validation set would contain a single class.
        order = np.random.default_rng(self.SHUFFLE_SEED).permutation(len(train_labels))

        self.classifier.fit(
            train_embeddings[order], train_labels[order],
            batch_size=32,
            epochs=10,
            validation_split=0.1
        )

    def evaluate(self, test_path):
        """Print the classifier's accuracy on a directory of labelled images.

        Args:
            test_path: directory with the same class sub-folders as the
                training directory.
        """
        # Used only to list files and labels; no augmentation is configured
        # because test images must be evaluated unmodified.
        test_datagen = ImageDataGenerator()

        test_data = test_datagen.flow_from_directory(
            test_path,
            target_size=(self.IMG_SIZE, self.IMG_SIZE),
            batch_size=1,
            class_mode='sparse',
            shuffle=False
        )

        test_embeddings, test_labels = self.extract_embeddings_from_generator(test_data)
        loss, acc = self.classifier.evaluate(test_embeddings, test_labels)
        print("Test accuracy:", acc)

    def predict(self, image_path):
        """Classify a single image.

        Args:
            image_path: path of the image file to classify.

        Returns:
            Tuple ``(class_name, confidence)`` where ``confidence`` is the
            classifier's softmax probability for the predicted class.

        Raises:
            ValueError: if the image cannot be read.
        """
        processed = self.preprocess_defect_image(image_path)
        processed = self.preprocess_for_model(processed)
        processed = np.expand_dims(processed, axis=0)  # add batch dimension
        embedding = self.feature_extractor.predict(processed, verbose=0)
        pred_probs = self.classifier.predict(embedding, verbose=0)

        idx_to_class = {v: k for k, v in self.class_indices.items()}
        class_idx = int(np.argmax(pred_probs[0]))
        pred_class = idx_to_class[class_idx]

        return pred_class, pred_probs[0][class_idx]
                

In [5]:
%%time
# Constructing the detector builds the models and trains the classifier on the
# images under `dataset`, so this cell's timing is the full training cost.
detector = DefectDetector(dataset)

Found 5971 images belonging to 2 classes.


2025-11-29 14:11:29.272517: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 2min 56s, sys: 11 s, total: 3min 7s
Wall time: 51.1 s


In [6]:
# Print the trained classifier's accuracy on the images under `testset`.
detector.evaluate(testset)

Found 715 images belonging to 2 classes.
Test accuracy: 0.9944055676460266


In [7]:
# Testing prediction
def list_files(path):
    """Return the paths of the regular files directly inside ``path``.

    Sub-directories are skipped and nothing is searched recursively. The
    result is sorted so that positional indexing into it (e.g. ``files[9]``)
    picks the same file on every run and every filesystem.

    Args:
        path: directory to list.

    Returns:
        Sorted list of ``path``-prefixed file paths.
    """
    with os.scandir(path) as entries:
        return sorted(entry.path for entry in entries if entry.is_file())

# Per-class folders of the test set, derived from `testset` so the dataset
# layout is spelled out in one place only.
defect = os.path.join(testset, 'def_front')
ok = os.path.join(testset, 'ok_front')

defective = list_files(defect)
fine = list_files(ok)

In [8]:
# Miss rate on defective test images: the share of files in `defective`
# that the detector does not label "def_front".
total_def = len(defective)
fail_def = sum(
    1
    for image_path in defective
    if detector.predict(image_path)[0] != "def_front"
)

fail_def / total_def

0.004415011037527594

In [9]:
# False-alarm rate on good test images: the share of files in `fine`
# that the detector does not label "ok_front".
ok_predictions = [detector.predict(image_path)[0] for image_path in fine]
total_ok = len(fine)
fail_ok = sum(1 for label in ok_predictions if label != "ok_front")

fail_ok / total_ok

0.007633587786259542

In [10]:
%%time
# Time a single prediction; the cell's value is the (class, confidence) tuple.
detector.predict(defective[9])

CPU times: user 133 ms, sys: 9.66 ms, total: 143 ms
Wall time: 113 ms


('def_front', 0.999987)

In [11]:
# Count the labelled images available in the training folder. The per-class
# folders are derived from `dataset` instead of rebuilding the path by hand.
# Note: this rebinds `defective` / `fine`, which held the test files above.
defect = os.path.join(dataset, 'def_front')
ok = os.path.join(dataset, 'ok_front')

defective = list_files(defect)
fine = list_files(ok)
len(defective) + len(fine)

6633

## Notes

- The model uses a frozen MobileNetV2 backbone pretrained on ImageNet to extract features from the images, and trains a much simpler classifier on top of those features to differentiate between defect and no defect.
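- The model takes no more than 5 minutes to train (about 51 s of wall time in the run above, on CPU) and at most a couple of seconds to predict (about 0.1 s for the single image timed above).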
- Provided a good amount of training images, the model achieves a high accuracy (above 90%; 99.4% on the test set in the run above). The training folder holds 6,633 labeled images, of which 5,971 were loaded for the training run above. Obviously, fewer training images lead to lower accuracy. This drop, however, is somewhat reasonable: with only 40 training images (20 defect + 20 ok), the test accuracy is above 70% in a majority of cases. The model variance seems to be pretty high, with the accuracy ranging from ~63% to ~84%. If customers continue to label images, we can feed these back into the model and improve the accuracy.