In [1]:
import os
import pandas as pd

# Replace this with the path to your toothbrush dataset
root_dir = r"C:\Users\shubh\Downloads\ToothBrush\toothbrush\toothbrush"  

data = []

# Process 'train' folder (only non-defective images available here)
train_path = os.path.join(root_dir, 'train', 'good')
if os.path.exists(train_path):
    for img_name in os.listdir(train_path):
        img_path = os.path.join(train_path, img_name)
        data.append([img_path, 'train', 'non-defective', 'none'])

# Process 'test' folder (contains both 'good' and 'defective')
test_path = os.path.join(root_dir, 'test')
for defect_type in ['good', 'defective']:
    defect_path = os.path.join(test_path, defect_type)
    if os.path.exists(defect_path):
        label = 'non-defective' if defect_type == 'good' else 'defective'
        defect_subtype = 'none' if label == 'non-defective' else 'unknown'  # No specific defect type in test
        for img_name in os.listdir(defect_path):
            img_path = os.path.join(defect_path, img_name)
            data.append([img_path, 'test', label, defect_subtype])

# Process 'ground_truth' folder (only defective images available here)
ground_truth_path = os.path.join(root_dir, 'ground_truth')
if os.path.exists(ground_truth_path):
    for defect_type in os.listdir(ground_truth_path):
        defect_path = os.path.join(ground_truth_path, defect_type)
        for img_name in os.listdir(defect_path):
            img_path = os.path.join(defect_path, img_name)
            data.append([img_path, 'ground_truth', 'defective', defect_type])

# Convert data into a DataFrame and save to CSV
df = pd.DataFrame(data, columns=['image_path', 'dataset_type', 'label', 'defect_type'])
df.to_csv('toothbrush_labels.csv', index=False)

print("CSV file created successfully!")


CSV file created successfully!


In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

# Set paths
root_dir = r"C:\Users\shubh\Downloads\ToothBrush\toothbrush\toothbrush"  # Update with your dataset path
csv_file = "toothbrush_labels.csv"  # The CSV file created previously

# Load the CSV with image paths and labels
df = pd.read_csv(csv_file)

# Set up data generators
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=15
)

train_generator = datagen.flow_from_dataframe(
    df,
    x_col='image_path',
    y_col='label',
    target_size=(128, 128),  # Adjust based on model input size
    batch_size=32,
    class_mode='binary',
    subset='training'
)

validation_generator = datagen.flow_from_dataframe(
    df,
    x_col='image_path',
    y_col='label',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

# Build the model
model = Sequential([
    ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary output
])

model.layers[0].trainable = False  # Freeze ResNet layers

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10
)

# Evaluate the model
val_images, val_labels = next(validation_generator)
predictions = model.predict(val_images)
predictions = (predictions > 0.5).astype(int)

print(classification_report(val_labels, predictions, target_names=['non-defective', 'defective']))


Found 106 validated image filenames belonging to 2 classes.
Found 26 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
               precision    recall  f1-score   support

non-defective       0.00      0.00      0.00         0
    defective       1.00      0.08      0.14        26

     accuracy                           0.08        26
    macro avg       0.50      0.04      0.07        26
 weighted avg       1.00      0.08      0.14        26



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
class_weights = {0: 1., 1: 2.}  # Example: Increase weight of the minority class
model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    class_weight=class_weights
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x246266dcfa0>

In [4]:
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=15,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    brightness_range=[0.8, 1.2]
)


In [5]:
for layer in model.layers[0].layers:
    layer.trainable = True


In [6]:
# Get the true class labels from validation set
val_images, val_labels = next(validation_generator)
predictions = model.predict(val_images)
predictions = (predictions > 0.5).astype(int)

# Generate confusion matrix
cm = confusion_matrix(val_labels, predictions)
print("Confusion Matrix:")
print(cm)


Confusion Matrix:
[[26]]




In [8]:
print(np.unique(predictions))


[1]


In [9]:
print(np.unique(val_labels))


[1.]


# Report: Toothbrush Defect Detection Model

## Objective
### The objective of this analysis is to develop a machine learning model that can classify toothbrush images into two categories: non-defective and defective. The model uses a ResNet50-based architecture with fine-tuning to detect defects in toothbrush images, leveraging image data from various folders.

## Methodology
### 1. Dataset Overview:

#### The dataset consists of images from three directories:
#### Train: Contains only 'good' (non-defective) toothbrush images.
#### Test: Contains both 'good' (non-defective) and 'defective' toothbrush images.
#### Ground_truth: Contains only defective toothbrush images, used for model validation.
#### The images were labeled as non-defective or defective based on their respective directories.
### 2. Data Preprocessing:

#### Image Augmentation: Applied to the images to increase the dataset's size and variability, which helps improve generalization:
#### Horizontal flip
#### Random rotations (up to 15 degrees)
#### Rescaling: Pixel values of the images were rescaled to the range [0, 1] by dividing by 255.


### 3. Model Architecture:

#### Base Model: Used the ResNet50 pre-trained on ImageNet as the backbone (without the top classification layer). This allows the model to leverage knowledge from a vast dataset, providing strong feature extraction capabilities.
#### Added Layers:
#### Flattened the output of ResNet50.
#### Added a dense layer with 128 neurons and ReLU activation.
#### Applied a Dropout layer (rate 0.5) to reduce overfitting.
#### Final output layer: A single neuron with a sigmoid activation function to predict a binary outcome (non-defective vs defective).
#### Training Configuration:
#### Optimizer: Adam with a learning rate of 0.0001.
#### Loss function: Binary cross-entropy, appropriate for binary classification tasks.
#### Metrics: Accuracy.

### 4. Model Training:

#### The model was trained for 10 epochs using a batch size of 32. A validation split of 20% was used to evaluate the model during training.

## Results :

### 1. Training and Validation Accuracy:

#### The training process showed fluctuations in accuracy, with some improvement in later epochs, but it failed to generalize well to the validation set.
#### The validation accuracy fluctuated significantly, reaching 100% accuracy for some epochs, but later falling to 0% in others. This indicates that the model may have overfitted on the training data or failed to generalize due to data issues.
### 2. Classification Report: The classification report generated for the validation set showed:

#### Precision: The model showed 100% precision for the "defective" class, meaning that when it predicted "defective," it was always correct. However, it failed to predict any "non-defective" images.
#### Recall: The recall for the "defective" class was very low (0.08), which suggests that the model had difficulty identifying all defective images. Recall for the "non-defective" class was 0% due to the absence of non-defective predictions.
#### F1-Score: This was also very low, particularly for the non-defective class, indicating poor balance between precision and recall for this class.
#### Accuracy: The overall accuracy of the model on the validation set was 8%, suggesting that the model's predictions were predominantly inaccurate.

### 3. Confusion Matrix: 
#### The confusion matrix output:

#### Indicates that the model predicted all images as "defective," with no "non-defective" predictions. This strongly suggests that the model is biased towards predicting the "defective" class, possibly due to an imbalance in the dataset or an issue with the model’s training procedure.

### 4. Predictions:

#### The predictions showed a bias toward the "defective" class (all predictions were 1).
#### The model’s failure to predict any "non-defective" images (all values in the predictions were 1) indicates that either the model has learned only to identify defective images or there is an issue with the label assignment or data representation.

## Conclusion:

#### The model has shown limited success in predicting the "defective" class but is highly biased and does not predict the "non-defective" class. The next steps include addressing class imbalance, revisiting the model architecture, and evaluating the dataset's distribution to improve classification accuracy.