In [1]:
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, 
                                     Reshape, Activation, Dropout)
from sklearn.model_selection import train_test_split

# -------------------------------
# 1. Define Character Set and Parameters
# -------------------------------
# Alphabet recognised by the model: upper-case letters, lower-case letters,
# then digits (62 symbols). A character's position in this string is its
# class index.
char_set = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
)
num_classes = len(char_set)
# Lookup tables in both directions between characters and class indices.
index_to_char = dict(enumerate(char_set))
char_to_index = {char: idx for idx, char in index_to_char.items()}

# Expected CAPTCHA length (modify as needed)
max_length = 5

# -------------------------------
# 2. Data Loading and Preprocessing
# -------------------------------
def load_data(data_dir):
    """
    Load every CAPTCHA image found directly inside ``data_dir``.

    Files are selected by extension (.jpg, .png, .jpeg, case-insensitive),
    read as grayscale and resized to 100x50 (width x height). The label of
    each image is its file name without the extension.

    Entries are visited in sorted order: ``os.listdir`` returns names in
    arbitrary order, which would otherwise make a later seeded split differ
    between runs or machines.

    Files that OpenCV cannot decode are skipped with a printed message, so
    one corrupt file does not abort the whole load and the image and label
    arrays stay aligned.

    Args:
        data_dir: Path of the directory that holds the image files.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the resized images and the
        matching label strings, in the same order.
    """
    X, y = [], []
    for file in sorted(os.listdir(data_dir)):
        if not file.lower().endswith((".jpg", ".png", ".jpeg")):
            continue
        img_path = os.path.join(data_dir, file)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        # cv2.resize takes the target size as (width, height).
        X.append(cv2.resize(img, (100, 50)))
        # Use the file name (without extension) as the label.
        y.append(os.path.splitext(file)[0])
    return np.array(X), np.array(y)

def encode_labels(labels, max_length):
    """
    Build the one-hot target array for a sequence of label strings.

    The result has shape (len(labels), max_length, num_classes). For the
    character at position p of label n, entry [n, p, char_to_index[char]]
    is set to 1. Every other entry stays 0, so positions past the end of a
    shorter label remain all-zero.
    """
    targets = np.zeros((len(labels), max_length, num_classes))
    # Iterating the 3-D array yields one (max_length, num_classes) view per
    # label, so writing into `grid` updates `targets` in place.
    for grid, text in zip(targets, labels):
        for position, symbol in enumerate(text):
            grid[position, char_to_index[symbol]] = 1
    return targets

# Set your dataset folder path here
data_dir = r"D:\Dataset\samples"
X, y = load_data(data_dir)

# Convert to float32, divide by 255, and append a trailing channel axis so
# that each sample has shape (50, 100, 1).
X = (X.astype(np.float32) / 255.0).reshape(-1, 50, 100, 1)

# Turn the label strings into one-hot targets.
y_encoded = encode_labels(y, max_length)

# Hold out 20% of the samples for evaluation; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# -------------------------------
# 3. Build an Improved CNN Model
# -------------------------------
# Note: Instead of applying softmax in the Dense layer (which would compute a single
# softmax over all outputs), we use a Dense layer with linear activation, reshape, 
# and then apply an Activation('softmax') which works independently along the last axis.
model = Sequential([
    # Declare the input shape with an explicit Input object rather than an
    # `input_shape=` argument on the first layer; Keras warns about the
    # latter for Sequential models.
    tf.keras.Input(shape=(50, 100, 1)),
    # Convolutional feature extractor
    Conv2D(32, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    # The Dense layer outputs max_length*num_classes raw scores.
    Dense(max_length * num_classes, activation="linear"),
    # Reshape the flat output into (max_length, num_classes)
    Reshape((max_length, num_classes)),
    # Softmax acts on the last axis, so each character position gets its
    # own probability distribution over the classes.
    Activation("softmax")
])

# Targets are one-hot per character position, hence categorical
# cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# -------------------------------
# 4. Train and Save the Model
# -------------------------------
# You can experiment with a higher number of epochs if needed.
# Fit on the training split and evaluate on the held-out split after every
# epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=40,
    epochs=50,
    validation_data=(X_test, y_test),
)
# Persist the trained model to disk.
model.save("captcha_model_improved.keras")
print("Training completed and model saved.")

# -------------------------------
# 5. Load the Model and Define Prediction Function
# -------------------------------
# Load the saved model
# Rebind `model` to the copy restored from disk, then compile it with the
# same loss, optimizer and metric used for training.
model = load_model("captcha_model_improved.keras")
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

def predict_captcha(image_path):
    """
    Predict the text shown in a single CAPTCHA image.

    The image is read as grayscale, resized to 100x50 (width x height),
    divided by 255 and reshaped to a one-sample batch of shape
    (1, 50, 100, 1) before being passed to the module-level ``model``.

    Args:
        image_path: Path of the image file to decode.

    Returns:
        The predicted string, one character per output position.

    Raises:
        OSError: If the file is missing or OpenCV cannot decode it.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure instead of raising; fail here with
    # the offending path rather than with an opaque error inside cv2.resize.
    if img is None:
        raise OSError(f"Could not read image (missing or undecodable): {image_path}")
    # cv2.resize takes the target size as (width, height).
    img = cv2.resize(img, (100, 50))
    img = img.astype(np.float32) / 255.0
    img = img.reshape(1, 50, 100, 1)
    # Get predictions from the model
    predictions = model.predict(img)
    # For each character position, pick the class with the highest probability.
    best_classes = np.argmax(predictions[0], axis=-1)
    predicted_text = "".join(index_to_char[idx] for idx in best_classes)
    return predicted_text

# -------------------------------
# 6. Test the Prediction Function
# -------------------------------
# Replace this path with the CAPTCHA image you want to decode.
captcha_image = r"D:\Dataset sample\2b827.png"
result = predict_captcha(image_path=captcha_image)
print("Predicted CAPTCHA:", result)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.0532 - loss: 3.8235 - val_accuracy: 0.1140 - val_loss: 3.0792
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 79ms/step - accuracy: 0.0702 - loss: 3.1363 - val_accuracy: 0.1570 - val_loss: 2.8497
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 80ms/step - accuracy: 0.1363 - loss: 2.8598 - val_accuracy: 0.2682 - val_loss: 2.5567
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - accuracy: 0.2472 - loss: 2.4679 - val_accuracy: 0.4299 - val_loss: 2.1405
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - accuracy: 0.3708 - loss: 2.0817 - val_accuracy: 0.5383 - val_loss: 1.8096
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 78ms/step - accuracy: 0.4629 - loss: 1.7609 - val_accuracy: 0.6159 - val_loss: 1.4947
Epoch 7/50
[1m22/22[0m [32m━━━━