In [1]:

import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import string 
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras import layers
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score  

In [2]:
image_directory = 'train/'  # Replace with the actual image directory path
labels_file = 'train/labels.txt'  # Replace with the actual path to the labels file

In [3]:
# Function to check CAPTCHA strength
def check_captcha_strength(captcha_text, image):
    score = 0
    length = len(captcha_text)

    # Check Length
    if length >= 6:
        score += 2
    elif length >= 4:
        score += 1
    
    # Check Character Variety
    has_upper = any(c.isupper() for c in captcha_text)
    has_digit = any(c.isdigit() for c in captcha_text)
    has_hex = all(c in string.hexdigits for c in captcha_text)

    if has_upper:
        score += 1
    if has_digit:
        score += 1
    if has_hex:
        score += 1  

    # Check Image Complexity (Noise)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)  
    edge_density = np.sum(edges) / (gray.shape[0] * gray.shape[1])

    if edge_density > 0.1:
        score += 2  
    elif edge_density > 0.05:
        score += 1

    # Calculate Entropy
    char_frequencies = {char: captcha_text.count(char) for char in set(captcha_text)}
    entropy = -sum((freq/length) * math.log2(freq/length) for freq in char_frequencies.values())

    if entropy > 2.5:
        score += 2  
    elif entropy > 1.5:
        score += 1

    # Final Strength Rating
    if score >= 7:
        strength = "Strong"
    elif score >= 4:
        strength = "Moderate"
    else:
        strength = "Weak"

    return strength, score

In [4]:
# Step 1: Preprocessing
def extract_background_color(image):
    corners = [(0, 0), (0, image.shape[0]-1), (image.shape[1]-1, 0), (image.shape[1]-1, image.shape[0]-1)]
    corner_colors = [image[y, x] for x, y in corners]
    background_color = np.mean(corner_colors, axis=0)
    return background_color

def convert_to_hsv(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Step 2: Eliminate Obfuscating Lines
def remove_obfuscating_lines(image, background_color):
    mask = cv2.inRange(image, background_color, background_color)
    result = cv2.bitwise_not(image, image, mask=mask)
    return result

# Step 3: Image Segmentation
def segment_image(image):
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    segments = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 10 and h > 10:  # filter out small segments
            segment = image[y:y+h, x:x+w]
            segments.append(segment)

    return segments

In [5]:
image_size=(100,500)
with open(labels_file, 'r') as file:
  labels=[line.strip() for line in file]

In [6]:
images=[]
strength_scores = []

for i in range(2000):
    filename = str(i) + '.png'
    image_path = os.path.join(image_directory, filename)
    image = cv2.imread(image_path)
    
    if image is not None and image.size != 0:
        image = cv2.resize(image, image_size)
        background_color = extract_background_color(image)
        image_hsv = convert_to_hsv(image)
        image_without_lines = remove_obfuscating_lines(image_hsv, background_color)
        # Image Segmentation
        segments = segment_image(image_without_lines)
        images.append(image)

        # Check CAPTCHA Strength
        captcha_text = labels[i]  # Assuming labels[i] contains the actual CAPTCHA text
        strength, score = check_captcha_strength(captcha_text, image)
        strength_scores.append((captcha_text, strength, score))
        print(f"Image {i+1}: CAPTCHA Strength -> {strength} (Score: {score}/10)")

# Convert strength scores to DataFrame
df_strength = pd.DataFrame(strength_scores, columns=['captcha_text', 'strength', 'score'])


Image 1: CAPTCHA Strength -> Weak (Score: 3/10)
Image 2: CAPTCHA Strength -> Weak (Score: 3/10)
Image 3: CAPTCHA Strength -> Weak (Score: 3/10)
Image 4: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 5: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 6: CAPTCHA Strength -> Weak (Score: 3/10)
Image 7: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 8: CAPTCHA Strength -> Weak (Score: 3/10)
Image 9: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 10: CAPTCHA Strength -> Weak (Score: 3/10)
Image 11: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 12: CAPTCHA Strength -> Weak (Score: 3/10)
Image 13: CAPTCHA Strength -> Weak (Score: 3/10)
Image 14: CAPTCHA Strength -> Weak (Score: 3/10)
Image 15: CAPTCHA Strength -> Weak (Score: 3/10)
Image 16: CAPTCHA Strength -> Weak (Score: 3/10)
Image 17: CAPTCHA Strength -> Weak (Score: 3/10)
Image 18: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 19: CAPTCHA Strength -> Moderate (Score: 4/10)
Image 20: CAPTCHA Strength -> Weak (Score: 3/10)
I

In [7]:
parities = []
for label in labels:
    if label == 'ODD' or label == 'EVEN':
        parities.append(label)
    else:
        decimal_number = int(label, 16)
        if decimal_number % 2 == 0:
            parities.append('EVEN')
        else:
            parities.append('ODD')

In [8]:
df = pd.DataFrame({'image': images, 'parity': parities, 'strength': df_strength['strength'], 'score': df_strength['score']})
df = df.sample(frac=1).reset_index(drop=True)
X_train, X_val, y_train, y_val = train_test_split(df['image'], df['parity'], test_size=0.2, random_state=42)
X_train = np.array(X_train.tolist()) / 255.0
X_val = np.array(X_val.tolist()) / 255.0
label_dict = {'EVEN': 0, 'ODD': 1}
y_train_encoded = np.array([label_dict[label] for label in y_train])
y_val_encoded = np.array([label_dict[label] for label in y_val])


In [9]:
model = tf.keras.Sequential([
    layers.Input(shape=(100, 500, 3)),  # Explicit Input layer
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')  # 2 classes: Even and Odd
])


In [10]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [11]:
X_train = np.transpose(X_train, (0, 2, 1, 3))
X_val = np.transpose(X_val, (0, 2, 1, 3))

In [None]:
history = model.fit(X_train, y_train_encoded, batch_size=64, epochs=10, validation_data=(X_val, y_val_encoded))

[1m15/25[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:09[0m 7s/step - accuracy: 0.6174 - loss: 0.6398

In [13]:
y_val_pred = model.predict(X_val)
y_val_pred = np.argmax(y_val_pred, axis=1)
val_accuracy = accuracy_score(y_val_encoded, y_val_pred)
val_precision = precision_score(y_val_encoded, y_val_pred, average='weighted')
val_recall = recall_score(y_val_encoded, y_val_pred, average='weighted')
val_f1_score = f1_score(y_val_encoded, y_val_pred, average='weighted')

print("Validation Metrics:")
print(f"Accuracy: {val_accuracy}")
print(f"Precision: {val_precision}")
print(f"Recall: {val_recall}")
print(f"F1 Score: {val_f1_score}")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 382ms/step 
Validation Metrics:
Accuracy: 0.9775
Precision: 0.9775147559023608
Recall: 0.9775
F1 Score: 0.9775012662660473


In [None]:
print("CAPTCHA Strength Distribution:")
print(df['strength'].value_counts())

import seaborn as sns
import matplotlib.pyplot as plt

# Plot CAPTCHA Strength Distribution
plt.figure(figsize=(8,5))
sns.countplot(x=df['strength'], palette="coolwarm")
plt.title("CAPTCHA Strength Distribution")
plt.xlabel("Strength")
plt.ylabel("Count")
plt.show()
