In [22]:
import cv2
import numpy as np
import pytesseract
import pandas as pd
from PIL import Image
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def extract_aadhar_data(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
    text1 = pytesseract.image_to_data(threshed, output_type='data.frame', lang="hin+eng")
    text2 = pytesseract.image_to_string(threshed, lang="hin+eng")
    text = text1[text1.conf != -1]
    lines = text.groupby('block_num')['text'].apply(list)
    conf = text.groupby(['block_num'])['conf'].mean()
    return text

image_path = 'C:\\Users\\soumy\\Desktop\\cnn\\new_generated_aadharcard_images\\1front_contrast_adjusted.jpg'
aadhar_data = extract_aadhar_data(image_path)

def create_improved_cnn_model():
    model = Sequential()
    model.add(Input(shape=(64, 64, 3)))
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.005)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.4))
    
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.005)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.4))
    
    # model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.005)))
    # model.add(BatchNormalization())
    # model.add(MaxPooling2D(pool_size=(2, 2)))
    # model.add(Dropout(0.5))
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.005)))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00025), loss='binary_crossentropy', metrics=['accuracy'])
    return model

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3
)
test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory(
    'dataset_1/training_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)
test_set = test_datagen.flow_from_directory(
    'dataset_1/test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)

improved_model = create_improved_cnn_model()
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.000005)
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True, mode='min')

improved_model.fit(
    training_set,
    steps_per_epoch=120,
    epochs=15,
    validation_data=test_set,
    validation_steps=40,
    callbacks=[early_stopping, reduce_lr, checkpoint]
)

improved_model.save('improved_aadhar_cnn_model.keras')



Found 3425 images belonging to 2 classes.
Found 1156 images belonging to 2 classes.
Epoch 1/15


  self._warn_if_super_not_called()


[1m108/120[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m3s[0m 282ms/step - accuracy: 0.9199 - loss: 1.7830

  self.gen.throw(typ, value, traceback)


[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 314ms/step - accuracy: 0.9229 - loss: 1.7660 - val_accuracy: 0.7578 - val_loss: 1.7880 - learning_rate: 2.5000e-04
Epoch 2/15
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 293ms/step - accuracy: 0.9645 - loss: 1.3388 - val_accuracy: 0.7933 - val_loss: 1.9031 - learning_rate: 2.5000e-04
Epoch 3/15
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 312ms/step - accuracy: 0.9704 - loss: 1.1481 - val_accuracy: 0.7958 - val_loss: 2.0872 - learning_rate: 2.5000e-04
Epoch 4/15
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 310ms/step - accuracy: 0.9700 - loss: 1.0137 - val_accuracy: 0.7811 - val_loss: 1.8884 - learning_rate: 2.5000e-04
Epoch 5/15
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 303ms/step - accuracy: 0.9765 - loss: 0.9010 - val_accuracy: 0.7682 - val_loss: 1.8721 - learning_rate: 1.2500e-04
Epoch 6/15
[1m120/120[0m [32m━━━━━━━━━━━━━━

In [23]:
loaded_model = tf.keras.models.load_model('improved_aadhar_cnn_model.keras')

evaluation = loaded_model.evaluate(test_set, steps=120)
print(f"Test Loss: {evaluation[0]}")
print(f"Test Accuracy: {evaluation[1]}")

sample_image_path = 'C:\\Users\\soumy\\Desktop\\cnn\\new_generated_aadharcard_images\\2front_blurred.jpg'
sample_image = Image.open(sample_image_path)
sample_image = sample_image.resize((64, 64))
sample_image = np.array(sample_image) / 255.0
sample_image = np.expand_dims(sample_image, axis=0)

prediction = loaded_model.predict(sample_image)
print(f"Prediction: {'Real' if prediction[0][0] > 0.5 else 'Fake'}")


[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 59ms/step - accuracy: 0.7773 - loss: 1.6794  
Test Loss: 1.7031785249710083
Test Accuracy: 0.7733563780784607
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
Prediction: Real
