<a href="https://colab.research.google.com/github/srita2003/Aadhaar_Card_Anomaly_Detection_Model/blob/main/AdhaarCard_Detectionipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the dataset folders referenced by
# the training and evaluation cells (under /content/drive/MyDrive) are readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install pytesseract opencv-python-headless
!pip install easyocr
!pip install face_recognition
!pip install scikit-learn

In [None]:
import io
import re
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
import easyocr
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from google.colab import files
from skimage.metrics import structural_similarity as ssim
import face_recognition
import os
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc,classification_report, confusion_matrix
import pandas as pd

In [None]:
# --- Training configuration ---
dataset_path = "/content/drive/MyDrive/adhaar"

img_height, img_width = 256, 256
batch_size = 32
epochs = 100
validation_split = 0.2

# Augmenting generator: used for the training subset only.
# NOTE(review): horizontal_flip mirrors the printed text on the card, which a
# genuine scan would never show - confirm this augmentation is intended.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
    rescale=1./255,
    validation_split=validation_split
)

# Validation images must not be augmented, otherwise the validation metrics are
# measured on distorted inputs. This generator only rescales; it shares the same
# validation_split so both generators partition the directory identically.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_split
)

# One sub-directory per class is expected under dataset_path; with
# class_mode='binary' each batch carries 0/1 labels.
train_generator = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)

validation_generator = validation_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)


# Binary CNN classifier: four Conv2D + MaxPooling2D stages followed by a
# dense head that ends in a single sigmoid unit.
model = models.Sequential()

# Only the first convolution declares the input shape (height, width, RGB).
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Remaining convolution stages, each followed by 2x2 max pooling.
for n_filters in (64, 128, 128):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

# Classification head with dropout ahead of the output unit.
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


# Stop training once the validation loss has not improved for
# `early_stopping_patience` consecutive epochs, instead of always running the
# full epoch budget. restore_best_weights rolls the model back to the weights
# of its best validation epoch when training is stopped early, so the file
# saved below is not simply the last (possibly overfit) epoch.
early_stopping_patience = 10
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=early_stopping_patience,
    restore_best_weights=True
)

history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    callbacks=[early_stopping]
)

# Persist the trained classifier; later cells reload it from this file name.
model.save("aadhar_fraud_detection_model.keras")

In [None]:
test_dataset_path = "/content/drive/MyDrive/test"

# Test images are only rescaled - no augmentation at evaluation time.
test_datagen = ImageDataGenerator(
    rescale=1./255
)

# shuffle=False keeps the prediction order aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    test_dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

# NOTE(review): the names below assume label 0 is "Real" and label 1 is "Fake".
# The actual mapping is decided by the sub-directory names, so print it and
# confirm it agrees before trusting the per-class numbers.
class_labels = [0, 1]
class_names = ['Real', 'Fake']
print(f"Class indices: {test_generator.class_indices}")

model = tf.keras.models.load_model("aadhar_fraud_detection_model.keras")

test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")

# Keep the raw sigmoid scores separate from the thresholded labels, and flatten
# the (N, 1) model output to the 1-D shape the metric functions expect.
test_probabilities = model.predict(test_generator)
test_predictions = (test_probabilities > 0.5).astype(int).ravel()

true_labels = test_generator.classes

# Pinning `labels` keeps the report and the matrix at two classes even if one
# class never occurs in the test set or in the predictions.
report = classification_report(
    true_labels,
    test_predictions,
    labels=class_labels,
    target_names=class_names,
    output_dict=True
)

# The report used to be computed and then dropped; show it.
print("Classification Report:")
print(pd.DataFrame(report).transpose())

cm = confusion_matrix(true_labels, test_predictions, labels=class_labels)
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

print("Confusion Matrix:")
print(cm_df)


In [None]:
# Ask the user for the Aadhaar card image. The result is a mapping from the
# uploaded file name to its content; only the first entry is used.
uploaded = files.upload()
if not uploaded:
    # A cancelled upload would otherwise surface as a bare StopIteration below.
    raise ValueError("No file was uploaded.")
filename = next(iter(uploaded))
print(f'Uploaded file: {filename}')

image = Image.open(filename)

In [None]:
# Decode the card image straight from the uploaded bytes.
image = Image.open(io.BytesIO(uploaded[filename]))

# Convert to RGB before building the array: for palette ("P") images np.array()
# yields palette indices rather than colours, and CMYK images yield four ink
# channels. After conversion the array is always (height, width, 3).
image_np = np.array(image.convert('RGB'))

# OCR reader configured for English and Hindi text.
reader = easyocr.Reader(['en', 'hi'])


In [None]:
# Run OCR on the card and print the element at index 1 of every detection,
# which is the recognised string.
results = reader.readtext(image_np)

for ocr_entry in results:
    print(ocr_entry[1])

# Show the uploaded card, without axes, for a visual cross-check.
image_rgb = image.convert('RGB')
fig, ax = plt.subplots()
ax.imshow(image_rgb)
ax.axis('off')
plt.show()

In [None]:
def detect_anomalies(text):
    """Check OCR text from an Aadhaar card for the fields a card should carry.

    Four independent pattern checks are run: a name (two adjacent words of
    Latin or Devanagari letters), a date of birth (DD-MM-YYYY with '-', '/'
    or '.' separators), a gender keyword (English or Hindi) and a 12-digit
    number optionally grouped in blocks of four.

    Args:
        text: OCR text of the card. None or an empty string is treated as
            text containing no fields.

    Returns:
        A list of human-readable anomaly messages, one per missing field, in
        the order name, date of birth, gender, Aadhaar number. The list is
        empty when every field is found.
    """
    text = text or ""

    # `\b` cannot be used around the Hindi keywords: Devanagari vowel signs
    # (e.g. the last character of "महिला") are not `\w` characters in Python's
    # `re`, so no word boundary is seen after them and the keyword is never
    # matched. Explicit "not next to a letter" lookarounds are used instead.
    letter = r'A-Za-z\u0900-\u097F'

    field_checks = (
        (r'(?:[^\n]+(?:\n+)?){0,2}[A-Za-zअ-ह]+\s[A-Za-zअ-ह]+', 0,
         "Missing or incorrect name format"),
        (r'\s*\d{2}[-/.]\d{2}[-/.]\d{4}', 0,
         "Missing or incorrect date of birth format"),
        # Case-insensitive so OCR output such as "female" is accepted too.
        (rf'(?<![{letter}])(?:महिला|पुरुष|FEMALE|MALE)(?![{letter}])', re.IGNORECASE,
         "Missing or incorrect gender format"),
        (r'\b\d{4}\s?\d{4}\s?\d{4}\b', 0,
         "Missing or incorrect Aadhaar number format"),
    )

    return [
        message
        for pattern, flags, message in field_checks
        if not re.search(pattern, text, flags)
    ]

In [None]:
# Reload the classifier from the file written by the training cell, so the
# inference cells below can run without retraining in this session.
model = tf.keras.models.load_model("aadhar_fraud_detection_model.keras")

In [None]:
def check_user_data(text):
    """Prompt for the holder's details and compare them with the card's OCR text.

    The user is asked for name, date of birth, gender and Aadhaar number.
    Every answer is compared case-insensitively against the OCR text after
    whitespace has been collapsed. A message is printed for each field that
    does not match, and a confirmation is printed when all four match.

    Matching rules:
      * A blank answer never matches (an empty string is a substring of any
        text, so it must be rejected explicitly).
      * Date of birth: '-', '.' and '/' are treated as the same separator.
      * Gender: matched as a whole word, so "male" does not match "female".
      * Aadhaar number: compared with all whitespace removed, so the number
        may be typed with or without the spaces printed on the card.

    Args:
        text: OCR text extracted from the Aadhaar card.

    Returns:
        True when all four entered values are found in the text, else False.
    """
    user_name = input("Enter the full name on the Aadhaar card: ").strip()
    user_dob = input("Enter the Date of Birth (DD/MM/YYYY): ").strip()
    user_gender = input("Enter the gender (MALE/FEMALE): ").strip()
    user_aadhaar = input("Enter the 12-digit Aadhaar number (XXXX XXXX XXXX): ").strip()

    print("\n")

    def _normalize(value):
        # Collapse whitespace runs to one space, trim, and lower-case.
        return re.sub(r'\s+', ' ', value or '').strip().lower()

    normalized_text = _normalize(text)
    user_name_normalized = _normalize(user_name)
    user_gender_normalized = _normalize(user_gender)

    # Unify date separators on both sides of the comparison.
    user_dob_normalized = re.sub(r'[-.]', '/', _normalize(user_dob))
    dob_text = re.sub(r'[-.]', '/', normalized_text)

    # Drop every whitespace character on both sides for the number comparison.
    user_aadhaar_normalized = re.sub(r'\s+', '', user_aadhaar).lower()
    compact_text = re.sub(r'\s+', '', normalized_text)

    name_ok = bool(user_name_normalized) and user_name_normalized in normalized_text
    dob_ok = bool(user_dob_normalized) and user_dob_normalized in dob_text
    gender_ok = bool(user_gender_normalized) and re.search(
        r'(?<![a-z])' + re.escape(user_gender_normalized) + r'(?![a-z])',
        normalized_text,
    ) is not None
    aadhaar_ok = bool(user_aadhaar_normalized) and user_aadhaar_normalized in compact_text

    user_matches = True
    if not name_ok:
        print("Entered name does not match Aadhaar card details.")
        user_matches = False
    if not dob_ok:
        print("Entered Date of Birth does not match Aadhaar card details.")
        user_matches = False
    if not gender_ok:
        print("Entered gender does not match Aadhaar card details.")
        user_matches = False
    if not aadhaar_ok:
        print("Entered Aadhaar number does not match Aadhaar card details.")
        user_matches = False

    if user_matches:
        print("User data matches the Aadhaar card details.")
    return user_matches

In [None]:
def detect_faces_in_image(filename):
    """Find frontal faces in an image using OpenCV's Haar cascade.

    Args:
        filename: Image location, passed to PIL's Image.open.

    Returns:
        A tuple (annotated_image, faces): a PIL RGB image with a rectangle
        drawn around each detection, and the detections as returned by
        detectMultiScale, each unpacked as (x, y, w, h).
    """
    rgb_pixels = np.array(Image.open(filename).convert('RGB'))

    # OpenCV works in BGR; a grayscale copy feeds the detector.
    bgr_canvas = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr_canvas, cv2.COLOR_BGR2GRAY)

    cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    detector = cv2.CascadeClassifier(cascade_file)
    faces = detector.detectMultiScale(
        grayscale,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
    )

    # Outline every detection on the BGR canvas (the colour tuple is in BGR order).
    for left, top, box_w, box_h in faces:
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(bgr_canvas, top_left, bottom_right, (255, 0, 0), 2)

    annotated = Image.fromarray(cv2.cvtColor(bgr_canvas, cv2.COLOR_BGR2RGB))
    return annotated, faces

def save_detected_faces(filename, output_folder):
    """Crop every face detected in an image and save each crop as a JPEG.

    Crops are written to `output_folder` as face_1.jpg, face_2.jpg, ... in
    detection order. The folder is created if it does not exist.

    Args:
        filename: Image location, passed to detect_faces_in_image and to
            PIL's Image.open.
        output_folder: Directory that receives the cropped face images.

    Returns:
        The list of file paths written, in detection order; an empty list
        when no face was detected.
    """
    _, faces = detect_faces_in_image(filename)

    # Re-read the file so crops come from clean pixels rather than from the
    # annotated copy, which has rectangles drawn over the face borders.
    image_np = np.array(Image.open(filename).convert('RGB'))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    saved_paths = []
    for i, (x, y, w, h) in enumerate(faces, start=1):
        face_path = os.path.join(output_folder, f"face_{i}.jpg")
        Image.fromarray(image_np[y:y+h, x:x+w]).save(face_path)
        saved_paths.append(face_path)

    return saved_paths


In [None]:
def preprocess_image_for_face_recognition(image_path):
    """Read an image with OpenCV and return it converted from BGR to RGB.

    Args:
        image_path: Location of the image file, passed to cv2.imread.

    Returns:
        The image array after a BGR-to-RGB colour conversion.

    Raises:
        ValueError: If cv2.imread returns None for the given path.
    """
    bgr_pixels = cv2.imread(image_path)

    if bgr_pixels is not None:
        return cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)

    raise ValueError(f"Image at {image_path} could not be loaded.")

def get_face_encoding(image_rgb):
    """Return the first face encoding that face_recognition finds in an image.

    Args:
        image_rgb: Image array handed to face_recognition.face_encodings.

    Returns:
        The first element of the encodings returned for the image.

    Raises:
        ValueError: If no encoding is returned for the image.
    """
    encodings_found = face_recognition.face_encodings(image_rgb)

    if len(encodings_found) > 0:
        return encodings_found[0]

    raise ValueError("No faces found in the image.")

def compare_photos(aadhaar_photo_filename='/content/output_folder/face_1.jpg'):
    """
    Compares an Aadhaar photo with a user-uploaded photo.

    The face cropped from the card is read from `aadhaar_photo_filename`,
    the user is prompted to upload a photo of themselves, and the first face
    encoding of each image is compared with face_recognition.compare_faces.
    The outcome is printed and also returned.

    Args:
        aadhaar_photo_filename: Location of the face image cropped from the
            Aadhaar card. Defaults to the first crop in /content/output_folder.

    Returns:
        True when the two faces are reported as a match, otherwise False.

    Raises:
        ValueError: If either image cannot be loaded, contains no face, or
            if no photo is uploaded.
    """
    aadhaar_photo_rgb = preprocess_image_for_face_recognition(aadhaar_photo_filename)
    aadhaar_face_encoding = get_face_encoding(aadhaar_photo_rgb)

    print("Please upload a photo of yourself for comparison.")
    uploaded_photo = files.upload()
    if not uploaded_photo:
        # A cancelled upload would otherwise surface as a bare StopIteration.
        raise ValueError("No photo was uploaded for comparison.")
    user_photo_filename = next(iter(uploaded_photo))

    user_photo_rgb = preprocess_image_for_face_recognition(user_photo_filename)
    user_face_encoding = get_face_encoding(user_photo_rgb)

    results = face_recognition.compare_faces([aadhaar_face_encoding], user_face_encoding)
    is_match = bool(results[0])
    if is_match:
        print("The user-uploaded photo matches the Aadhaar card photo.")
    else:
        print("The user-uploaded photo does not match the Aadhaar card photo.")
    return is_match

In [None]:
def preprocess_image(image_path, target_size=(256, 256)):
    """Load an image from disk and shape it as a one-image batch for the classifier.

    The image is converted to RGB, resized, scaled to the [0, 1] range and
    given a leading batch axis.

    Args:
        image_path: Location of the image file, passed to cv2.imread.
        target_size: (height, width) to resize to. The default matches the
            256x256 input size the model in this notebook is built with.

    Returns:
        A float32 array of shape (1, height, width, 3).

    Raises:
        ValueError: If cv2.imread returns None for the given path.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # cv2.imread returns channels in BGR order, whereas the training images
    # were loaded through flow_from_directory in RGB. Without this conversion
    # the model would see red and blue swapped at inference time.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = target_size
    # cv2.resize expects the size as (width, height).
    image = cv2.resize(image, (width, height))

    # Same 1/255 rescaling that the training generator applies.
    image = image.astype("float32") / 255.0
    return np.expand_dims(image, axis=0)

# --- End-to-end verification of the uploaded card ---
test_image_path = filename

test_image = preprocess_image(test_image_path)

# The OCR cell must have been executed first; it defines `results`.
if 'results' not in globals():
    raise ValueError("OCR results are not defined.")

# Join the recognised strings (index 1 of every OCR entry) into one text blob.
text = " ".join(result[1] for result in results)


anomalies = detect_anomalies(text)

print("\nDetected Anomalies:")
for anomaly in anomalies:
    print(f"- {anomaly}")

prediction = model.predict(test_image)
print(f"\nModel Prediction: {prediction}")

# model.predict returns an array even for a single image; take the one sigmoid
# score out as a plain float instead of using an array as the `if` condition.
fake_probability = float(np.ravel(prediction)[0])

# NOTE(review): this treats a score >= 0.5 (label 1) as "fake". The label
# numbering comes from the training directory names - confirm that label 1
# really is the fake class.
if fake_probability >= 0.5:
    print("The Aadhaar card is likely to be FAKE.")
else:
    print("The Aadhaar card is likely to be REAL.")
    print("\n")
    if not anomalies:
        print("No anomalies detected. Proceeding with user data check.")
        check_user_data(text)
    else:
        print("Anomalies detected in the Aadhaar card data:")
        for anomaly in anomalies:
            print(f"- {anomaly}")

# NOTE(review): photo verification runs whatever the outcome above (including
# a FAKE verdict) - confirm that this is the intended flow.
print("Proceed with photo verification.")

output_folder = '/content/output_folder'

# Use the detection result instead of discarding it: with no detected face
# there is no fresh face_1.jpg, and compare_photos() would either fail to load
# the file or silently compare against a crop left over from an earlier run.
_, detected_faces = detect_faces_in_image(filename)
if len(detected_faces) == 0:
    print("No face was detected on the Aadhaar card image; skipping photo verification.")
else:
    save_detected_faces(filename, output_folder)
    compare_photos()