# Libraries

In [None]:
!pip install transformers_stream_generator
!pip install transformers
!pip install sentencepiece
!pip install gradio
!pip install pdf2image
!pip install pytesseract
!pip install deepface
# It's highly recommended to use `[decord]` feature for faster video loading.
!pip install qwen-vl-utils[decord]

Collecting transformers_stream_generator
  Downloading transformers-stream-generator-0.0.5.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: transformers_stream_generator
  Building wheel for transformers_stream_generator (setup.py) ... [?25l[?25hdone
  Created wheel for transformers_stream_generator: filename=transformers_stream_generator-0.0.5-py3-none-any.whl size=12425 sha256=57d20e8d6ea3f5c3640d6006cdb3bf749ffb859c20b41d6c153def9b1aaa5cd3
  Stored in directory: /root/.cache/pip/wheels/23/e8/f0/b3c58c12d1ffe60bcc8c7d121115f26b2c1878653edfca48db
Successfully built transformers_stream_generator
Installing collected packages: transformers_stream_generator
Successfully installed transformers_stream_generator-0.0.5
Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=

# Version 1

In [None]:
import gradio as gr
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
from pdf2image import convert_from_path
import re
from PIL import Image
import os

# Load QWEN 2.5 VL Model & Processor
# The same checkpoint id is used for both the model and its processor.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)

def extract_text_from_image(image):
    """Extract text from an image using QWEN 2.5 VL.

    The image is written to a uniquely named temporary PNG, referenced from a
    single-turn chat message, and the model's reply is decoded and returned.

    Args:
        image: Object exposing ``save(path)``; callers in this notebook pass
            PIL images.

    Returns:
        str: The generated reply with the prompt tokens stripped off.
    """
    import tempfile

    # A unique temp file (instead of a fixed "temp_image.png") keeps overlapping
    # requests from overwriting each other's upload.
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(image_path)

        messages = [
            {"role": "user", "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": "Extract text from this image."}
            ]}
        ]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt").to(device)

        generated_ids = model.generate(**inputs, max_new_tokens=512)
        # generate() returns prompt + completion; slice off the prompt part.
        generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
        return processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    finally:
        # Clean up the temp file even when preprocessing or generation raises.
        if os.path.exists(image_path):
            os.remove(image_path)


def extract_images_from_pdf(pdf_path):
    """Convert a PDF into a list of page images.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``convert_from_path``.

    Returns:
        Whatever ``convert_from_path`` returns for the document (callers index
        it to get the first page).
    """
    # The leftover debug print of the page objects was dropped; it only dumped
    # object reprs into the cell output on every upload.
    return convert_from_path(pdf_path)


def validate_pan_aadhaar(text):
    """Extract PAN and Aadhaar numbers from text.

    Args:
        text: Text to scan.

    Returns:
        dict: Keys ``"PAN Found"`` and ``"Aadhaar Found"``; each maps to the
        list of matches, or to the string ``"Not Found"`` when there are none.
    """
    # PAN: 5 uppercase letters, 4 digits, 1 uppercase letter. The lookarounds
    # stop a longer uppercase/digit token from yielding a PAN-shaped substring.
    pan_pattern = r"(?<![A-Z0-9])[A-Z]{5}[0-9]{4}[A-Z](?![A-Z0-9])"
    # Aadhaar: 12 digits, optionally in groups of four. The lookarounds reject
    # a 12-digit slice of a longer space-grouped number (e.g. 16 digits).
    aadhaar_pattern = r"\b(?<!\d[ \t])\d{4}\s?\d{4}\s?\d{4}\b(?![ \t]?\d)"

    pan_match = re.findall(pan_pattern, text)
    aadhaar_match = re.findall(aadhaar_pattern, text)

    return {
        "PAN Found": pan_match if pan_match else "Not Found",
        "Aadhaar Found": aadhaar_match if aadhaar_match else "Not Found"
    }


def process_file(file):
    """Handle an uploaded file (image or PDF): OCR it and validate the IDs.

    Args:
        file: Upload handed over by ``gr.File``: an object with a ``name``
            path attribute or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        tuple: ``(image, extracted_text, validation_results)``. When there is
        nothing to process the fallback
        ``(None, "No valid data extracted.", {})`` is returned.
    """
    if file is None:
        return None, "No valid data extracted.", {}

    path = getattr(file, "name", file)
    # Case-insensitive check so "SCAN.PDF" is not opened as an image.
    if str(path).lower().endswith(".pdf"):
        pages = extract_images_from_pdf(path)
        if not pages:
            return None, "No valid data extracted.", {}
        image = pages[0]  # only the first page is analysed
    else:
        image = Image.open(path)

    extracted_text = extract_text_from_image(image)
    validation_results = validate_pan_aadhaar(extracted_text)
    return image, extracted_text, validation_results

# Gradio UI
# One output component per element of the tuple returned by process_file.
output_components = [
    gr.Image(label="Extracted Image"),
    gr.Textbox(label="Extracted Text"),
    gr.JSON(label="PAN/Aadhaar Validation"),
]
upload_input = gr.File(label="Upload PDF or Image")

demo = gr.Interface(
    fn=process_file,
    inputs=upload_input,
    outputs=output_components,
    title="KYC Verification System",
    description="Upload a PAN/Aadhaar document (PDF or Image), extract text using QWEN 2.5 VL, and validate PAN/Aadhaar details.",
)

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)


config.json:   0%|          | 0.00/1.37k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/57.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/5 [00:00<?, ?it/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/1.09G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/216 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://310602df65658a5f67.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
from pdf2image import convert_from_path
import re
from PIL import Image
import os
import cv2
from deepface import DeepFace
import logging
from utils import file_exists, read_yaml

# Load QWEN 2.5 VL Model & Processor
# The same checkpoint id is used for both the model and its processor.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Logging setup: INFO and above are appended to logs/ekyc_logs.log
logging_str = "[%(asctime)s: %(levelname)s: %(module)s]: %(message)s"
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "ekyc_logs.log")
logging.basicConfig(
    filename=log_file,
    filemode="a",
    format=logging_str,
    level=logging.INFO,
)

# Settings are read from config.yaml through the read_yaml helper
config_path = "config.yaml"
config = read_yaml(config_path)

artifacts = config['artifacts']
cascade_path = artifacts['HAARCASCADE_PATH']
output_path = artifacts['INTERMIDEIATE_DIR']

def detect_and_extract_face(img):
    """Crop the largest roughly face-proportioned contour and save it as a JPEG.

    Edges are found with Canny on a blurred grayscale copy; among the external
    contours whose bounding box has a width/height ratio between 0.5 and 2.0,
    the one with the largest box area is kept. The box is enlarged by 50 %
    before cropping.

    Args:
        img: Image array (converted with ``cv2.COLOR_BGR2GRAY``), e.g. the
            result of ``cv2.imread``. ``None`` is tolerated.

    Returns:
        str | None: Path of the written crop, or ``None`` when no image was
        given, no candidate contour was found, or the crop could not be
        written.
    """
    logging.info("Extracting face using contours...")

    # cv2.imread returns None for unreadable files; bail out instead of
    # letting cvtColor raise.
    if img is None:
        logging.warning("No image supplied for face extraction")
        return None

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray_img, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)

    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    max_area = 0
    largest_contour = None

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h

        if 0.5 < w / h < 2.0 and area > max_area:  # Ensuring aspect ratio of a face
            max_area = area
            largest_contour = (x, y, w, h)

    if largest_contour is None:
        logging.warning("No face detected using contours")
        return None

    x, y, w, h = largest_contour
    # Grow the box by 50 % around its centre, clamped to the top-left edge;
    # slicing clamps the bottom-right edge.
    new_w, new_h = int(w * 1.50), int(h * 1.50)
    new_x, new_y = max(0, x - (new_w - w) // 2), max(0, y - (new_h - h) // 2)
    extracted_face = img[new_y:new_y + new_h, new_x:new_x + new_w]

    out_dir = os.path.join(os.getcwd(), output_path)
    # Without the directory, cv2.imwrite cannot create the file.
    os.makedirs(out_dir, exist_ok=True)
    filename = os.path.join(out_dir, "extracted_face.jpg")

    if os.path.exists(filename):
        os.remove(filename)

    # imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(filename, extracted_face):
        logging.error(f"Could not write extracted face to: {filename}")
        return None

    logging.info(f"Extracted face saved at: {filename}")
    return filename

def extract_text_from_image(image):
    """Extract text from an image using QWEN 2.5 VL.

    The image is written to a uniquely named temporary PNG, referenced from a
    single-turn chat message, and the model's reply is decoded and returned.

    Args:
        image: Object exposing ``save(path)``; callers in this notebook pass
            PIL images.

    Returns:
        str: The generated reply with the prompt tokens stripped off.
    """
    import tempfile

    # A unique temp file (instead of a fixed "temp_image.png") keeps overlapping
    # requests from overwriting each other's upload.
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(image_path)

        messages = [
            {"role": "user", "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": "Extract text from this image."}
            ]}
        ]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt").to(device)

        generated_ids = model.generate(**inputs, max_new_tokens=512)
        # generate() returns prompt + completion; slice off the prompt part.
        generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
        return processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    finally:
        # Remove the temp file even when preprocessing or generation raises.
        if os.path.exists(image_path):
            os.remove(image_path)

def extract_images_from_pdf(pdf_path):
    """Render the PDF at ``pdf_path`` into images via ``convert_from_path``."""
    pages = convert_from_path(pdf_path)
    return pages

def validate_pan_aadhaar(text):
    """Extract PAN and Aadhaar numbers from text.

    Args:
        text: Text to scan.

    Returns:
        dict: Keys ``"PAN Found"`` and ``"Aadhaar Found"``; each maps to the
        list of matches, or to the string ``"Not Found"`` when there are none.
    """
    # PAN: 5 uppercase letters, 4 digits, 1 uppercase letter. The lookarounds
    # stop a longer uppercase/digit token from yielding a PAN-shaped substring.
    pan_pattern = r"(?<![A-Z0-9])[A-Z]{5}[0-9]{4}[A-Z](?![A-Z0-9])"
    # Aadhaar: 12 digits, optionally in groups of four. The lookarounds reject
    # a 12-digit slice of a longer space-grouped number (e.g. 16 digits).
    aadhaar_pattern = r"\b(?<!\d[ \t])\d{4}\s?\d{4}\s?\d{4}\b(?![ \t]?\d)"

    pan_match = re.findall(pan_pattern, text)
    aadhaar_match = re.findall(aadhaar_pattern, text)

    return {
        "PAN Found": pan_match if pan_match else "Not Found",
        "Aadhaar Found": aadhaar_match if aadhaar_match else "Not Found",
    }

def process_file(file):
    """Handle an uploaded file (image or PDF): OCR, ID validation, face crop.

    Args:
        file: Upload handed over by ``gr.File``: an object with a ``name``
            path attribute or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        tuple: ``(image, extracted_text, validation_results, face_path)``.
        Every path returns four values, one per declared Gradio output.
        ``face_path`` is ``None`` for PDFs, for which no face extraction is
        run. When there is nothing to process the fallback
        ``(None, "No valid data extracted.", {}, None)`` is returned.
    """
    nothing = (None, "No valid data extracted.", {}, None)
    if file is None:
        return nothing

    path = getattr(file, "name", file)
    # Case-insensitive check so "SCAN.PDF" is not opened as an image.
    is_pdf = str(path).lower().endswith(".pdf")

    if is_pdf:
        pages = extract_images_from_pdf(path)
        if not pages:
            return nothing
        image = pages[0]  # only the first page is analysed
    else:
        image = Image.open(path)

    extracted_text = extract_text_from_image(image)
    validation_results = validate_pan_aadhaar(extracted_text)

    # The PDF branch used to return only three values, which does not match
    # the four outputs of the interface.
    face_path = None if is_pdf else detect_and_extract_face(cv2.imread(path))
    return image, extracted_text, validation_results, face_path

# One output component per element of the tuple returned by process_file.
output_components = [
    gr.Image(label="Extracted Image"),
    gr.Textbox(label="Extracted Text"),
    gr.JSON(label="PAN/Aadhaar Validation"),
    gr.Textbox(label="Extracted Face Path"),
]
upload_input = gr.File(label="Upload PDF or Image")

demo = gr.Interface(
    fn=process_file,
    inputs=upload_input,
    outputs=output_components,
    title="KYC Verification System",
    description="Upload a PAN/Aadhaar document (PDF or Image), extract text using QWEN 2.5 VL, validate PAN/Aadhaar details, and detect faces.",
)

demo.launch(share=True)


25-03-24 16:26:16 - Directory /root/.deepface has been created
25-03-24 16:26:16 - Directory /root/.deepface/weights has been created


ModuleNotFoundError: No module named 'utils'

# Version 2

In [None]:
# prompt: poppler install

!apt-get install poppler-utils


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 29 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.6 [186 kB]
Fetched 186 kB in 1s (243 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 126209 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.6_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.6) ...
Setting up poppler-utils (22.02.0-2ubuntu0.6) ...
Processing triggers for man-db (2.10.2-1) ...


In [None]:
import gradio as gr
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from pdf2image import convert_from_path
import re
from PIL import Image
import os
import cv2
import numpy as np
from deepface import DeepFace
import logging
from utils import file_exists, read_yaml

# Load Model & Processor
# The same checkpoint id is used for both the model and its processor.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Logging setup: INFO and above are appended to logs/ekyc_logs.log
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "ekyc_logs.log")
logging.basicConfig(
    filename=log_file,
    filemode="a",
    format="[%(asctime)s: %(levelname)s]: %(message)s",
    level=logging.INFO,
)

# Settings are read from config.yaml through the read_yaml helper
config_path = "config.yaml"
config = read_yaml(config_path)

artifacts = config['artifacts']
output_path = artifacts['INTERMIDEIATE_DIR']

# Enhance Aadhaar Image Clarity
def enhance_image(img):
    """Return a contrast-equalised, sharpened 3-channel copy of ``img``.

    The input is reduced to grayscale with ``cv2.COLOR_BGR2GRAY``, run through
    CLAHE and a 3x3 sharpening kernel, then expanded back to three channels
    with ``cv2.COLOR_GRAY2BGR``.
    """
    logging.info("Enhancing image clarity...")
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Contrast-limited adaptive histogram equalisation
    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    equalised = equaliser.apply(grayscale)

    # Centre-weighted 3x3 sharpening kernel
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    sharpened = cv2.filter2D(equalised, -1, sharpen_kernel)

    result = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)
    return result

# Face Extraction Using Contours
def detect_and_extract_face(img):
    """Crop the largest roughly face-proportioned contour and save it as a JPEG.

    Edges are found with Canny on a blurred grayscale copy; among the external
    contours whose bounding box has a width/height ratio between 0.5 and 2.0,
    the one with the largest box area is cropped.

    Args:
        img: Image array (converted with ``cv2.COLOR_BGR2GRAY``). ``None`` is
            tolerated.

    Returns:
        str | None: Path of the written crop, or ``None`` when no image was
        given, no candidate contour was found, or the crop could not be
        written.
    """
    logging.info("Extracting face using contours...")

    # Bail out on a missing image instead of letting cvtColor raise.
    if img is None:
        logging.warning("No image supplied for face extraction")
        return None

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)

    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    max_area = 0
    largest_contour = None

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        if 0.5 < w / h < 2.0 and area > max_area:  # Ensuring aspect ratio of a face
            max_area = area
            largest_contour = (x, y, w, h)

    if largest_contour is None:
        logging.warning("No face detected using contours")
        return None

    x, y, w, h = largest_contour
    extracted_face = img[y:y + h, x:x + w]

    # Without the directory, cv2.imwrite cannot create the file.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    filename = os.path.join(output_path, "extracted_face.jpg")

    if os.path.exists(filename):
        os.remove(filename)

    # imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(filename, extracted_face):
        logging.error(f"Could not write extracted face to: {filename}")
        return None

    logging.info(f"Extracted face saved at: {filename}")
    return filename

# Face Verification
def deepface_face_comparison(image1_path, image2_path):
    """Compare two face images with DeepFace.

    Returns ``False`` without calling DeepFace when either path fails the
    ``file_exists`` check; otherwise returns the ``'verified'`` entry of the
    ``DeepFace.verify`` result (``False`` when that key is absent).
    """
    logging.info("Verifying faces...")

    both_present = file_exists(image1_path) and file_exists(image2_path)
    if not both_present:
        logging.warning("One or both image paths do not exist")
        return False

    result = DeepFace.verify(img1_path=image1_path, img2_path=image2_path)
    is_verified = result.get('verified', False)
    return is_verified

# Extract Text from Image
def extract_text_from_image(image):
    """Extract text from a PIL image using the Qwen2.5-VL model.

    Args:
        image: PIL image (anything exposing ``convert("RGB")``).

    Returns:
        str: The model's reply only, with the prompt tokens stripped off.
    """
    # Convert in memory; the former save-to-"temp_image.png"-and-reopen
    # round-trip used a shared file name and leaked the file whenever
    # generation raised.
    img = image.convert("RGB")  # Ensure correct format

    messages = [{"role": "user", "content": [{"type": "image", "image": img}, {"type": "text", "text": "Extract text from this image."}]}]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[img], padding=True, return_tensors="pt").to(device)

    generated_ids = model.generate(**inputs, max_new_tokens=512)
    # generate() returns prompt + completion. Decoding it untrimmed puts the
    # chat template and the instruction text into the "extracted" result, so
    # keep only the newly generated tokens.
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    response = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return response


# Extract Images from PDF
def extract_images_from_pdf(pdf_path):
    """Render the PDF at ``pdf_path`` into images via ``convert_from_path``."""
    pages = convert_from_path(pdf_path)
    return pages

# Aadhaar & PAN Validation
def validate_pan_aadhaar(text):
    """Extract PAN and Aadhaar numbers from text, falling back to a VID.

    Args:
        text: Text to scan.

    Returns:
        dict: ``"PAN Found"`` maps to the list of PAN matches or
        ``"Not Found"``. ``"Aadhaar Found"`` maps to the list of Aadhaar
        matches, to ``"Masked, VID Available"`` when no Aadhaar number but a
        16-digit VID is present, or to ``"Not Found"``.
    """
    # PAN: 5 uppercase letters, 4 digits, 1 uppercase letter. The lookarounds
    # stop a longer uppercase/digit token from yielding a PAN-shaped substring.
    pan_pattern = r"(?<![A-Z0-9])[A-Z]{5}[0-9]{4}[A-Z](?![A-Z0-9])"
    # Aadhaar: 12 digits, optionally in groups of four. The lookarounds reject
    # a 12-digit slice of a longer space-grouped number such as a VID.
    aadhaar_pattern = r"\b(?<!\d[ \t])\d{4}\s?\d{4}\s?\d{4}\b(?![ \t]?\d)"
    # VID: 16 digits, contiguous or in space-separated groups of four.
    vid_pattern = r"\b(?<!\d[ \t])\d{4}[ \t]?\d{4}[ \t]?\d{4}[ \t]?\d{4}\b(?![ \t]?\d)"

    pan_match = re.findall(pan_pattern, text)
    aadhaar_match = re.findall(aadhaar_pattern, text)
    vid_match = re.findall(vid_pattern, text)

    pan_status = pan_match if pan_match else "Not Found"

    # If Aadhaar is masked, check for VID
    if not aadhaar_match and vid_match:
        logging.info("Aadhaar is masked, using VID.")
        return {"PAN Found": pan_status, "Aadhaar Found": "Masked, VID Available"}

    return {"PAN Found": pan_status, "Aadhaar Found": aadhaar_match if aadhaar_match else "Not Found"}

# Process Uploaded File
def process_file(file):
    """Enhance an uploaded image or PDF page, OCR it and validate the IDs.

    Args:
        file: Upload handed over by ``gr.File``: an object with a ``name``
            path attribute or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        tuple: ``(image, extracted_text, validation_results, face_path)``.
        Every path returns four values, one per declared Gradio output. For
        PDFs the first page is returned as the image and ``face_path`` is
        ``None``. When there is nothing to process the fallback
        ``(None, "No valid data extracted.", {}, None)`` is returned.
    """
    nothing = (None, "No valid data extracted.", {}, None)
    if file is None:
        return nothing

    path = getattr(file, "name", file)
    # Case-insensitive check so "SCAN.PDF" is not handed to cv2.imread.
    is_pdf = str(path).lower().endswith(".pdf")

    if is_pdf:
        pages = extract_images_from_pdf(path)
        if not pages:
            return nothing
        first_page = pages[0]
        # PIL arrays are RGB-ordered while enhance_image converts with
        # COLOR_BGR2GRAY, so reorder the channels first.
        image = cv2.cvtColor(np.array(first_page.convert("RGB")), cv2.COLOR_RGB2BGR)
    else:
        image = cv2.imread(path)
        # cv2.imread returns None instead of raising for unreadable files.
        if image is None:
            return nothing

    enhanced_image = enhance_image(image)
    extracted_text = extract_text_from_image(Image.fromarray(enhanced_image))
    validation_results = validate_pan_aadhaar(extracted_text)

    if is_pdf:
        # Previously three values were returned here, which does not match
        # the four outputs of the interface.
        return first_page, extracted_text, validation_results, None

    face_path = detect_and_extract_face(enhanced_image)
    return Image.fromarray(enhanced_image), extracted_text, validation_results, face_path

# Gradio Interface
# One output component per element of the tuple returned by process_file.
output_components = [
    gr.Image(label="Enhanced Image"),
    gr.Textbox(label="Extracted Text"),
    gr.JSON(label="PAN/Aadhaar Validation"),
    gr.Textbox(label="Extracted Face Path"),
]
upload_input = gr.File(label="Upload PAN/Aadhaar (PDF or Image)")

demo = gr.Interface(
    fn=process_file,
    inputs=upload_input,
    outputs=output_components,
    title="Enhanced KYC Verification System",
    description="Upload a PAN/Aadhaar document, extract text using QWEN 2.5 VL, validate PAN/Aadhaar details, and detect faces.",
)

demo.launch(share=True, debug=True)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d21234ff54f2d566f9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://d21234ff54f2d566f9.gradio.live




In [None]:
import gradio as gr
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from pdf2image import convert_from_path
import re
from PIL import Image
import os
import cv2
import numpy as np
from deepface import DeepFace
import logging
from utils import file_exists, read_yaml

# Load Model & Processor
# The same checkpoint id is used for both the model and its processor.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# float16 when CUDA is available, otherwise let from_pretrained choose ("auto")
if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = "auto"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Logging setup: INFO and above are appended to logs/ekyc_logs.log
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "ekyc_logs.log")
logging.basicConfig(
    filename=log_file,
    filemode="a",
    format="[%(asctime)s: %(levelname)s]: %(message)s",
    level=logging.INFO,
)

# Settings are read from config.yaml through the read_yaml helper
config_path = "config.yaml"
config = read_yaml(config_path)
artifacts = config["artifacts"]
output_path = artifacts["INTERMIDEIATE_DIR"]

# Enhance Aadhaar Image Clarity
def enhance_image(img):
    """Return a contrast-equalised, sharpened 3-channel copy of ``img``.

    The input is reduced to grayscale with ``cv2.COLOR_BGR2GRAY``, run through
    CLAHE and a 3x3 sharpening kernel, then expanded back to three channels
    with ``cv2.COLOR_GRAY2BGR``.
    """
    logging.info("Enhancing image clarity...")
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Contrast-limited adaptive histogram equalisation
    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalised = equaliser.apply(grayscale)

    # Centre-weighted 3x3 sharpening kernel
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    sharpened = cv2.filter2D(equalised, -1, sharpen_kernel)

    result = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)
    return result

# Extract Text from Image (Optimized for Memory)
def extract_text_from_image(image):
    """Extract text from a PIL image using the Qwen2.5-VL model.

    Args:
        image: PIL image (anything exposing ``convert("RGB")``).

    Returns:
        str: The model's reply with the prompt tokens stripped off, or the
        string ``"Error in text extraction."`` when any step raises (the
        exception is logged).
    """
    try:
        image = image.convert("RGB")  # Convert to correct format
        messages = [
            {"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": "Extract text from this image."}]}
        ]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt").to(device)

        with torch.no_grad():  # Avoid unnecessary memory usage
            generated_ids = model.generate(**inputs, max_new_tokens=512)

        # generate() returns prompt + completion. Decoding it untrimmed puts
        # the chat template and the instruction text into the "extracted"
        # result, so keep only the newly generated tokens.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        response = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]

        # Drop the tensor references before asking CUDA to release cached memory.
        del inputs, generated_ids, generated_ids_trimmed
        torch.cuda.empty_cache()
        return response
    except Exception as e:
        logging.error(f"Error in text extraction: {str(e)}")
        return "Error in text extraction."

# Extract Images from PDF (Optimize Memory)
def extract_images_from_pdf(pdf_path):
    """Render the PDF at ``pdf_path`` into images via ``convert_from_path`` at 150 DPI."""
    # Reduce DPI to lower memory usage
    pages = convert_from_path(pdf_path, dpi=150)
    return pages

def validate_pan_aadhaar(text):
    """Extract PAN and Aadhaar numbers from text, falling back to a VID.

    Args:
        text: Text to scan.

    Returns:
        dict: ``"PAN Found"`` maps to the list of PAN matches or
        ``"Not Found"``. ``"Aadhaar Found"`` maps to the list of Aadhaar
        matches, to ``"Masked, VID Available"`` when no Aadhaar number but a
        16-digit VID is present, or to ``"Not Found"``.
    """
    # PAN: 5 uppercase letters, 4 digits, 1 uppercase letter. The lookarounds
    # stop a longer uppercase/digit token from yielding a PAN-shaped substring.
    pan_pattern = r"(?<![A-Z0-9])[A-Z]{5}[0-9]{4}[A-Z](?![A-Z0-9])"
    # Aadhaar: exactly 12 digits, optionally in groups of four. The lookarounds
    # reject a 12-digit slice of a longer space-grouped number such as a VID
    # (the former post-filter on length could never reject anything, because
    # the pattern itself always yields 12 digits).
    aadhaar_pattern = r"\b(?<!\d[ \t])\d{4}\s?\d{4}\s?\d{4}\b(?![ \t]?\d)"
    # VID: 16 digits, contiguous or in space-separated groups of four.
    vid_pattern = r"\b(?<!\d[ \t])\d{4}[ \t]?\d{4}[ \t]?\d{4}[ \t]?\d{4}\b(?![ \t]?\d)"

    pan_match = re.findall(pan_pattern, text)
    aadhaar_match = re.findall(aadhaar_pattern, text)
    vid_match = re.findall(vid_pattern, text)

    if aadhaar_match:
        aadhaar_status = aadhaar_match
    elif vid_match:
        aadhaar_status = "Masked, VID Available"
    else:
        aadhaar_status = "Not Found"

    return {
        "PAN Found": pan_match if pan_match else "Not Found",
        "Aadhaar Found": aadhaar_status
    }


# Process Uploaded File (Optimized for CUDA)
def process_file(file):
    """Enhance an uploaded image or PDF page, OCR it and validate the IDs.

    Args:
        file: Upload handed over by ``gr.File``: an object with a ``name``
            path attribute or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        tuple: ``(enhanced_image, extracted_text, validation_results, None)``.
        The fourth slot (face path) is always ``None`` in this version.
        ``(None, "No valid data extracted.", {}, None)`` is returned when
        nothing was uploaded or the PDF produced no pages, and
        ``(None, "Error processing file.", {}, None)`` when any step raises
        (the exception is logged).
    """
    nothing = (None, "No valid data extracted.", {}, None)
    if file is None:
        return nothing

    try:
        path = getattr(file, "name", file)
        # Case-insensitive check so "SCAN.PDF" is not handed to cv2.imread.
        if str(path).lower().endswith(".pdf"):
            images = extract_images_from_pdf(path)
            if not images:
                # This case used to fall off the end of the function and
                # return a bare None for four outputs.
                return nothing

            # Process only the first image to save memory. PIL arrays are
            # RGB-ordered while enhance_image converts with COLOR_BGR2GRAY,
            # so reorder the channels first.
            img = cv2.cvtColor(np.array(images[0].convert("RGB")), cv2.COLOR_RGB2BGR)
            del images
            torch.cuda.empty_cache()
        else:
            img = cv2.imread(path)
            # cv2.imread returns None instead of raising for unreadable files.
            if img is None:
                raise ValueError(f"Could not read image: {path}")

        enhanced_image = enhance_image(img)
        extracted_text = extract_text_from_image(Image.fromarray(enhanced_image))
        validation_results = validate_pan_aadhaar(extracted_text)

        return Image.fromarray(enhanced_image), extracted_text, validation_results, None

    except Exception as e:
        logging.error(f"Error processing file: {str(e)}")
        return None, "Error processing file.", {}, None

# Gradio Interface
# One output component per element of the tuple returned by process_file.
output_components = [
    gr.Image(label="Enhanced Image"),
    gr.Textbox(label="Extracted Text"),
    gr.JSON(label="PAN/Aadhaar Validation"),
    gr.Textbox(label="Extracted Face Path"),
]
upload_input = gr.File(label="Upload PAN/Aadhaar (PDF or Image)")

demo = gr.Interface(
    fn=process_file,
    inputs=upload_input,
    outputs=output_components,
    title="Enhanced KYC Verification System",
    description="Upload a PAN/Aadhaar document, extract text using QWEN 2.5 VL, validate PAN/Aadhaar details, and detect faces.",
)

demo.launch(debug=True, share=True)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://f6241b925cfa96a4f2.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
