In [None]:
!pip install flask ultralytics easyocr mysql-connector-python opencv-python pandas
from flask import Flask, request, jsonify, render_template, session, redirect, url_for
import os
import zipfile
import pandas as pd
import mysql.connector
from werkzeug.utils import secure_filename
import shutil
import uuid
import json
from datetime import datetime
import difflib
import cv2
from ultralytics import YOLO
import easyocr

# Flask application object and upload-related configuration.
app = Flask(__name__)
# Flask signs the session cookie with this key. A key generated with
# os.urandom() changes on every start, which invalidates every existing
# session (and the upload paths / results stored in it) on restart. Prefer a
# stable key supplied through the FLASK_SECRET_KEY environment variable and
# only fall back to a random per-process key when it is not set.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
app.config['UPLOAD_FOLDER'] = 'uploads/'
app.config['PROCESSED_FOLDER'] = 'processed/'
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max upload

# Ensure upload directories exist (relative to the current working directory)
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['PROCESSED_FOLDER'], exist_ok=True)


# The three models are instantiated once, at import time, from fixed local
# paths / settings kept in the constants below.
CLASSIFICATION_WEIGHTS = r"C:\Users\mahia\OneDrive\Desktop\infosys_project\classification_model\yolo11n-cls.pt"
DETECTION_WEIGHTS = r"C:\Users\mahia\OneDrive\Desktop\infosys_project\detection_model\yolo11n.pt"
OCR_LANGUAGES = ['en']

classification_model = YOLO(CLASSIFICATION_WEIGHTS)  # classification model
detection_model = YOLO(DETECTION_WEIGHTS)  # detection model
ocr_reader = easyocr.Reader(OCR_LANGUAGES)  # OCR model


# Keyword arguments handed to mysql.connector.connect() by get_db_connection()
db_config = dict(
    host='localhost',
    user='aadhaar_user',
    password='aadhaar_password',
    database='aadhaar_fraud_db',
)

def get_db_connection():
    """Open and return a new MySQL connection built from ``db_config``."""
    connection = mysql.connector.connect(**db_config)
    return connection

def calculate_similarity(str1, str2):
    """Return how similar two values are, as a percentage between 0 and 100.

    Both values are converted with ``str()``, lower-cased and stripped of
    surrounding whitespace, then compared with ``difflib.SequenceMatcher``.

    Args:
        str1: First value to compare (string or anything ``str()`` accepts).
        str2: Second value to compare.

    Returns:
        ``0`` when either value is missing -- falsy, a float NaN (what pandas
        yields for an empty cell), or blank once stripped. Otherwise the
        ``SequenceMatcher`` ratio scaled to 0-100 and rounded to two decimals.
    """
    def _is_nan(value):
        # NaN is the only float value that does not compare equal to itself.
        return isinstance(value, float) and value != value

    if not str1 or not str2 or _is_nan(str1) or _is_nan(str2):
        return 0

    left = str(str1).lower().strip()
    right = str(str2).lower().strip()

    # Two whitespace-only inputs would otherwise normalise to '' and '' and be
    # reported as a perfect 100% match by SequenceMatcher.
    if not left or not right:
        return 0

    similarity = difflib.SequenceMatcher(None, left, right).ratio()
    return round(similarity * 100, 2)

@app.route('/')
def home():
    """Serve the landing page from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/upload', methods=['POST'])
def upload_files():
    """Store an uploaded ZIP/Excel pair and unpack the ZIP for later processing.

    Expects a multipart POST with the file fields ``zipfile`` and
    ``excelfile``. Each upload gets its own UUID-named sub-directory under
    the configured upload and processed folders; the ZIP is extracted into
    ``<upload dir>/extracted``.

    On success the session keys ``current_session``, ``extract_dir`` and
    ``excel_path`` are set and a JSON body with ``message``, ``session_id``
    and ``redirect`` is returned.

    Returns:
        A JSON response; status 400 with an ``error`` key when a file is
        missing, no file was selected, or the ZIP is not a valid archive.
    """
    if 'zipfile' not in request.files or 'excelfile' not in request.files:
        return jsonify({"error": "Both ZIP and Excel files are required"}), 400

    zip_file = request.files['zipfile']
    excel_file = request.files['excelfile']

    if zip_file.filename == '' or excel_file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    # Create a unique ID and working directories for this processing batch
    session_id = str(uuid.uuid4())
    session_upload_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    session_processed_dir = os.path.join(app.config['PROCESSED_FOLDER'], session_id)

    os.makedirs(session_upload_dir, exist_ok=True)
    os.makedirs(session_processed_dir, exist_ok=True)

    # secure_filename() may return '' (e.g. for a name made only of characters
    # it strips); joining '' would make the target the directory itself.
    zip_name = secure_filename(zip_file.filename) or 'upload.zip'
    excel_name = secure_filename(excel_file.filename) or 'upload.xlsx'
    if excel_name == zip_name:
        # Keep the Excel file from overwriting the ZIP before it is extracted.
        excel_name = f"excel_{excel_name}"

    zip_path = os.path.join(session_upload_dir, zip_name)
    excel_path = os.path.join(session_upload_dir, excel_name)

    zip_file.save(zip_path)
    excel_file.save(excel_path)

    # Extract ZIP file
    extract_dir = os.path.join(session_upload_dir, 'extracted')
    os.makedirs(extract_dir, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except zipfile.BadZipFile:
        # Report a client error instead of an unhandled 500, and do not leave
        # the half-populated batch directories behind.
        shutil.rmtree(session_upload_dir, ignore_errors=True)
        shutil.rmtree(session_processed_dir, ignore_errors=True)
        return jsonify({"error": "Uploaded ZIP file is not a valid ZIP archive"}), 400

    # Only record the batch in the session once the upload is fully usable
    session['current_session'] = session_id
    session['extract_dir'] = extract_dir
    session['excel_path'] = excel_path

    return jsonify({
        "message": "Files uploaded successfully!",
        "session_id": session_id,
        "redirect": url_for('process_page')
    })

@app.route('/process-page')
def process_page():
    """Show the processing page, or send the visitor home without an upload session."""
    if 'current_session' in session:
        return render_template('process.html', session_id=session['current_session'])

    return redirect(url_for('home'))

def _to_native(value):
    """Convert a NumPy scalar (e.g. from a pandas row) to a built-in Python value."""
    # NumPy scalars expose .item(); they are neither JSON-serialisable for the
    # session nor accepted as query parameters by the MySQL connector.
    return value.item() if hasattr(value, 'item') else value


def _collect_image_files(extract_dir):
    """Return the paths of all .png/.jpg/.jpeg files found below extract_dir."""
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(extract_dir)
        for name in names
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]


def _extract_fields(image, detected_fields):
    """Run OCR on every detected box and return ``{lower-cased class name: text}``."""
    extracted = {}
    height, width = image.shape[:2]

    for box in detected_fields:
        # Each box is read as x1, y1, x2, y2, confidence, class id.
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        class_id = int(box[5])

        # Clamp to the image and skip degenerate boxes: an empty crop makes
        # cv2.cvtColor raise.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            continue

        # Keys are lower-cased because the matching/scoring code looks up
        # 'uid', 'name' and 'address'.
        # NOTE(review): assumes the detection model's class names are those
        # words in some letter case -- confirm against the trained model.
        field_class = str(detection_model.names[class_id]).lower()

        gray_roi = cv2.cvtColor(image[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
        ocr_result = ocr_reader.readtext(gray_roi, detail=0)  # detail=0 returns only the text
        extracted[field_class] = ' '.join(ocr_result)  # later boxes of a class overwrite earlier ones

    return extracted


def _find_matching_row(excel_data, extracted_data):
    """Return the Excel row matching the extracted data, or None.

    An exact UID match anywhere in the sheet wins; only when no row has the
    extracted UID is the first row whose name similarity exceeds 80 used.
    """
    if 'uid' in extracted_data:
        extracted_uid = str(extracted_data['uid']).strip()
        for _, row in excel_data.iterrows():
            if str(row['UID']).strip() == extracted_uid:
                return row

    if 'name' in extracted_data:
        for _, row in excel_data.iterrows():
            if calculate_similarity(row['Name'], extracted_data['name']) > 80:
                return row

    return None


def _build_excel_address(record):
    """Join the non-empty address columns of an Excel row into one string."""
    address_columns = ['House Flat Number', 'Town', 'Street Road Name', 'City', 'State', 'PINCODE']
    return " ".join(
        str(record[column])
        for column in address_columns
        if column in record and pd.notna(record[column])
    )


def _save_verification_result(values):
    """Insert one row into verification_results; failures are logged, not raised."""
    query = """
    INSERT INTO verification_results
    (session_id, sr_no, document_path, extracted_name, excel_name, name_match_score,
    extracted_uid, excel_uid, uid_match_score, extracted_address, excel_address,
    address_match_score, overall_score, remarks, processed_date)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    conn = None
    cursor = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()
        cursor.execute(query, values)
        conn.commit()
    except Exception:
        # Persisting is best-effort: the result is still returned to the user.
        app.logger.exception("Database error")
    finally:
        # Always release the cursor/connection, even when execute() failed.
        for resource in (cursor, conn):
            if resource is not None:
                try:
                    resource.close()
                except Exception:
                    app.logger.exception("Error closing database resource")


@app.route('/process', methods=['POST'])
def process_files():
    """Run every uploaded image through classification, detection, OCR and matching.

    Reads ``extract_dir`` and ``excel_path`` from the session, loads the Excel
    sheet and, for each image classified as class 0 (assumed to be "Aadhaar"),
    OCRs the detected fields, finds the matching Excel row, scores name / UID /
    address similarity (weights 0.4 / 0.4 / 0.2) and stores the outcome in the
    ``verification_results`` table. All per-image outcomes are saved in
    ``session['results']``.

    Returns:
        A JSON response with ``message`` and ``redirect`` on success, or
        status 400 with an ``error`` key when there is no active session, no
        upload, the Excel file cannot be read, or it lacks the ``SrNo``,
        ``Name`` or ``UID`` column.
    """
    if 'current_session' not in session:
        return jsonify({"error": "No active session"}), 400

    extract_dir = session.get('extract_dir')
    excel_path = session.get('excel_path')

    if not extract_dir or not excel_path:
        return jsonify({"error": "Upload files first"}), 400

    # Read Excel file
    try:
        excel_data = pd.read_excel(excel_path)
    except Exception as e:
        return jsonify({"error": f"Error reading Excel file: {str(e)}"}), 400

    # Fail early with a clear message instead of a KeyError mid-processing
    missing_columns = [c for c in ('SrNo', 'Name', 'UID') if c not in excel_data.columns]
    if missing_columns:
        return jsonify({
            "error": f"Excel file is missing required columns: {', '.join(missing_columns)}"
        }), 400

    results = []

    for image_path in _collect_image_files(extract_dir):
        document = os.path.basename(image_path)

        # 1. Classification - check if it's an Aadhaar card
        classification_result = classification_model(image_path)
        if classification_result[0].probs.top1 != 0:  # Assuming class 0 is "Aadhaar"
            results.append({
                "document": document,
                "error": "Not identified as an Aadhaar card"
            })
            continue

        # cv2.imread returns None (it does not raise) for unreadable files
        image = cv2.imread(image_path)
        if image is None:
            results.append({
                "document": document,
                "error": "Image could not be read"
            })
            continue

        # 2. Detection - detect fields in the Aadhaar card
        detection_result = detection_model(image_path)
        detected_fields = detection_result[0].boxes.data.tolist()

        # 3. OCR - extract text from each field
        extracted_data = _extract_fields(image, detected_fields)

        # 4. Match with Excel data
        record = _find_matching_row(excel_data, extracted_data)
        if record is None:
            results.append({
                "document": document,
                "error": "No matching record found in Excel file",
                "extracted_data": extracted_data
            })
            continue

        sr_no = _to_native(record['SrNo'])
        excel_name = _to_native(record['Name'])
        excel_uid = str(record['UID'])

        # Calculate match scores
        name_score = calculate_similarity(excel_name, extracted_data.get('name', ''))
        uid_score = calculate_similarity(excel_uid, extracted_data.get('uid', ''))

        extracted_address = extracted_data.get('address', '')
        excel_address = _build_excel_address(record)
        address_score = calculate_similarity(excel_address, extracted_address)

        # Calculate overall score (customize weights as needed)
        overall_score = (name_score * 0.4) + (uid_score * 0.4) + (address_score * 0.2)

        # Determine final remarks
        if overall_score >= 90:
            remarks = "Verified"
        elif overall_score >= 70:
            remarks = "Needs Manual Verification"
        else:
            remarks = "Potential Fraud"

        _save_verification_result((
            session['current_session'],
            sr_no,
            image_path,
            extracted_data.get('name', ''),
            excel_name,
            name_score,
            extracted_data.get('uid', ''),
            excel_uid,
            uid_score,
            extracted_address,
            excel_address,
            address_score,
            overall_score,
            remarks,
            datetime.now()
        ))

        results.append({
            "sr_no": sr_no,
            "document": document,
            "name_match": name_score,
            "uid_match": uid_score,
            "address_match": address_score,
            "overall_score": overall_score,
            "remarks": remarks
        })

    # Save results to session for the results page.
    # NOTE(review): if the app uses Flask's default cookie-backed session, a
    # large batch may exceed the browser's cookie size limit -- confirm, and
    # consider loading results from the database by session id instead.
    session['results'] = results

    return jsonify({
        "message": "Processing complete",
        "redirect": url_for('results_page')
    })

@app.route('/results-page')
def results_page():
    """Show the stored results, or send the visitor home when none are available."""
    has_session = 'current_session' in session
    has_results = 'results' in session

    if not (has_session and has_results):
        return redirect(url_for('home'))

    stored_results = session['results']
    return render_template('results.html', results=stored_results)

if __name__ == "__main__":
    # Start Flask's built-in server when run as a script. Debug mode is
    # disabled to avoid SystemExit: 1.
    app.run(debug=False)