In [1]:
!pip install opencv_python

^C


In [2]:
# converting from pdf format to jpg format
!pip install PyMuPDF



In [1]:
import fitz  # PyMuPDF

def extract_images_from_pdf(pdf_path):
    pdf_document = fitz.open(pdf_path)
    
    for page_num in range(len(pdf_document)):
        page = pdf_document[page_num]
        image_list = page.get_images(full=True)  # Get the images on the page
        
        for img_index, img in enumerate(image_list):
            xref = img[0]  # The xref number of the image
            base_image = pdf_document.extract_image(xref)  # Extract the image
            image_bytes = base_image["image"]  # Get the image bytes
            image_extension = base_image["ext"]  # Get the image extension
            
            # Save the image
            image_filename = f"extracted_image_page_{page_num+1}_img_{img_index+1}.{image_extension}"
            with open(image_filename, "wb") as img_file:
                img_file.write(image_bytes)
                print(f"Saved {image_filename}")
    
    pdf_document.close()

# Usage
extract_images_from_pdf("Pan.pdf")


Saved extracted_image_page_1_img_1.jpeg


In [2]:
import cv2
import numpy as np
import random

def preprocess_document(image_path):
    # Read the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian smoothing to reduce noise
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Detect edges
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect lines to correct skew using Hough Transform
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            if -45 < angle < 45:  # Avoid vertical or near-vertical lines
                # Correct the skew
                M = cv2.getRotationMatrix2D((gray.shape[1] / 2, gray.shape[0] / 2), angle, 1)
                gray = cv2.warpAffine(gray, M, (gray.shape[1], gray.shape[0]))

    # Resize to standard size
    processed_image = cv2.resize(gray, (600, 400))

    # Enhance contrast using Adaptive Histogram Equalization (CLAHE)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    processed_image = clahe.apply(processed_image)

    return processed_image

def augment_image(image):
    # Random rotation
    angle = random.uniform(-15, 15)  # Rotate between -15 and 15 degrees
    height, width = image.shape[:2]
    center = (width // 2, height // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated_image = cv2.warpAffine(image, M, (width, height))

    # Random scaling
    scale = random.uniform(0.9, 1.1)  # Scale between 90% and 110%
    scaled_image = cv2.resize(rotated_image, None, fx=scale, fy=scale)

    return scaled_image



In [None]:
#import cv2
import numpy as np

def preprocess_image(image_path):
    # Read the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect edges using Canny
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect lines to correct skew using Hough Transform
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=100, maxLineGap=10)

    if lines is not None:
        angles = []
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angles.append(angle)

        if angles:
            median_angle = np.median(angles)

            # Rotate based on the calculated median angle
            if -45 <= median_angle < 45:
                # 0 degrees
                rotation_angle = 0
            elif 45 <= median_angle < 135:
                # 90 degrees
                rotation_angle = 90
            elif median_angle >= 135 or median_angle < -135:
                # 180 degrees
                rotation_angle = 180
            else:
                # 270 degrees (or -90 degrees)
                rotation_angle = -90

            # Correct the skew
            M = cv2.getRotationMatrix2D((gray.shape[1] / 2, gray.shape[0] / 2), rotation_angle, 1)
            gray = cv2.warpAffine(gray, M, (gray.shape[1], gray.shape[0]))

    # Resize to standard size (optional)
    processed_image = cv2.resize(gray, (600, 400))

    return processed_image

# Example usage
preprocessed_image = preprocess_image("extracted_image_page_1_img_1.jpeg")

# Save the processed image
cv2.imwrite("processed_image2.jpeg", preprocessed_image)

# Display the processed image (optional)
cv2.imshow('Processed Image', preprocessed_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [3]:

def main(image_path, output_path):
    # Preprocess the document
    processed_image = preprocess_document(image_path)

    # Augment the image
    #augmented_image = augment_image(processed_image)#

    # Save the final image
    cv2.imwrite(output_path, processed_image)
    print(f"Processed image saved to {output_path}")

# Example usage
image_path = "extracted_image_page_1_img_1.jpeg"
output_path = "processed_document_image.jpeg"
main(image_path, output_path)

Processed image saved to processed_document_image.jpeg


In [None]:
from mtcnn import MTCNN
def detect_face(image):
    detector = MTCNN()
    faces = detector.detect_faces(image)
    return faces

def extract_face(image, face):
    x, y, width, height = face['box']  # Extract the bounding box
    # Make sure the bounding box is within the image dimensions
    x = max(0, x)
    y = max(0, y)
    width = min(width, image.shape[1] - x)
    height = min(height, image.shape[0] - y)
    extracted_face = image[y:y + height, x:x + width]
    return extracted_face

# Main workflow
image_path = 'aadhar.jpg'  # Replace with your preprocessed image path
image = cv2.imread(image_path)

# Convert the image to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# MTCNN detects faces in the preprocessed image
faces = detect_face(rgb_image)

if faces:
    # Since there is only one face, extract it
    extracted_face = extract_face(rgb_image, faces[0])

    # Save the extracted face
    output_path = 'extract.jpg'
    cv2.imwrite(output_path, cv2.cvtColor(extracted_face, cv2.COLOR_RGB2BGR))  # Save as BGR for OpenCV

    # Optionally display the extracted face
    cv2.imshow('Extracted Face', extracted_face)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("No faces detected in the image.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [4]:
!pip install dlib

Collecting dlib
  Using cached dlib-19.24.6.tar.gz (3.4 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: dlib
  Building wheel for dlib (setup.py): started
  Building wheel for dlib (setup.py): finished with status 'error'
  Running setup.py clean for dlib
Failed to build dlib


  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [67 lines of output]
  running bdist_wheel
  running build
  running build_ext
    if LooseVersion(cmake_version) < '3.1.0':
  Building extension for Python 3.9.19 (main, May  6 2024, 20:12:36) [MSC v.1916 64 bit (AMD64)]
  Invoking CMake setup: 'cmake C:\Users\rohan\AppData\Local\Temp\pip-install-z3qzyy_n\dlib_6a5e2428425048d7a6c2c2d1b9971be9\tools\python -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=C:\Users\rohan\AppData\Local\Temp\pip-install-z3qzyy_n\dlib_6a5e2428425048d7a6c2c2d1b9971be9\build\lib.win-amd64-cpython-39 -DPYTHON_EXECUTABLE=C:\Users\rohan\anaconda3\python.exe -DDLIB_USE_FFMPEG=OFF -DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE=C:\Users\rohan\AppData\Local\Temp\pip-install-z3qzyy_n\dlib_6a5e2428425048d7a6c2c2d1b9971be9\build\lib.win-amd64-cpython-39 -A x64'
  -- Building for: NMake Makefiles
  CMake Error at CMakeLists.txt:5 (message):
  
  
  
    !!!!!!!!!!!!!!!!!!!!!!!

In [9]:
def extract_face_dlib(image_path):
    # Load the image
    image = cv2.imread(image_path)
    
    # Convert the image to RGB (dlib uses RGB instead of BGR)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Initialize dlib's face detector
    detector = dlib.get_frontal_face_detector()
    
    # Detect faces in the image
    detected_faces = detector(rgb_image, 1)
    
    # Assuming there is only one face, extract it
    if len(detected_faces) == 1:
        face = detected_faces[0]
        # Extract the face using the bounding box coordinates
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        extracted_face = image[y:y + h, x:x + w]
        return extracted_face
    else:
        print("No face detected or multiple faces detected.")
        return None

# Main workflow
image_path = 'aadhar.jpg'  # Replace with your document image path

# Extract the face from the document
extracted_face = extract_face_dlib(image_path)

# Check if extraction was successful
if extracted_face is not None:
    # Save the extracted face
    output_path = 'extracted_face_dlib.jpg'
    cv2.imwrite(output_path, extracted_face)

    # Optionally display the extracted face
    cv2.imshow('Extracted Face', extracted_face)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

NameError: name 'dlib' is not defined

In [2]:
!pip install retina-face

Collecting retina-face
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting gdown>=3.10.1 (from retina-face)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading retina_face-0.0.17-py3-none-any.whl (25 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown, retina-face
Successfully installed gdown-5.2.0 retina-face-0.0.17


In [8]:
import cv2
from retinaface import RetinaFace
# Load the image
image_path = 'aadhar.jpg'  # Replace with the path to your image
image = cv2.imread(image_path)
# Detect faces in the image
faces = RetinaFace.detect_faces(image)

# Check if any face is detected
if len(faces) == 0:
    print("No face detected.")
else:
    print(f"Detected {len(faces)} face(s).")
# Extract the face based on the detected bounding box
for key, face in faces.items():
    # Get bounding box coordinates
    facial_area = face['facial_area']
    x1, y1, x2, y2 = facial_area

    # Crop the face from the image
    extracted_face = image[y1:y2, x1:x2]

    # Save the extracted face
    output_path = 'extracted_face_dlb.jpg'
    cv2.imwrite(output_path, extracted_face)
    print(f"Face extracted and saved at {output_path}.")
    
    # Optional: Display the extracted face
    cv2.imshow("Extracted Face", extracted_face)
    cv2.waitKey(0)
    cv2.destroyAllWindows()



ValueError: You have tensorflow 2.17.0 and this requires tf-keras package. Please run `pip install tf-keras` or downgrade your tensorflow.