In [10]:
# Section 1: Imports
import cv2 as cv
import numpy as np
import statistics
import time
import pytesseract

# Section 2: Image Loading and Preprocessing
def load_and_binarize(image_path, threshold=200):
    img = cv.imread(image_path, 0)
    _, doc_bin = cv.threshold(img, threshold, 255, cv.THRESH_BINARY)
    return img, doc_bin

def get_negative(binary_image):
    return 255 - binary_image

def extract_connected_components(binary_image):
    return cv.connectedComponentsWithStats(binary_image)

# Section 3: Candidate Point Selection
def select_candidate_points(strategy, negative, num_labels, labels, centroids):
    if strategy == 'a':
        return np.column_stack(np.where(negative > 0))
    elif strategy == 'b':
        return centroids[1:]
    elif strategy == 'c':
        candidate_points = []
        for label in range(1, num_labels):
            component = np.where(labels == label)
            max_y = np.max(component[0])
            x = component[1][np.argmax(component[0])]
            candidate_points.append([x, max_y])
        return np.array(candidate_points)
    else:
        raise ValueError("Invalid strategy. Choose 'a', 'b', or 'c'.")

def remove_non_candidates(shape, candidate_points):
    result = np.zeros(shape, dtype=np.uint8)
    for point in candidate_points:
        x, y = point
        if 0 <= x < shape[1] and 0 <= y < shape[0]:
            result[int(y), int(x)] = 255
    return result

# Section 4: Hough Transform and Angle Calculation
def apply_hough_transform(binary_image, distance_resolution=1, angular_resolution=np.pi/180, density_threshold=10):
    return cv.HoughLines(binary_image, distance_resolution, angular_resolution, density_threshold)

def calculate_document_angle(lines):
    if lines is not None:
        angles = [line[0][1] for line in lines]
        median_angle = statistics.median(angles)
        document_angle = median_angle - np.pi/2
        return np.degrees(document_angle)
    return 0

# Section 5: Image Deskewing
def deskew_image(image, angle):
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv.getRotationMatrix2D(center, angle, 1.0)
    return cv.warpAffine(image, M, (w, h), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE)

# Section 6: OCR and PDF Generation
def perform_ocr(image):
    return pytesseract.image_to_string(image)

def generate_pdf(image, output_path):
    pdf = pytesseract.image_to_pdf_or_hocr(image, extension='pdf')
    with open(output_path, 'w+b') as f:
        f.write(pdf)


In [None]:
# Section 7: Main Execution - Task 1 and 2
image_path = 'doc.jpg'
original, binary = load_and_binarize(image_path)
negative = get_negative(binary)
num_labels, labels, stats, centroids = extract_connected_components(negative)

strategies = ['a', 'b', 'c']
density_thresholds = [5, 10, 15, 20]

for strategy in strategies:
    start_time = time.time()
    candidate_points = select_candidate_points(strategy, negative, num_labels, labels, centroids)
    selection_time = time.time() - start_time
    result = remove_non_candidates(negative.shape, candidate_points)
    cv.imwrite(f'candidate_points_{strategy}.jpg', result)
    
    print(f"\nStrategy {strategy}:")
    print(f"  Number of candidate points: {len(candidate_points)}")
    print(f"  Selection time: {selection_time:.4f} seconds")

    for threshold in density_thresholds:
        start_time = time.time()
        lines = apply_hough_transform(result, density_threshold=threshold)
        hough_time = time.time() - start_time
        
        angle = calculate_document_angle(lines)
        deskewed = deskew_image(original, angle)
        cv.imwrite(f'deskewed_{strategy}_{threshold}.jpg', deskewed)
        
        print(f"  Density Threshold: {threshold}")
        print(f"    Estimated angle: {angle:.2f} degrees")
        print(f"    Hough transform time: {hough_time:.4f} seconds")

print("\nDeskewing complete. Please check the output images.")


Strategy a:
  Number of candidate points: 205308
  Selection time: 0.0102 seconds
  Density Threshold: 5
    Estimated angle: -2.00 degrees
    Hough transform time: 0.0251 seconds
  Density Threshold: 10
    Estimated angle: -4.00 degrees
    Hough transform time: 0.0246 seconds
  Density Threshold: 15
    Estimated angle: -3.00 degrees
    Hough transform time: 0.0244 seconds
  Density Threshold: 20
    Estimated angle: -4.00 degrees
    Hough transform time: 0.0262 seconds

Strategy b:
  Number of candidate points: 3971
  Selection time: 0.0002 seconds
  Density Threshold: 5
    Estimated angle: -1.00 degrees
    Hough transform time: 0.0035 seconds
  Density Threshold: 10
    Estimated angle: -15.00 degrees
    Hough transform time: 0.0022 seconds
  Density Threshold: 15
    Estimated angle: 10.00 degrees
    Hough transform time: 0.0023 seconds
  Density Threshold: 20
    Estimated angle: 10.00 degrees
    Hough transform time: 0.0022 seconds


In [None]:
# Section 8: Main Execution - Task 3
test_images = ['01.jpg', '02.jpg', '03.jpg', '04.jpg', '05.jpg']
best_strategy = 'c'  # Assume 'c' is the best strategy based on previous results
best_threshold = 15  # Assume 15 is the best threshold based on previous results

for test_image in test_images:
    img = cv.imread(test_image, 0)
    _, binary = cv.threshold(img, 200, 255, cv.THRESH_BINARY)
    negative = get_negative(binary)
    num_labels, labels, stats, centroids = extract_connected_components(negative)
    
    candidate_points = select_candidate_points(best_strategy, negative, num_labels, labels, centroids)
    result = remove_non_candidates(negative.shape, candidate_points)
    
    lines = apply_hough_transform(result, density_threshold=best_threshold)
    angle = calculate_document_angle(lines)
    deskewed = deskew_image(img, angle)
    
    cv.imwrite(f'deskewed_{test_image}', deskewed)
    print(f"\nTest image: {test_image}")
    print(f"  Estimated angle: {angle:.2f} degrees")

In [None]:
# Section 9: Main Execution - Task 4
for test_image in test_images:
    original = cv.imread(test_image)
    deskewed = cv.imread(f'deskewed_{test_image}')
    
    original_text = perform_ocr(original)
    deskewed_text = perform_ocr(deskewed)
    
    print(f"\nOCR Results for {test_image}:")
    print("Original:")
    print(original_text[:500] + "..." if len(original_text) > 500 else original_text)
    print("\nDeskewed:")
    print(deskewed_text[:500] + "..." if len(deskewed_text) > 500 else deskewed_text)
    
    generate_pdf(original, f'original_{test_image[:-4]}.pdf')
    generate_pdf(deskewed, f'deskewed_{test_image[:-4]}.pdf')

print("\nOCR and PDF generation complete. Please check the output files.")