In [1]:
from commonfunctions import *
import cv2 as cv2
from PIL import Image, ImageOps
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
def load_image(path):
    """Load an image file as a NumPy array with its EXIF orientation applied.

    Parameters
    ----------
    path : str or path-like
        Location of the image; passed straight to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        The pixel data produced by ``np.array`` on the orientation-corrected image.
    """
    # The context manager guarantees the file handle is released even if
    # decoding or the EXIF transpose raises.
    with Image.open(path) as img:
        upright = ImageOps.exif_transpose(img)  # Rotate image to match camera orientation
        return np.array(upright)

In [None]:
from skimage.transform import hough_line, hough_line_peaks,probabilistic_hough_line, rotate
from skimage.transform import ProjectiveTransform, warp
from skimage.measure import find_contours

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Parameters
    ----------
    pts : array-like of shape (4, 2)
        Corner coordinates as (x, y) pairs, in any order. Lists are accepted
        as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        A (4, 2) float32 array in the order [top-left, top-right,
        bottom-right, bottom-left].
    """
    pts = np.asarray(pts)  # allow plain lists / tuples, not only ndarrays
    rectangle = np.zeros((4, 2), dtype=np.float32)

    # x + y is smallest at the top-left corner and largest at the bottom-right
    coord_sum = pts.sum(axis=1)
    rectangle[0] = pts[np.argmin(coord_sum)]
    rectangle[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left
    coord_diff = np.diff(pts, axis=1)
    rectangle[1] = pts[np.argmin(coord_diff)]
    rectangle[3] = pts[np.argmax(coord_diff)]

    return rectangle

# Takes the 4 corners of the paper and warps them onto an upright rectangle,
# the way a document-scanner app flattens a photographed page.
def four_point(image, pts):
    """Perspective-warp the quadrilateral `pts` in `image` to an axis-aligned rectangle.

    Parameters
    ----------
    image : numpy.ndarray
        Source image handed to ``cv2.warpPerspective``.
    pts : array of shape (4, 2)
        The four corner points, in any order (they are sorted by ``order_points``).

    Returns
    -------
    numpy.ndarray
        The warped image of size (maxheight, maxwidth).
    """
    corners = order_points(pts)
    tl, tr, br, bl = corners

    # Take the longer of each pair of opposite edges so nothing gets cropped
    out_w = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    out_h = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    # Destination corners, in the same order as `corners`
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype=np.float32,
    )

    # 3x3 homography mapping the detected corners onto the target rectangle
    homography = cv2.getPerspectiveTransform(corners, target)

    # Resample every pixel of the paper through the homography
    return cv2.warpPerspective(image, homography, (out_w, out_h))

# Takes an image and tries to find the 4 corner points of the document so it can be warped
def detect_document_contour(image):
    """Locate the document outline in `image` and return four corner points.

    Returns
    -------
    numpy.ndarray or None
        A (4, 2) array of corner points: the first 4-vertex polygon
        approximation found among the ten largest external contours, or, if
        none approximates to 4 vertices, the corners of the minimum-area
        rectangle around the largest contour (float32). ``None`` when no
        contour is found at all.
    """
    smoothed = gaussian(rgb2gray(image), sigma=1)
    edge_mask = canny(smoothed, sigma=1, low_threshold=30/255, high_threshold=100/255)
    edge_img = edge_mask.astype(np.uint8) * 255

    # Close small gaps so the page outline forms one connected contour
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    edge_img = cv2.morphologyEx(edge_img, cv2.MORPH_CLOSE, closing_kernel, iterations=2)

    # RETR_EXTERNAL returns only outer contours; the table grid inside the page is not wanted
    found, _ = cv2.findContours(edge_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not found:
        return None

    # Largest contour (expected to be the paper) first
    by_area = sorted(found, key=cv2.contourArea, reverse=True)

    tolerances = (0.01, 0.02, 0.03, 0.04, 0.05)
    for candidate in by_area[:10]:
        perimeter = cv2.arcLength(candidate, True)
        # Try progressively looser polygon approximations until one has 4 vertices
        for fraction in tolerances:
            polygon = cv2.approxPolyDP(candidate, fraction * perimeter, True)
            if len(polygon) == 4:
                return polygon.reshape(4, 2)

    # Fallback: best-fitting rotated rectangle around the largest contour
    bounding = cv2.minAreaRect(by_area[0])
    return cv2.boxPoints(bounding).astype(np.float32)
    

def deskew(image):
    """Level the image by rotating it by the median angle of its near-horizontal lines.

    Line segments are found with a probabilistic Hough transform on Canny
    edges. Segments within 45 degrees of horizontal vote for the skew; the
    median vote (clipped to +/-10 degrees) is removed by rotating the image.

    Parameters
    ----------
    image : numpy.ndarray
        2-D grayscale image.

    Returns
    -------
    numpy.ndarray
        The rotated image (same shape, value range preserved), or `image`
        itself when no lines or fewer than 3 near-horizontal lines are found.
    """
    # The 10/70 thresholds are expressed on a 0-255 intensity scale. For a
    # float image in [0, 1] (what rgb2gray produces) they must be scaled down,
    # otherwise no gradient ever exceeds them and no edge is detected.
    probe = np.asarray(image)
    threshold_scale = 1.0
    if np.issubdtype(probe.dtype, np.floating) and probe.size and probe.max() <= 1.0:
        threshold_scale = 1.0 / 255.0

    edge = canny(image, sigma=1,
                 low_threshold=10 * threshold_scale,
                 high_threshold=70 * threshold_scale)
    lines = probabilistic_hough_line(edge, line_length=80, line_gap=5)

    if not lines:
        return image

    # Angle of every detected segment, folded into [-90, 90] degrees
    angles = []
    for (x0, y0), (x1, y1) in lines:
        ang = np.degrees(np.arctan2(y1 - y0, x1 - x0))
        if ang > 90:
            ang -= 180
        elif ang < -90:
            ang += 180
        angles.append(ang)

    horizontal = [a for a in angles if abs(a) < 45]  # keep only the slightly tilted rows
    if len(horizontal) < 3:
        return image

    skew = float(np.median(horizontal))   # median so extreme outliers do not distort the estimate
    skew = float(np.clip(skew, -10, 10))  # prevent over-rotation

    # `skew` is measured with y pointing down, so a positive value means the
    # rows appear tilted clockwise. skimage's rotate() turns the image
    # counter-clockwise for positive angles, so rotating by +skew levels them.
    return rotate(image, angle=skew, resize=False, preserve_range=True)
           
def preprocessing(image):
    """Flatten, gray-scale and deskew a photographed document.

    Parameters
    ----------
    image : numpy.ndarray
        Colour photo of the document.

    Returns
    -------
    numpy.ndarray
        Grayscale, perspective-corrected and deskewed image.
    """
    doc_cont = detect_document_contour(image)

    # detect_document_contour returns None when it finds no contour at all;
    # in that case continue with the un-warped photo instead of crashing.
    warped = image if doc_cont is None else four_point(image, doc_cont)

    image_gray = rgb2gray(warped)

    return deskew(image_gray)

In [None]:

def trim_border(image, border_size=10):
    """Crop `border_size` pixels off every side of a 2-D image and return the view."""
    height, width = image.shape
    row_span = slice(border_size, height - border_size)
    col_span = slice(border_size, width - border_size)
    return image[row_span, col_span]

def edge_detection(image):
    """Return a uint8 edge mask (0 / 255) from percentile-normalised Sobel gradients.

    The horizontal and vertical gradient magnitudes are each divided by their
    own 90th percentile, so that both directions contribute on a comparable
    scale, then combined with an element-wise maximum and thresholded at 0.6.

    NOTE(review): the input is divided by 255, which presumes 0-255 pixel
    values; confirm the range of the images actually passed in.
    """
    scaled = image.astype(np.float32) / 255.0

    def _normalised_gradient(dx, dy):
        # Absolute Sobel response, normalised to its 90th percentile
        grad = np.abs(cv2.Sobel(scaled, cv2.CV_32F, dx, dy, ksize=3))
        return grad / (np.percentile(grad, 90) + 1e-6)

    grad_x = _normalised_gradient(1, 0)
    grad_y = _normalised_gradient(0, 1)

    # Stronger of the two directions, capped at 1
    strength = np.clip(np.maximum(grad_x, grad_y), 0, 1)

    # Pixels above the threshold are classified as edges
    return (strength > 0.6).astype(np.uint8) * 255

def line_detection(edges, image):
    """Find grid intersections: pixels where detected horizontal and vertical lines cross.

    Parameters
    ----------
    edges : numpy.ndarray
        Edge mask fed to the Hough transform.
    image : numpy.ndarray
        2-D image; only its shape and dtype are used (for the drawing canvases).

    Returns
    -------
    numpy.ndarray
        (N, 2) array of (row, column) indices of the intersection pixels.
    """
    H, W = image.shape
    diag = int(np.hypot(H, W))  # diagonal adapts the peak spacing to the resolution

    # One blank canvas per orientation; the lines are rasterised onto them
    horizontal_lines = np.zeros_like(image)
    vertical_lines = np.zeros_like(image)

    # Hough transform, then keep only the prominent peaks
    acc, angles, dists = hough_line(edges)
    acc, angles, dists = hough_line_peaks(
        acc, angles, dists,
        threshold=0.75 * np.max(acc),
        min_distance=int(0.01 * diag),
        num_peaks=40,
    )

    for angle, dist in zip(angles, dists):
        theta = abs(angle)
        if np.radians(5) <= theta <= np.radians(85):
            continue  # reject diagonal lines

        # Small |angle| is drawn as a vertical line, large |angle| as a horizontal one
        canvas = vertical_lines if theta < np.radians(45) else horizontal_lines

        a = math.cos(angle)
        b = math.sin(angle)
        x0 = a * dist
        y0 = b * dist
        dx, dy = -b, a  # direction along the line
        pt1 = (int(x0 + 10000 * dx), int(y0 + 10000 * dy))
        pt2 = (int(x0 - 10000 * dx), int(y0 - 10000 * dy))
        cv2.line(canvas, pt1, pt2, (255, 255, 255), 1)

    # A pixel lit on both canvases is an intersection
    crossing = (horizontal_lines > 0) & (vertical_lines > 0)
    return np.argwhere(crossing)

def cluster_1d(values, tol):
    """Group sorted 1-D values into clusters and return each cluster's integer mean.

    A value joins the current cluster when it lies within `tol` of the
    cluster's most recently added (largest) member; otherwise it starts a new
    cluster. Because the comparison is against the last member, a cluster can
    span more than `tol` overall.

    Parameters
    ----------
    values : iterable of numbers
        Values to cluster, in any order. May be empty.
    tol : number
        Maximum gap between consecutive members of one cluster.

    Returns
    -------
    list of int
        ``int(mean)`` of every cluster, in ascending order; ``[]`` for empty input.
    """
    ordered = sorted(values)
    if not ordered:  # nothing to cluster (e.g. no intersections were detected)
        return []

    clusters = [[ordered[0]]]
    for v in ordered[1:]:
        if abs(v - clusters[-1][-1]) <= tol:
            clusters[-1].append(v)  # close enough: extend the current cluster
        else:
            clusters.append([v])    # too far: start a new cluster

    return [int(np.mean(c)) for c in clusters]

def cluster_rows_columns(points, y_tol, x_tol):
    """Cluster intersection points into grid-line positions.

    `points` holds (y, x) pairs. The y values are clustered with `y_tol` and
    the x values with `x_tol`; returns ``(row_ys, col_xs)``.
    """
    row_ys = cluster_1d([pt[0] for pt in points], y_tol)  # y coordinates -> rows
    col_xs = cluster_1d([pt[1] for pt in points], x_tol)  # x coordinates -> columns
    return row_ys, col_xs

def cell_extraction(image):
    """Split a preprocessed table image into a 2-D grid of cell images.

    Parameters
    ----------
    image : numpy.ndarray
        2-D preprocessed (grayscale) image of the table.

    Returns
    -------
    numpy.ndarray
        Object array of shape (num_rows, num_cols); element [r, c] is the
        sub-image between grid lines r..r+1 and c..c+1. An empty (0, 0) array
        is returned when no grid intersections are detected.
    """
    # Remove the edge of the paper so it is not considered part of the table
    image = trim_border(image, border_size=3)

    H, W = image.shape

    edges = edge_detection(image)
    points = line_detection(edges, image)  # intersection points of the grid lines

    # Without intersections there is no grid to cut along
    if len(points) == 0:
        return np.empty((0, 0), dtype=object)

    # Clustering tolerance scales with the image resolution
    y_tol = 0.01 * H
    x_tol = 0.01 * W

    rows, columns = cluster_rows_columns(points, y_tol, x_tol)

    # N grid lines enclose N - 1 cells
    num_rows = len(rows) - 1
    num_cols = len(columns) - 1

    cell_images = np.empty((num_rows, num_cols), dtype=object)

    for r in range(num_rows):
        for c in range(num_cols):
            # Top-left and bottom-right corners of the cell
            x1, y1 = columns[c], rows[r]
            x2, y2 = columns[c + 1], rows[r + 1]
            cell_images[r, c] = image[y1:y2, x1:x2]

    return cell_images

In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import cv2

# --- Define a simple CNN for MNIST ---
class MNIST_CNN(nn.Module):
    """Small CNN for 1x28x28 digit images: two conv+pool stages and two linear layers.

    ``forward`` returns raw class scores (logits) of shape (batch, 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Two 2x2 poolings reduce 28x28 to 7x7, which is what fc1 expects
        self.fc1 = nn.Linear(64*7*7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Pool after BOTH convolutions: 28x28 -> 14x14 -> 7x7. Pooling only
        # once leaves 64x14x14 features, which do not match fc1's input size.
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample so a size mismatch raises instead of silently
        # changing the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
import pytesseract
import easyocr
from skimage import transform, filters, exposure, util
from PIL import Image

def enhance_cell_for_ocr(cell_gray, out_size=96):
    """Normalise, resize and binarise a cell image for handwriting OCR.

    Returns a uint8 (out_size, out_size) mask where pixels darker than their
    local Sauvola threshold (the ink) are 255 and everything else is 0.
    """
    as_float = util.img_as_float(cell_gray)  # convert to float

    # Stretch the contrast to the full [0, 1] range
    stretched = exposure.rescale_intensity(as_float, in_range="image", out_range=(0, 1))

    # Bring every cell to the same square size
    resized = transform.resize(
        stretched, (out_size, out_size), anti_aliasing=True, preserve_range=True
    )

    # Local thresholding: dark ink = foreground
    ink_mask = resized < filters.threshold_sauvola(resized, window_size=21, k=0.2)

    return ink_mask.astype(np.uint8) * 255

def cell_preprocessing(cell_image):
    """Binarise a cell image and crop away its margins and the table's grid lines.

    Parameters
    ----------
    cell_image : numpy.ndarray
        2-D cell image; it is multiplied by 255 before filtering.
        # NOTE(review): presumably values are in [0, 1]; confirm with the caller.

    Returns
    -------
    numpy.ndarray
        Boolean mask (True = ink) of the cell content after trimming.

    Raises
    ------
    ValueError
        If every row or every column is at least 60% ink, so no content area
        can be located.
    """
    cell_image = cell_image * 255.0
    cell_image = median(cell_image, np.ones((2,2)))  # median filter

    local_thresh = threshold_local(cell_image, block_size=7, offset=4)  # local thresholding
    # Pixels NOT brighter than their local threshold are ink. np.logical_not
    # is used instead of np.bitwise_invert, which only exists in NumPy >= 2.0.
    thresholded = np.logical_not(cell_image > local_thresh)

    #show_images([cell_image, thresholded])

    # Fixed margin crop: drop the top 10%, bottom 5%, left 5% and right 10%
    size = thresholded.shape
    thresholded = thresholded[int(size[0]*0.1):int(size[0]*0.95),
                              int(size[1]*0.05):int(size[1]*0.9)]

    # Trim the cell border so grid lines are not detected as symbols

    row_ink = np.sum(thresholded > 0, axis=1)  # ink pixels in each row
    col_ink = np.sum(thresholded > 0, axis=0)  # ink pixels in each column

    min_row_ink = thresholded.shape[1] * 0.6  # 60% of the row width
    min_col_ink = thresholded.shape[0] * 0.6  # 60% of the column height

    # A row/column that is mostly ink is taken to be a grid line, not content
    valid_rows = np.where(row_ink < min_row_ink)[0]
    valid_cols = np.where(col_ink < min_col_ink)[0]

    if len(valid_rows) == 0 or len(valid_cols) == 0:
        raise ValueError("No content detected")

    y_min, y_max = valid_rows[[0, -1]]
    x_min, x_max = valid_cols[[0, -1]]

    # Keep the area strictly inside the first/last valid row and column
    trimmed = thresholded[y_min+1:y_max, x_min+1:x_max]

    trimmed = binary_closing(trimmed, np.ones((3,3)))

    return trimmed

def check_empty_cell(cell_image):
    """Return True when fewer than 2% of the cell's pixels are non-zero (ink).

    A zero-size image is treated as empty; dividing by its size would
    otherwise yield NaN and wrongly report the cell as non-empty.
    """
    total_pixels = cell_image.size
    if total_pixels == 0:
        return True

    ink_ratio = np.sum(cell_image > 0) / total_pixels
    return bool(ink_ratio < 0.02)

def detect_question_mark(cell_image):
    """Placeholder for '?' detection; not implemented yet and always returns False.

    A possible approach is matching a thresholded binary mask of the cell.
    """
    return False

def detect_line_symbols(cell_image):
    """Classify a cell's hand-drawn line symbol from its prominent Hough lines.

    Returns
    -------
    (bool, int)
        ``(True, value)`` when a symbol is recognised, ``(False, 0)`` otherwise:
          * horizontal and vertical lines together (box)  -> 0
          * exactly one diagonal line (check mark)        -> 5
          * more than one diagonal line (x)               -> 0
          * exactly one horizontal line (dash)            -> 0
          * several horizontal lines                      -> 5 - count
          * only vertical lines                           -> count
    """
    H, W = cell_image.shape
    diag = int(np.hypot(H, W))  # diagonal adapts the peak spacing to the resolution

    # Hough transform, then keep only the prominent peaks
    acc, angles, dists = hough_line(cell_image)
    acc, angles, dists = hough_line_peaks(
        acc, angles, dists,
        threshold=0.7 * np.max(acc),
        min_distance=int(0.07 * diag),
        num_peaks=10,
    )

    # Tally the peaks by orientation
    n_horizontal = n_vertical = n_diagonal = 0
    diag_lo, diag_hi = np.radians(15), np.radians(75)
    for angle in angles:
        theta = abs(angle)
        if diag_lo <= theta <= diag_hi:
            n_diagonal += 1
        elif theta < diag_lo:
            n_vertical += 1
        else:
            n_horizontal += 1

    # Decision rules, highest priority first
    if n_horizontal > 0 and n_vertical > 0:
        return True, 0                 # box
    if n_diagonal == 1:
        return True, 5                 # check mark
    if n_diagonal > 1:
        return True, 0                 # x
    if n_horizontal == 1:
        return True, 0                 # dash
    if n_horizontal > 1:
        return True, 5 - n_horizontal  # stacked horizontal lines
    if n_vertical > 0:
        return True, n_vertical        # vertical tally marks
    return False, 0

def ocr_check_id(cell_image):
    """Read the digits of an ID cell with Tesseract.

    Parameters
    ----------
    cell_image : numpy.ndarray
        Cell image; it is multiplied by 255 and cast to uint8.
        # NOTE(review): presumably values are in [0, 1]; confirm with the caller.

    Returns
    -------
    str
        The text returned by ``pytesseract.image_to_string`` (unmodified).
    """
    import os

    cell_image = (cell_image * 255.0).astype(np.uint8)

    # Only point pytesseract at the default Windows install when it is really
    # there; elsewhere keep pytesseract's own lookup instead of forcing a path
    # that does not exist.
    windows_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    if os.path.isfile(windows_tesseract):
        pytesseract.pytesseract.tesseract_cmd = windows_tesseract

    # Single text line (--psm 7), digits-only configuration
    extracted_text = pytesseract.image_to_string(cell_image, config="--psm 7 digits")

    return extracted_text

def ocr_check_handwriting(cell_image):
    """Try to read a handwritten digit string from a cell with EasyOCR.

    Returns
    -------
    (bool, str)
        ``(True, text)`` with the first recognised string, or ``(False, " ")``
        when nothing is recognised.
    """
    # Enhance and resize the image for the OCR (uint8 mask, ink = 255)
    cell_image = enhance_cell_for_ocr(cell_image)

    # Flip to ink = 0 on a 255 background. The result is already a 0/255
    # uint8 image, so it must not be multiplied by 255 again (that overflows
    # uint8). np.invert is used because np.bitwise_invert needs NumPy >= 2.0.
    cell_image = np.invert(cell_image)

    # Morphological opening of the background mask, then back to 0/255 uint8
    cell_image = binary_opening(cell_image > 0, np.ones((3,3)))
    cell_image = (cell_image * 255.0).astype(np.uint8)

    # Building an easyocr.Reader is expensive, so create it once and reuse it
    # for every cell.
    reader = getattr(ocr_check_handwriting, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en'], gpu=False)
        ocr_check_handwriting._reader = reader

    # Read the image, allowing digits only
    result = reader.readtext(cell_image, allowlist='0123456789', detail=0)

    if result:
        return True, result[0]
    return False, " "

def read_cell(cell_image, ocr = "None"):
    """Read the value of a single table cell.

    Parameters
    ----------
    cell_image : numpy.ndarray
        Image of the cell.
    ocr : str
        ``"id"`` sends the raw cell straight to Tesseract; any other value
        runs the handwriting / symbol pipeline.

    Returns
    -------
    str or int
        The recognised text, ``"?"``, the numeric value of a line symbol, or
        ``" "`` when the cell is empty or nothing is recognised.
    """
    if ocr == "id":  # IDs go directly to Tesseract without preprocessing
        return ocr_check_id(cell_image)

    try:
        preprocessed = cell_preprocessing(cell_image)
    except ValueError:
        # cell_preprocessing raises ValueError when it cannot locate any
        # content area; treat that as a blank cell rather than aborting the
        # whole table.
        return " "

    if check_empty_cell(preprocessed):
        return " "

    if detect_question_mark(preprocessed):  # check for '?' in the cell
        return "?"

    detection, text = ocr_check_handwriting(cell_image)  # try digit OCR first
    if detection:
        return text

    detection, line_val = detect_line_symbols(preprocessed)  # then line symbols
    if detection:
        return line_val

    return " "

ModuleNotFoundError: No module named 'easyocr'

In [None]:

def Module1(import_image, export_filename):
    """Run the full pipeline: load a photo of the table, read its cells, export to Excel.

    Parameters
    ----------
    import_image : str or path-like
        Path of the input photo.
    export_filename : str
        Target file handed to ``export_excel``.

    Raises
    ------
    ValueError
        If the detected grid has no rows or fewer than 3 columns, so the
        expected layout (ID column, two skipped columns, value columns) cannot
        be read.
    """
    image = load_image(import_image)

    # Preprocessing and cell extraction
    preprocessed_image = preprocessing(image)
    cells = cell_extraction(preprocessed_image)

    n_rows, n_cols = cells.shape
    if n_rows < 1 or n_cols < 3:
        # Fail with a clear message instead of a negative-dimension / index error below
        raise ValueError(
            f"Table detection failed: found a {n_rows}x{n_cols} cell grid, "
            "need at least 1 row and 3 columns"
        )

    # Output table: one row per data row; column 0 = ID, then the value columns
    values = np.empty((n_rows - 1, n_cols - 2), dtype=object)

    for r in range(1, n_rows):  # row 0 is skipped (header)
        values[r-1, 0] = read_cell(cells[r, 0], ocr="id")  # Tesseract OCR for the ID column
        for c in range(3, n_cols):  # columns 1 and 2 are skipped because they hold names
            values[r-1, c-2] = read_cell(cells[r, c], ocr="None")

    # Export values to excel file
    export_excel(values, export_filename, column_names = 1)

omar wael
