In [1]:
from commonfunctions import *
import cv2 as cv2
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
def load_image(file_path):
    """Read the image stored at ``file_path`` with ``io.imread`` and return it."""
    return io.imread(file_path)

In [None]:
from skimage.transform import hough_line, hough_line_peaks,probabilistic_hough_line, rotate
from skimage.transform import ProjectiveTransform, warp
from skimage.measure import find_contours

def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    Parameters
    ----------
    pts : array-like
        Corner coordinates as (x, y) pairs. Anything that can be viewed as an
        (N, 2) array is accepted: a NumPy array, a nested list, or an OpenCV
        contour of shape (N, 1, 2). ``four_point`` passes four points.

    Returns
    -------
    numpy.ndarray
        A (4, 2) float32 array holding, in this order, the top-left,
        top-right, bottom-right and bottom-left points.
    """
    # Coerce once so lists and (N, 1, 2) contours behave like an (N, 2) array.
    pts = np.asarray(pts, dtype=np.float32).reshape(-1, 2)
    rectangle = np.zeros((4, 2), dtype=np.float32)

    # Top-left has the smallest x + y, bottom-right the largest.
    coord_sum = pts.sum(axis=1)
    rectangle[0] = pts[np.argmin(coord_sum)]
    rectangle[2] = pts[np.argmax(coord_sum)]

    # Top-right has the smallest y - x, bottom-left the largest.
    coord_diff = pts[:, 1] - pts[:, 0]
    rectangle[1] = pts[np.argmin(coord_diff)]
    rectangle[3] = pts[np.argmax(coord_diff)]

    return rectangle

# Takes the 4 corners of the paper and warps them into an upright rectangle,
# as if the sheet had been scanned with an app such as CamScanner.
def four_point(image, pts):
    """Perspective-warp the quadrilateral ``pts`` of ``image`` into a rectangle.

    ``pts`` is ordered with ``order_points``; the output size is taken from the
    longer of each pair of opposite edges. Returns the warped image produced by
    ``cv2.warpPerspective``.
    """
    corners = order_points(pts)
    tl, tr, br, bl = corners

    # Take the longer of each pair of opposite edges so nothing gets cropped.
    out_w = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    out_h = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype=np.float32,
    )

    # 3x3 homography mapping the ordered corners onto the destination rectangle.
    homography = cv2.getPerspectiveTransform(corners, target)

    # Apply the homography to every pixel of the sheet.
    return cv2.warpPerspective(image, homography, (out_w, out_h))

# Takes an image and tries to find the 4 corner points of the sheet so that it
# can be perspective-warped.
def detect_document_contour(image):
    """Locate the four corners of the document in ``image``.

    The image is converted to grayscale, blurred, edge-detected and
    morphologically closed; the ten largest external contours are then
    approximated as polygons until one with exactly four vertices is found.
    If none qualifies, the minimum-area bounding box of the largest contour is
    used instead.

    Returns
    -------
    numpy.ndarray or None
        A (4, 2) float32 array of (x, y) corners (in no particular order), or
        ``None`` when no contour is found at all.
    """
    gray = rgb2gray(image)
    gray_blurred = gaussian(gray, sigma=1)
    edges = canny(gray_blurred, sigma=1, low_threshold=30/255, high_threshold=100/255).astype(np.uint8) * 255
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

    # RETR_EXTERNAL keeps only outer contours; the table grid inside the sheet
    # is not wanted here.
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # Largest first: the sheet is expected to be the biggest contour.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    for c in contours[:10]:
        peri = cv2.arcLength(c, True)
        # Progressively looser approximation tolerance until 4 vertices remain.
        for eps in [0.01, 0.02, 0.03, 0.04, 0.05]:
            approx = cv2.approxPolyDP(c, eps * peri, True)
            if len(approx) == 4:
                # Same dtype as the fallback below, so callers always get float32.
                return approx.reshape(4, 2).astype(np.float32)

    # Best-effort rectangle when no contour approximates to a quadrilateral.
    rect = cv2.minAreaRect(contours[0])
    box = cv2.boxPoints(rect)
    return box.astype(np.float32)
    

def deskew(image):
    """Rotate a grayscale image so that its near-horizontal lines become level.

    Line segments are detected with a probabilistic Hough transform; the median
    angle of the segments within 45 degrees of horizontal is taken as the skew,
    limited to +/-10 degrees, and the image is rotated to cancel it.

    Parameters
    ----------
    image : numpy.ndarray
        2-D grayscale image.

    Returns
    -------
    numpy.ndarray
        The rotated image (same shape, value range preserved), or ``image``
        itself when no line, or fewer than three near-horizontal lines, are
        found.
    """
    # The thresholds are expressed on a 0-255 intensity scale. A float image
    # in [0, 1] (what rgb2gray produces) never reaches such gradient
    # magnitudes, so rescale them or no edge would ever be detected.
    low_threshold, high_threshold = 10, 70
    pixels = np.asarray(image)
    if pixels.size and np.issubdtype(pixels.dtype, np.floating) and float(pixels.max()) <= 1.0:
        low_threshold = low_threshold / 255
        high_threshold = high_threshold / 255

    edge = canny(image, sigma=1, low_threshold=low_threshold, high_threshold=high_threshold)
    lines = probabilistic_hough_line(edge, line_length=80, line_gap=5)

    if not lines:
        return image

    # Angle of every detected segment, folded into [-90, 90] degrees.
    angles = []
    for (x0, y0), (x1, y1) in lines:
        ang = np.degrees(np.arctan2(y1 - y0, x1 - x0))

        while ang > 90:
            ang -= 180

        while ang < -90:
            ang += 180

        angles.append(ang)

    # Keep only the slightly tilted table rows.
    horizontal = [a for a in angles if abs(a) < 45]
    if len(horizontal) < 3:
        return image

    # Median so that a few extreme outliers do not spoil the estimate.
    skew = float(np.median(horizontal))
    skew = float(np.clip(skew, -10, 10))  # prevent over-rotation

    # Angles above are measured in image coordinates (y grows downwards), so a
    # positive skew is a clockwise tilt on screen. rotate() turns the image
    # counter-clockwise for positive angles, hence +skew levels the lines.
    rotated = rotate(image, angle=skew, resize=False, preserve_range=True)
    return rotated

            
def preprocessing(image):
    """Rectify a photographed sheet and return it as a deskewed grayscale image.

    Steps: find the sheet corners, perspective-warp the sheet to a rectangle,
    convert to grayscale, then deskew. When no document contour is found the
    warp is skipped and the original image is used instead.
    """
    doc_cont = detect_document_contour(image)

    # detect_document_contour returns None when it finds no contour at all;
    # four_point cannot handle that, so fall back to the un-warped image.
    if doc_cont is None:
        warped = image
    else:
        warped = four_point(image, doc_cont)

    image_gray = rgb2gray(warped)

    image_deskewed = deskew(image_gray)

    return image_deskewed

In [4]:
def cell_extraction(image):
    """Detect the table grid of a grayscale sheet and display the cells found.

    Edges are taken from balanced Sobel gradients, grid lines from a Hough
    transform restricted to near-vertical and near-horizontal peaks, and grid
    corners from the pixels where a drawn horizontal and vertical line overlap.
    Corners are grouped into rows with ``cluster_rows`` and consecutive rows
    are paired into cells, which are filtered with ``valid_cell`` and outlined
    on a copy of the image shown with ``show_images``.

    Parameters
    ----------
    image : numpy.ndarray
        2-D grayscale image, either in [0, 1] or on a 0-255 scale.

    Returns
    -------
    numpy.ndarray
        Boolean mask with the shape of ``image`` that is True where a detected
        horizontal line and a detected vertical line cross.
    """
    image = image.astype(np.float32)
    # Only rescale 0-255 input; an image that is already in [0, 1] (such as
    # the output of rgb2gray) must not be divided again.
    if image.size and image.max() > 1.0:
        image = image / 255.0

    H, W = image.shape
    diag = int(np.hypot(H, W))
    horizontal = np.zeros_like(image)
    vertical = np.zeros_like(image)

    gx_abs = np.abs(cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3))
    gy_abs = np.abs(cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3))

    # Normalise each direction by its own 90th percentile so that neither
    # horizontal nor vertical edges dominate the combined magnitude.
    gx_n = gx_abs / (np.percentile(gx_abs, 90) + 1e-6)
    gy_n = gy_abs / (np.percentile(gy_abs, 90) + 1e-6)

    mag_balanced = np.clip(np.maximum(gx_n, gy_n), 0, 1)

    edges = (mag_balanced > 0.6).astype(np.uint8) * 255

    acc, angles, dists = hough_line(edges)
    acc, angles, dists = hough_line_peaks(acc, angles, dists, threshold=0.7 * np.max(acc),
                                        min_distance=max(1, int(0.01 * diag)), num_peaks=40)

    for angle, dist in zip(angles, dists):
        theta = abs(angle)
        # The Hough angle is that of the line's normal: a normal within 10
        # degrees of the x axis means a vertical line, beyond 80 degrees a
        # horizontal one. Anything in between is ignored.
        if theta < np.radians(10):
            canvas = vertical
        elif theta > np.radians(80):
            canvas = horizontal
        else:
            continue

        a = math.cos(angle)
        b = math.sin(angle)
        x0 = a * dist
        y0 = b * dist
        # Extend the line far past the image borders in both directions.
        pt1 = (int(x0 + 10000 * (-b)), int(y0 + 10000 * (a)))
        pt2 = (int(x0 - 10000 * (-b)), int(y0 - 10000 * (a)))
        cv2.line(canvas, pt1, pt2, (255, 255, 255), 1)

    result = np.bitwise_and(horizontal > 0, vertical > 0)

    # np.argwhere yields (row, col); flip to (x, y), which is the convention
    # used by cluster_rows, valid_cell and cv2.polylines.
    points = np.argwhere(result)[:, ::-1]

    y_tol = 0.01 * H
    rows = cluster_rows(points, y_tol)

    rows = [r for r in rows if len(r) > 2]

    # Pair every row of corners with the next one to form the cells between.
    cells = []
    for top, bottom in zip(rows, rows[1:]):
        cols = min(len(top), len(bottom)) - 1
        for j in range(cols):
            cells.append((top[j], top[j + 1], bottom[j + 1], bottom[j]))

    cells = [c for c in cells if valid_cell(*c)]

    vis = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    for tl, tr, br, bl in cells:
        # cv2.polylines only accepts int32 points; argwhere produces int64.
        outline = np.array([tl, tr, br, bl], dtype=np.int32)
        cv2.polylines(vis, [outline], True, (0, 255, 0), 1)

    show_images([edges, result, vis],
                titles=["Edges" , "Result" , "Detected Cells"],)

    return result

def cluster_rows(points, y_tol):
    """Group (x, y) points into rows of similar y, each row ordered by x.

    Points are visited in ascending y. A point opens a new row when its y is
    more than ``y_tol`` away from the y of the first point of the current row;
    otherwise it joins the current row. Returns a list of rows (lists of
    points).
    """
    by_y = list(points)
    by_y.sort(key=lambda pt: pt[1])

    grouped = []
    for pt in by_y:
        opens_new_row = True
        if grouped:
            anchor_y = grouped[-1][0][1]
            opens_new_row = abs(pt[1] - anchor_y) > y_tol

        if opens_new_row:
            grouped.append([pt])
        else:
            grouped[-1].append(pt)

    # Left-to-right order inside every row.
    return [sorted(row, key=lambda pt: pt[0]) for row in grouped]

def valid_cell(tl, tr, br, bl, min_area=100):
    """Return True when the cell's width times height exceeds ``min_area``.

    Width is the x distance between ``tl`` and ``tr``; height is the y distance
    between ``tl`` and ``bl``. ``br`` is accepted but not used.
    """
    width = abs(tr[0] - tl[0])
    height = abs(bl[1] - tl[1])
    area = width * height
    return area > min_area

In [5]:
def read_cell(cell_image):
    """Placeholder for reading the content of one cell; not implemented yet.

    ``cell_image`` is currently ignored and the function always returns None.
    """
    _placeholder = 0
    return None

In [7]:
def export_excel(data, file_name, column_names=None):
    """Write a table of student records to an Excel file.

    Parameters
    ----------
    data : sequence of sequences
        One row per student; every row must have the same number of entries.
        The first entry of a row is treated as the student code.
    file_name : str
        Destination passed to ``DataFrame.to_excel``.
    column_names : sequence of str, optional
        Header names. Either one name per column, or one name per column after
        the first, in which case ``"Student Code"`` is prepended. When omitted
        the header is ``"Student Code", "Grade_1", "Grade_2", ...``.

    Returns
    -------
    str
        ``file_name``, unchanged.

    Raises
    ------
    ValueError
        If ``data`` is empty, if rows differ in length, or if ``column_names``
        does not match the number of columns.
    """
    if len(data) == 0:
        raise ValueError("Empty Data Array Is Provided")

    num_columns = len(data[0])

    if any(len(row) != num_columns for row in data):
        raise ValueError("Every student must have the same number of entries")

    if column_names is None:
        columns = ["Student Code"] + [f"Grade_{i}" for i in range(1, num_columns)]
        columns = columns[:num_columns]
    else:
        columns = list(column_names)
        if len(columns) == num_columns - 1:
            # Only the grade columns were named; label the leading code column.
            columns = ["Student Code"] + columns
        elif len(columns) != num_columns:
            raise ValueError(
                f"Expected {num_columns} or {num_columns - 1} column names, got {len(columns)}"
            )

    df = pd.DataFrame(data, columns=columns)
    df.to_excel(file_name, index=False)

    return file_name

# Demo input for export_excel: one row per student, four entries each.
# Presumably the first entry is the student code and the rest are grades
# (export_excel labels the first column "Student Code") -- confirm.
data = [
    [ 2023001 , 85, 90, 78],
    [ 2023002 , 88, 92, 80],
    [ 2023003 , 70, 75, 68]
]

# Three names passed as column_names, one fewer than the entries per row.
subjects = [ "Math" ,  "Physics" ,  "Programming" ]

# Writes grades.xlsx as a side effect; the call returns the file name.
export_excel(data,  "grades.xlsx" , subjects)


'grades.xlsx'

In [8]:
# End-to-end run on one sample sheet: load -> preprocess -> extract cells.
image = load_image( "Grades_Sheet_Dataset/c0747b4e-a551-4532-b97a-2155d4696cd6.jpg" )
# Deskewed grayscale version of the sheet.
preprocessed_image = preprocessing(image)
# cell_extraction displays its own figures and returns the line-crossing mask.
result = cell_extraction(preprocessed_image)
# Side-by-side view of the input and the preprocessed sheet.
show_images([image, preprocessed_image], titles=[ "Original Image" ,  "Preprocessed Image" ])

NameError: name 'float32' is not defined