In [None]:
from commonfunctions import *
%matplotlib inline
%load_ext autoreload
%autoreload 2

import cv2 
import imutils
from skimage import exposure
import glob
import matplotlib.pyplot as plt
from skimage.transform import (hough_line, hough_line_peaks)
import math 
import xlwt 
from imutils.perspective import four_point_transform
from imutils import contours


#pip install imutils
#pip install opencv-python
#pip install pytesseract

In [None]:
# SEGMENTATION FUNCTIONS ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

def thresholding(img):
    """Binarize a grayscale image around its Otsu threshold.

    Parameters
    ----------
    img : array-like
        Grayscale image. It is not modified.

    Returns
    -------
    numpy.ndarray
        New array with the shape and dtype of ``img`` where every pixel
        strictly below the threshold is 0 and every other pixel is 255.
    """
    # NOTE(review): threshold_otsu is not defined in this notebook; presumably
    # it is provided by the `commonfunctions` star import -- confirm.
    thresh = threshold_otsu(img)

    # Vectorised comparison; replaces the former per-pixel Python double loop,
    # which was extremely slow on full-size photographs.
    pixels = np.asarray(img)
    return np.where(pixels < thresh, 0, 255).astype(pixels.dtype)



#########################################################################################################    
    
# Perspective helpers: order the four corners of a quadrilateral,
# then warp that quadrilateral to a top-down rectangle
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    Parameters
    ----------
    pts : array-like of (x, y) pairs
        Corner coordinates in any order.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape (4, 2) holding the selected corners in the
        order top-left, top-right, bottom-right, bottom-left.
    """
    corners = np.asarray(pts)

    # x + y is smallest at the top-left corner and largest at the bottom-right
    coord_sum = corners.sum(axis=1)
    # y - x is smallest at the top-right corner and largest at the bottom-left
    coord_diff = np.diff(corners, axis=1)

    picks = (
        np.argmin(coord_sum),    # top-left
        np.argmin(coord_diff),   # top-right
        np.argmax(coord_sum),    # bottom-right
        np.argmax(coord_diff),   # bottom-left
    )

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, pick in enumerate(picks):
        ordered[slot] = corners[pick]
    return ordered

#################################################################################################################


def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Parameters
    ----------
    image : numpy.ndarray
        Source image handed to ``cv2.warpPerspective``.
    pts : array-like of (x, y) pairs
        Corners of the region to extract, in any order.

    Returns
    -------
    numpy.ndarray
        The warped ("bird's eye view") image. Its width/height are the
        longer of the two opposite side lengths, truncated to int.

    Note: this definition replaces the ``four_point_transform`` imported from
    ``imutils.perspective`` at the top of the notebook.
    """
    def _side(p, q):
        # Euclidean length of the edge joining two corners
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # output size: the longer of each pair of opposite edges
    out_w = max(int(_side(bottom_right, bottom_left)), int(_side(top_right, top_left)))
    out_h = max(int(_side(top_right, bottom_right)), int(_side(top_left, bottom_left)))

    # destination corners, in the same tl, tr, br, bl order as `corners`
    target = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

#################################################################################################################

def add_border(img):
    """Return a copy of ``img`` whose outer 5 rows/columns on every side are 0.

    The input array is left untouched.
    """
    thickness = 5
    framed = np.copy(img)
    n_rows, n_cols = framed.shape[0], framed.shape[1]

    framed[:thickness, :] = 0                  # top
    framed[n_rows - thickness:n_rows, :] = 0   # bottom
    framed[:, :thickness] = 0                  # left
    framed[:, n_cols - thickness:n_cols] = 0   # right
    return framed

#################################################################################################################

def remove_border(img):
    """Crop 5% of the height off the top and bottom and 5% of the width off
    the left and right of ``img`` and return the remaining slice."""
    height, width = img.shape[0], img.shape[1]
    margin_rows = int(0.05 * height)
    margin_cols = int(0.05 * width)
    return img[margin_rows:height - margin_rows, margin_cols:width - margin_cols]
    
#################################################################################################################
#################################################################################################################


In [None]:
#READ AND EXTRACT PAPER FROM IMAGE :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################


image = cv2.imread("images/t6.jpg", cv2.IMREAD_COLOR)
# cv2.imread reports a missing/unreadable file by returning None, not by raising
if image is None:
    raise FileNotFoundError("could not read image file 'images/t6.jpg'")

# grayscale -> light blur -> Canny edge map
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 75, 200)

show_images([edged],[''])

# find the outer contours in the edge map; grab_contours hides the
# differences in findContours' return value between OpenCV versions
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
    cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
docCnt = None

mean = 0

# walk the contours from largest to smallest area; the first one that
# simplifies to four vertices is taken to be the sheet of paper
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.01 * peri, True)
    if len(approx) == 4:
        docCnt = approx
        break

# fail with a clear message instead of a TypeError on None[:, 0]
if docCnt is None:
    raise RuntimeError("no four-cornered contour (paper outline) found in the image")

# approxPolyDP yields shape (4, 1, 2); drop the middle axis to get (4, 2)
docCnt = docCnt[:,0]
warpedx = four_point_transform (image, docCnt)
# keep the sheet in landscape orientation
if warpedx.shape[0] > warpedx.shape[1]:
    warpedx = imutils.rotate_bound(warpedx, 90)
gray = cv2.cvtColor(warpedx, cv2.COLOR_BGR2GRAY)
threshold = thresholding(gray)
# one title per image (the call used to pass three images but two titles;
# presumably show_images pairs them up, which would hide the third -- confirm)
show_images([warpedx,gray,threshold],['','',''])


In [None]:
#EXTRACT  THE TABLE :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 
#################################################################################################################

# NOTE: this cell overwrites `warpedx` with the table crop, so it must be run
# exactly once after the paper-extraction cell.
img2 = warpedx.copy()

# unused in the visible notebook; kept in case other cells rely on the names
img3 = img2.copy()
img = img3.copy()

# grab_contours keeps this cell working across OpenCV versions, the same way
# the paper-extraction cell does
contours = imutils.grab_contours(
    cv2.findContours(threshold.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE))

# largest contours first
contours = sorted(contours, key=cv2.contourArea, reverse=True)

# reset so a missing table is reported instead of silently reusing the
# corner array left behind by the previous cell
docCnt = None

i = 0
for c in contours :
    area = cv2.contourArea(c)

    # Shortlisting the regions based on their area.
    if area > 500:
        (x, y, w, h) = cv2.boundingRect(c)

        print("contour " ,i, "= ", area)
        i+=1
        show_images([img2[ y : y+h , x : x+w   ]],[''])

        # the second-largest shortlisted contour is taken as the table
        # (presumably the largest one is the page outline -- confirm)
        if  i == 2 :
            peri = cv2.arcLength(c, True)
            docCnt = cv2.approxPolyDP(c, 0.01 * peri, True)
            break

if docCnt is None:
    raise RuntimeError("fewer than two contours with area > 500 were found; cannot locate the table")

# (n, 1, 2) -> (n, 2)
docCnt = docCnt[:,0]

warpedx =  four_point_transform (img2, docCnt)
show_images([warpedx],[''])


In [None]:
#EXTRACT STUDENT DETAILS::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

# Locate the large cells of the table and derive the grades area from them.
# Produces the globals used by later cells:
#   mean, mean2 - contour-area thresholds
#   imageboxes  - colour crops of the selected cells
#   grades      - the part of the table to the right of the selected cells
img2 = np.copy(warpedx)
imgAA = np.copy(warpedx)



gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
# zero a 5-pixel frame around the image before binarizing
gray = add_border(gray)
threshold = thresholding(gray)



# NOTE(review): the two-value unpacking matches the OpenCV 4 return signature;
# the first cell uses imutils.grab_contours instead -- confirm the target version.
# The name `contours` also rebinds the module imported with `from imutils import contours`.
contours,_=cv2.findContours(threshold.astype(np.uint8), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) 


# mean = average area over all contours
arr = np.zeros(len(contours))
i= 0 
for cnt in contours : 
    area = cv2.contourArea(cnt) 
    arr[i] = area
    i+=1
    
mean = np.mean(arr)
print("mean = ",mean)


# mean2 = average area over the contours that are larger than `mean`
# (np.mean of an empty array is nan, which would make every later comparison False)
arr = []
for cnt in contours : 
    area = cv2.contourArea(cnt) 
    if area > mean:
        arr.append(area)
        
arr = np.asarray(arr)
mean2 = np.mean(arr)
print("mean2 = ",mean2)    

maxi = 0            # right-most x coordinate (x + w) among the selected boxes
grades = None       
imageboxes = []
i = 0 
graded = False      # not used anywhere in this cell
for cnt in contours : 
    area = cv2.contourArea(cnt) 

    # Shortlist contours larger than mean2 but smaller than half of the image.
    if  area>mean2 and area<(img2.shape[1] * img2.shape[0])/2:  
        # `approx` is computed but not used below
        approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True) 
        (x, y, w, h) = cv2.boundingRect(cnt)
        # (disabled experiments: per-contour validity check and perspective warp)
        #and check_contour(img[ y : y+h , x : x+w   ]) 
        #if(len(approx)==4):
        #warped = four_point_transform (img2, approx) 
        imageboxes.append(img2[ y : y+h , x : x+w  ])
        print("contour " ,i, "= ", area)
        i+=1
        new  = gray[ y : y+h , x : x+w   ]
        #new = thresholding(gray[ y : y+h , x : x+w   ])
        #new  = add_border(new)
        show_images([new],[''])
        #cv2.drawContours(imgAA, [cnt], 0, (0,255,0), 3)
        #show_images([imgAA],[''])
        
        if x+w > maxi:
            maxi = x+w


print((img2.shape[1] * img2.shape[0])/2)
            
# everything from 10 px left of the right-most selected box to the right edge.
# NOTE(review): if no contour was selected, maxi stays 0 and the slice start is
# -10, i.e. only the last 10 columns are kept -- confirm this cannot happen.
grades = img2[:,maxi-10:img2.shape[1]]
            
show_images([grades],[''])

In [None]:
#EXTRACT IDS :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract




import os

# Windows install location used on the original author's machine. It is only
# applied when the file exists, so that on any other machine pytesseract keeps
# its default command and looks the tesseract executable up on PATH.
_TESSERACT_EXE = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
if os.path.isfile(_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXE




def ocr_core(image, mode=0):
    """Run OCR on a BGR image crop and return the recognised text.

    The crop is converted to grayscale, trimmed with ``remove_border`` and
    binarized with ``thresholding`` before it is handed to pytesseract.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image (it is passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).
    mode : int, optional
        0 (default) uses pytesseract's default language (English according to
        the original author's note); any other value requests ``lang='ara'``.

    Returns
    -------
    str
        Text returned by ``pytesseract.image_to_string``.
    """
    prepared = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    prepared = thresholding(remove_border(prepared))

    if mode != 0:
        return pytesseract.image_to_string(prepared, lang='ara')
    return pytesseract.image_to_string(prepared)


IDs = []        # image crop of every detected ID cell
IDs_text = []   # OCR text of every detected ID cell, same order as IDs

# The boxes are consumed three at a time; the box with the smallest bounding
# area in each triple is taken to be the ID cell.
# NOTE(review): the range stops at len(imageboxes)-3, so the final triple is
# never visited -- presumably that is the header row; confirm.
for i in range(0 , len(imageboxes)-3,3):
    print("********** ",i," **********")

    # reversed() preserves the original tie-break: on equal areas the box with
    # the higher index wins
    index = min(
        reversed(range(i, i + 3)),
        key=lambda k: imageboxes[k].shape[0] * imageboxes[k].shape[1],
    )

    show_images([imageboxes[index]],[''])

    # OCR is expensive: run it once and reuse the text (it used to run twice)
    id_text = ocr_core(imageboxes[index],0)
    IDs.append(imageboxes[index])
    IDs_text.append(id_text)
    print('ID => ',id_text)

print(IDs_text)

In [None]:
#GET GRADES CELLS COUNT :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################
    

# Count the contours of the grades area whose area lies strictly between the
# two thresholds (`mean`, `mean2`) computed in the student-details cell.
img2 = np.copy(grades)
imgAA = np.copy(grades)

gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
threshold = thresholding(gray)

show_images([gray],[''])

contours, _ = cv2.findContours(
    threshold.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# `counter` is read by the next cell as the total number of grade cells
counter = sum(1 for cnt in contours if mean < cv2.contourArea(cnt) < mean2)

print(counter)


In [None]:
#Arrange Contours & EXTRACT THEM::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

number_of_students = len(IDs_text)
# `counter` is the number of grade cells counted by the previous cell; it is
# only read here (not overwritten), so this cell can safely be re-run
number_of_cls_for_each = counter

# cells per table row; the +1 presumably accounts for a header row -- confirm
N = int(number_of_cls_for_each/(number_of_students+1))

img2 = np.copy(grades)
imgAA = np.copy(grades)

gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
threshold = thresholding(gray)

contours,_=cv2.findContours(threshold.astype(np.uint8), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# Recent OpenCV releases return the contours as a tuple, which does not support
# the slice assignment used below; a list works with every version.
contours = list(contours)

# Sorting contours: every time N cell-sized contours have been seen, the
# stretch of contours since the previous group is ordered left-to-right by
# the x coordinate of its bounding box.
# NOTE(review): `pre = index` makes consecutive stretches overlap by one
# element -- confirm whether `index + 1` was intended.
cells_in_row = 0
index = 0
pre = 0
for cnt in contours :
    area = cv2.contourArea(cnt)

    # Shortlisting the regions based on their area.
    if  area<mean2 and area>mean:
        cells_in_row += 1

        if cells_in_row == N:
            contours[pre : index+1] = sorted(contours[pre : index+1] , key=lambda ctr: cv2.boundingRect(ctr)[0])
            cells_in_row = 0
            pre = index

    index +=1


# Crop every cell-sized contour, in the order established above
imageboxes = []
i = 0
for cnt in contours :
    area = cv2.contourArea(cnt)

    if  area<mean2 and area>mean:
        (x, y, w, h) = cv2.boundingRect(cnt)

        imageboxes.append(img2[ y : y+h , x : x+w  ])
        print("contour " ,i, "= ", area)
        i+=1

        # preview: binarized cell without its border, then the running overlay
        new = thresholding(gray[ y : y+h , x : x+w   ])
        new  = remove_border(new)
        show_images([new],[''])
        cv2.drawContours(imgAA, [cnt], 0, (0,255,0), 3)
        show_images([imgAA],[''])



In [None]:
#DISTRIBUTE DATA OVER STUDENTS::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

# Split the grade-cell crops into one list of N cells per student.
number_of_students = len(IDs_text)
number_of_cls_for_each = len(imageboxes)
N = int(number_of_cls_for_each/(number_of_students+1))
print(number_of_students,number_of_cls_for_each,N)

# In-place reversal: running this cell a second time flips the IDs back again.
IDs_text.reverse()

# drop the last N boxes, then cut the rest into consecutive rows of N cells
distributed_grades = imageboxes[:number_of_cls_for_each-N]
row_starts = range(0, len(distributed_grades), N)

# rows are stored in reverse order so that they line up with the reversed IDs
final_grades = [distributed_grades[start:start + N] for start in row_starts][::-1]



In [None]:
#CLASSIFIER FUNCTIONS::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################


def check_empty(img):
    """Return True when the negative of ``img`` sums to zero, False otherwise.

    With the 0/255 images produced by ``thresholding`` this means every pixel
    is 255, i.e. the cell contains no ink.
    """
    inverted = get_Negative(np.copy(img))
    ink_total = np.sum(np.sum(inverted, axis=0))
    return bool(ink_total == 0)

#################################################################################################################
def check_question(img):
    """Heuristic test for a question mark drawn in a binarized cell.

    The cell must contain exactly two horizontal bands of ink (as reported by
    ``check_horizontal``), and the upper band must be clearly taller than the
    lower one: the function returns True when
    ``distance1 * 0.5 > distance2`` and False in every other case.
    """
    valid , counter  = check_horizontal(img)

    if valid == True and counter == 2:
        new = get_Negative(img)
        # per-row sum of the negative: non-zero where the row contains ink
        histo = np.sum(new, axis=1)
        first = False    # an inked row has been seen
        second = False   # an ink-free row has been seen after the first inked row
        distance1 = 0    # inked rows before the gap (plus one, see below)
        distance2 = 0    # inked rows after the gap

    
        for i in range(len(histo)):
        
            # first inked row: this and the next `if` both fire on that row,
            # so distance1 ends up one larger than the height of the upper band
            if first == False and histo[i] != 0 :
                first = True
                distance1 += 1
            if first == True and second == False and histo[i] != 0 :
                distance1 += 1
                
            # the gap between the two bands has been reached
            if first == True and histo[i] == 0 :
                second = True
        
            if second == True and histo[i] != 0:
                distance2 +=1
                
        if distance1 *0.5 > distance2 :     
            return True
        else:
            return False
    
    else:
        return False
    
    
        
#################################################################################################################


def get_Negative(img):
    """Return the photographic negative ``255 - img`` as a new array.

    Parameters
    ----------
    img : array-like
        Image with values on a 0-255 scale. It is not modified.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as ``img``.
    """
    # Single vectorised subtraction; replaces the former per-pixel Python
    # double loop, which dominated the run time of every classifier helper.
    source = np.asarray(img)
    return (255 - source).astype(source.dtype)

#################################################################################################################


def _count_ink_runs(img, axis):
    """Count the separate runs of inked lines in a projection of ``img``.

    The negative of ``img`` is summed along ``axis`` and the number of
    maximal stretches of consecutive non-zero sums is counted.

    Returns
    -------
    (bool, int)
        ``(runs >= 2, runs)``.
    """
    profile = np.sum(get_Negative(img), axis=axis)

    runs = 0
    previous_has_ink = False
    for value in profile:
        has_ink = value != 0
        # a run starts wherever an inked line follows an ink-free one
        if has_ink and not previous_has_ink:
            runs += 1
        previous_has_ink = has_ink

    return runs >= 2, runs


def check_vertical(img):
    """Count groups of inked columns (e.g. vertical strokes) in a binarized cell.

    Returns
    -------
    (valid, counter)
        ``counter`` is the number of separate groups of columns that contain
        ink; ``valid`` is True when there are at least two such groups.
    """
    return _count_ink_runs(img, axis=0)


##################################################################################################################

def check_horizontal(img):
    """Count groups of inked rows (e.g. horizontal strokes) in a binarized cell.

    Returns
    -------
    (valid, counter)
        ``counter`` is the number of separate groups of rows that contain
        ink; ``valid`` is True when there are at least two such groups.
    """
    return _count_ink_runs(img, axis=1)



#################################################################################################################
def intTryParse(value):
    """Try to convert ``value`` with ``int()``.

    Returns
    -------
    (result, ok)
        ``(int(value), True)`` on success, or ``(value, False)`` when
        ``int()`` raises ``ValueError``. Other exceptions propagate.
    """
    try:
        parsed = int(value)
    except ValueError:
        return value, False
    return parsed, True

    
def check_Number(img):
    """OCR ``img`` and try to read the recognised text as an integer.

    Returns the ``(value, is_int)`` pair produced by ``intTryParse``.
    """
    recognised = ocr_Numbers(img)
    outcome = intTryParse(recognised)
    return outcome[0], outcome[1]



def ocr_Numbers(img):
    """Run pytesseract on ``img`` (default settings), print the recognised
    text and return it."""
    recognised = pytesseract.image_to_string(img)
    print(recognised)
    return recognised



In [None]:
#TRAINING CLASSIFIER SECTION:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

def preprocess(img):
    """Turn a BGR training image into a binary image.

    Steps: grayscale conversion, 3x3 box blur (``cv2.blur``), then a fixed
    threshold at 50 with ``THRESH_BINARY`` and a maximum value of 255.
    """
    smoothed = cv2.blur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (3, 3))
    _, binary_image = cv2.threshold(smoothed, 50, 255, cv2.THRESH_BINARY)
    return binary_image


def findContourArea(img):
    """Find the contours of ``img`` and return ``(area, contours)``.

    ``area`` is the area of the contour at index 1 of the ``RETR_TREE``
    result.
    """
    # NOTE(review): index 1 presumably skips an outer frame contour at index 0;
    # an image with fewer than two contours raises IndexError -- confirm.
    found = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours, hierarchy = found
    return cv2.contourArea(contours[1]), contours

def findBoundingRectangleArea(img, contours):
    """Return width * height of the upright bounding rectangle of
    ``contours[1]``. ``img`` is accepted but not used."""
    _, _, width, height = cv2.boundingRect(contours[1])
    return width * height



def findBoundingTriangleArea(img, contours):
    """Return the first element of ``cv2.minEnclosingTriangle(contours[1])``,
    which is the area of the minimal enclosing triangle. ``img`` is accepted
    but not used."""
    enclosing = cv2.minEnclosingTriangle(contours[1])
    return enclosing[0]




def extract_features(img, class_number=None):
    """Build the feature row ``[class_number, fill_rect, fill_tri]`` for a
    binary symbol image.

    ``fill_rect`` is the contour area divided by the area of its bounding
    rectangle; ``fill_tri`` is the contour area divided by the area of its
    minimal enclosing triangle.
    """
    contour_area, contours = findContourArea(img)
    rect_area = findBoundingRectangleArea(img, contours)
    tri_area = findBoundingTriangleArea(img, contours)
    return [class_number, contour_area / rect_area, contour_area / tri_area]




def _features_for_class(pattern, class_number):
    """Feature rows of every image matching ``pattern``, in sorted file order."""
    return [
        extract_features(preprocess(cv2.imread(path)), class_number)
        for path in sorted(glob.glob(pattern))
    ]


# One feature table per symbol class. The variable names are historical:
# "circle" holds the tr_images/correct samples, "tri" the tr_images/dash ones.
training_data_rec = _features_for_class('tr_images/rectangle/*.png', 1)
training_data_circle = _features_for_class('tr_images/correct/*.png', 2)
training_data_tri = _features_for_class('tr_images/dash/*.png', 3)

# all classes together: rectangle rows, then correct rows, then dash rows
training_data = training_data_rec + training_data_circle + training_data_tri

training_data = np.asarray(training_data)
training_data_rec = np.asarray(training_data_rec)
training_data_circle = np.asarray(training_data_circle)
training_data_tri = np.asarray(training_data_tri)


def calculateDistance(x1, x2):
    """Euclidean distance between two points, using only their first two
    coordinates (index 0 and index 1)."""
    delta_first = x1[0] - x2[0]
    delta_second = x1[1] - x2[1]
    return math.sqrt(delta_first ** 2 + delta_second ** 2)




def KNN(test_point, training_points, k):
    """Classify ``test_point`` by majority vote among its ``k`` nearest
    training rows.

    Parameters
    ----------
    test_point : sequence
        Feature row; only the entries at index 1 and 2 are used.
    training_points : numpy.ndarray
        2-D array whose rows are ``[class, feature1, feature2]``.
    k : int
        Number of neighbours that vote.

    Returns
    -------
    int
        1, 2 or 3. On a tie the lowest class number wins; labels other than
        1, 2 and 3 do not vote.
    """
    query = (test_point[1], test_point[2])
    scored = [
        (calculateDistance(query, (row[1], row[2])), row[0])
        for row in training_points
    ]
    nearest = sorted(scored)[:k]

    classes = (1, 2, 3)
    votes = [0, 0, 0]
    for _, label in nearest:
        for slot, cls in enumerate(classes):
            if label == cls:
                votes[slot] += 1
                break

    # list.index returns the first maximum, so ties go to the lowest class
    return classes[votes.index(max(votes))]


def check_3sym(img ,training_data):
    """Classify the symbol in binary image ``img`` with a 3-nearest-neighbour
    vote over ``training_data`` and return the predicted class (1, 2 or 3)."""
    return KNN(extract_features(img), training_data, 3)



In [None]:
'''#FOR TESTING CLASSIFIER:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;
#################################################################################################################

x = final_grades[13][1]
test =cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
test = cv2.blur(test,(3,3))
test = thresholding(test)
test  = remove_border(test)


show_images([test],[''])




plt.style.use('seaborn-whitegrid')
x_point = extract_features(test)

print(check_3sym(test,training_data))

plt.scatter(training_data_rec[:, 1], training_data_rec[:, 2], marker='o')
plt.plot(training_data_circle[:, 1], training_data_circle[:, 2], 'o', color='black')
plt.plot(training_data_tri[:, 1], training_data_tri[:, 2], 'o', color='red')

plt.plot(x_point[ 1], x_point[ 2], 'o', color='green')


'''

In [None]:
# GENERAL CLASSIFIER FUNCTION:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

def classify_all(img):
    """Translate one binarized grade cell into a grade.

    The checks run in this order and the first match wins:

    1. empty cell                      -> ''
    2. question mark                   -> '?'
    3. two or more vertical strokes    -> number of strokes
    4. two or more horizontal strokes  -> 5 - number of strokes
    5. otherwise the KNN symbol class  -> 0 for class 1, 5 for class 2,
       0 for class 3 (None for any other class)

    Uses the module-level ``training_data`` for step 5.
    """
    if check_empty(img):
        return ''

    if check_question(img):
        return '?'

    has_strokes, stroke_count = check_vertical(img)
    if has_strokes:
        return stroke_count

    has_strokes, stroke_count = check_horizontal(img)
    if has_strokes:
        return 5 - stroke_count

    grade_by_symbol = {1: 0, 2: 5, 3: 0}
    return grade_by_symbol.get(check_3sym(img, training_data))
        
     
    

In [None]:
#WRITE TO EXCEL SHEET ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#################################################################################################################

# NOTE: identical re-definition of the remove_border helper already defined in
# the segmentation-functions cell; running this cell simply rebinds the name.
def remove_border(img):
    """Return ``img`` without a 5% margin on each of its four sides."""
    n_rows, n_cols = img.shape[0], img.shape[1]
    trim_rows = int(0.05 * n_rows)
    trim_cols = int(0.05 * n_cols)
    kept_rows = slice(trim_rows, n_rows - trim_rows)
    kept_cols = slice(trim_cols, n_cols - trim_cols)
    return img[kept_rows, kept_cols]

workbook = xlwt.Workbook()

sheet = workbook.add_sheet("GRADES")

# red background used to flag cells the grader marked with a question mark
style1 = xlwt.easyxf('pattern: pattern solid, fore_colour red;')


# One spreadsheet row per student: column 0 holds the OCR'd ID, columns 1..N
# hold the classified grades. FINALS mirrors the grades as nested lists.
FINALS = []
for i in range(number_of_students) :
    Student_grades = []
    sheet.write(i, 0, IDs_text[i])
    for j in range(N):

        x = final_grades[i][j]
        test =cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
        test = thresholding(test)
        test  = remove_border(test)

        try:
            val = classify_all(test)
            if val == '?':
                sheet.write(i,j+1 , '', style1)
            else:
                sheet.write(i, j+1, val)
        except Exception as exc:
            # Best effort: a cell that cannot be classified is left blank, but
            # the failure is reported instead of being silently swallowed
            # (a bare `except:` also trapped KeyboardInterrupt).
            print("could not grade student", i, "cell", j, ":", exc)
            val = ''

        Student_grades.append(val)
    FINALS.append(Student_grades)


workbook.save("sample.xls")