In [46]:
from commonfunctions import *
import numpy as np
import cv2
import math
from scipy import ndimage
import math
from skimage.filters import threshold_otsu, threshold_local
from skimage.morphology import binary_erosion, binary_dilation
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [47]:
def show(img, factor=1,name="image"):
    """Display an image in an OpenCV window and block until Esc is pressed.

    :param img: image handed to ``cv2.imshow`` (resized first when needed)
    :param factor: scale factor applied to both axes before display
        (default 1, i.e. the image is shown at its original size)
    :param name: title of the window
    """
    escape_key = 27
    displayed = img if factor == 1.0 else cv2.resize(img, (0, 0), fx=factor, fy=factor)

    cv2.imshow(name, displayed)
    # waitKey(0) returns on every key press; keep waiting until that key is Esc.
    while cv2.waitKey(0) != escape_key:
        pass
    cv2.destroyAllWindows()

In [48]:
def gaussian(img,size=5):
    """Blur an image with a square Gaussian kernel.

    Input with more than two dimensions is converted BGR -> gray, blurred and
    expanded back to three channels (GRAY -> RGB); two-dimensional input is
    blurred directly and stays two-dimensional.

    :param img: image array (anything exposing ``.shape`` that OpenCV accepts)
    :param size: side length of the kernel passed to ``cv2.GaussianBlur``
    :return: the blurred image
    """
    is_color = len(img.shape) > 2
    if is_color:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Both paths go through the same cv2 call; the module is imported as
    # `cv2`, so the blur must not be reached through any other alias.
    img = cv2.GaussianBlur(img, (size, size), 0)

    if is_color:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    return img

In [49]:
def rotate_img(img):
    """Deskew an image by rotating it about its centre.

    The image is reduced to gray (when it has more than two dimensions),
    inverted and Otsu-thresholded; the angle of the minimum-area rectangle
    around all non-zero pixels of that mask is used as the skew estimate.

    :param img: gray (2-D) or BGR (3-D) image
    :return: the rotated image, always expanded to three channels
    """
    if len(img.shape) > 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    inverted = cv2.bitwise_not(img)
    mask = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    points = np.column_stack(np.where(mask > 0))

    # NOTE(review): the -45 test assumes minAreaRect reports angles in
    # [-90, 0); confirm this against the installed OpenCV version.
    skew = cv2.minAreaRect(points)[-1]
    skew = -(90 + skew) if skew < -45 else -skew

    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    height, width = img.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, skew, 1.0)
    return cv2.warpAffine(img, rotation, (width, height),
                          flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

In [50]:
def binarize(img,block_size=25):
    """Binarize an image with Gaussian-weighted adaptive thresholding.

    Input with more than two dimensions is converted BGR -> gray first and
    expanded back to three channels afterwards; 2-D input stays 2-D.

    :param img: gray (2-D) or BGR (3-D) image
    :param block_size: neighbourhood size forwarded to ``cv2.adaptiveThreshold``
    :return: the thresholded image (maximum value 255, constant offset 2)
    """
    is_color = len(img.shape) > 2
    source = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if is_color else img
    binary = cv2.adaptiveThreshold(
        source,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        2,
    )
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB) if is_color else binary

In [51]:
def morphology(img,kernel_size=5):
    """Apply a morphological closing with a square structuring element.

    Input with more than two dimensions is converted BGR -> gray first and
    expanded back to three channels afterwards; 2-D input stays 2-D.

    :param img: gray (2-D) or BGR (3-D) image
    :param kernel_size: side length of the rectangular structuring element
    :return: the closed image
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    is_color = len(img.shape) > 2
    if is_color:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Only the closing ever reached the caller, so it is the only operation
    # performed here.
    # NOTE(review): if an open-then-close sequence was intended, the opening
    # has to be applied first and its result fed into the closing.
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

    if is_color:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    return img

In [52]:
def get_corners(img):
    """Placeholder for corner detection; not implemented yet.

    The argument is ignored and ``None`` is returned.
    """
    return None

In [53]:
def preprocess_img(img,gaussian_kernel=3,morph_kernel=2,show_steps=0,img_size=1):
    """Run the preprocessing chain: blur, binarize, deskew, morphology.

    :param img: input image
    :param gaussian_kernel: kernel size forwarded to ``gaussian``
    :param morph_kernel: kernel size forwarded to ``morphology``
    :param show_steps: any value other than 0 displays the image after
        every stage (each display blocks inside ``show``)
    :param img_size: scale factor forwarded to ``show``
    :return: the image after the last stage
    """
    # Perspective correction (get_corners) is not wired in yet.
    preview = show_steps != 0
    stages = (
        ("Gaussian Filtered", lambda im: gaussian(im, gaussian_kernel)),
        ("Binarized", lambda im: binarize(im)),
        ("Rotated", lambda im: rotate_img(im)),
        ("Morphology", lambda im: morphology(im, morph_kernel)),
    )
    for title, stage in stages:
        img = stage(img)
        if preview:
            show(img, img_size, title)
    return img

In [54]:
def segment_img(img, n_clusters=None):
    """Quantize an image into a small number of intensity/colour clusters.

    Every pixel is treated as one k-means sample (one feature per channel)
    and replaced by the centre of the cluster it is assigned to.

    :param img: gray (2-D) or multi-channel (3-D) image
    :param n_clusters: number of clusters; when omitted, 5 is used for
        multi-channel input and 10 for 2-D input
    :return: array with the same shape as ``img`` holding the uint8 cluster
        centres
    """
    is_color = len(img.shape) > 2
    if n_clusters is None:
        n_clusters = 5 if is_color else 10

    # One row per pixel: the passed-in image itself is clustered, with as many
    # columns as it has channels (a single column for gray input).
    channels = img.shape[2] if is_color else 1
    samples = np.float32(img.reshape((-1, channels)))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    # OpenCV 3+/4 Python signature: kmeans(data, K, bestLabels, criteria,
    # attempts, flags); bestLabels is None because no initial labels are given.
    _, labels, centers = cv2.kmeans(
        samples, n_clusters, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    centers = np.uint8(centers)
    quantized = centers[labels.flatten()]
    return quantized.reshape(img.shape)

In [55]:
def remove_lines(img,lines,test_value=100,pixels_tested=4):
    """Erase staff lines in place wherever no symbol crosses them.

    For every row index in ``lines`` and every column except the two
    outermost on each side, the ``pixels_tested`` pixels above the line and
    the ``pixels_tested`` pixels from the line downwards (the line row
    included) are summed. A side whose sum exceeds ``test_value`` is
    considered empty and is painted 255 together with the line pixel; a side
    at or below the threshold is left untouched.

    :param img: 2-D image array; modified in place
    :param lines: iterable of row indices of the staff lines
    :param test_value: a side counts as empty when its sum is above this value
    :param pixels_tested: number of rows inspected and cleared on each side
    :return: the same ``img`` object
    :raises IndexError: when a line is closer than ``pixels_tested`` rows to
        the bottom edge (rows above the top edge wrap around, as negative
        indices do)
    """
    # View on the processed columns; writes go straight through to `img`.
    strip = img[:, 2:img.shape[1] - 2]
    offsets = np.arange(pixels_tested)

    for line in lines:
        # Sum in int64: adding uint8 pixels in their own dtype wraps at 256.
        sum_up = strip[line - 1 - offsets].sum(axis=0, dtype=np.int64)
        sum_down = strip[line + offsets].sum(axis=0, dtype=np.int64)
        up_clear = sum_up > test_value
        down_clear = sum_down > test_value

        strip[line, up_clear | down_clear] = 255
        for j in range(pixels_tested):
            strip[line - j, up_clear] = 255
            strip[line + j, down_clear] = 255
    return img

In [56]:
def get_staffs(img, display=True):
    """Rank image rows by their horizontal projection (row sum).

    :param img: image array; input with more than two dimensions is reduced
        with ``rgb2gray`` first (not defined in this file, presumably
        provided by the ``commonfunctions`` star import)
    :param display: when true (the default), draw the projection as a
        500-pixel-wide bar chart and display it with ``show``, which blocks
        until Esc is pressed
    :return: row indices ordered by ascending row sum, i.e. the rows with the
        smallest pixel total come first
    """
    if len(img.shape) > 2:
        img = rgb2gray(img)
    proj = np.sum(img, axis=1).astype(int)

    if display:
        width = 500
        peak = np.max(proj) if proj.size else 0
        canvas = np.zeros((proj.shape[0], width))
        # With an all-zero projection there is nothing to scale against
        # (and dividing by the peak would fail), so the chart stays empty.
        if peak > 0:
            for row in range(img.shape[0]):
                bar_end = int(proj[row] * width / peak)
                cv2.line(canvas, (0, row), (bar_end, row), (255, 255, 255), 1)
        show(canvas)

    return np.argsort(proj)

In [57]:
def get_notes(img):
    """Split the image into vertical strips based on its column projection.

    The image is inverted and Otsu-thresholded, then every column is summed.
    A run begins at the first column whose sum rises above half the mean
    column sum and ends at the first later column that falls below it.

    :param img: gray (2-D) or BGR (3-D) image
    :return: list of ``[start, end]`` column pairs; the first strip starts at
        column 0 and every later strip starts where the previous one ended.
        A run still open at the last column produces no strip.
    """
    if len(img.shape) > 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(img)
    binary = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    column_sums = np.sum(binary, axis=0)
    mean_sum = np.mean(column_sums)
    # The cutoff is only needed when there is at least one column to scan.
    cutoff = int(mean_sum / 2) if len(column_sums) else 0

    segments = []
    left = 0
    inside = False
    for col, ink in enumerate(column_sums):
        if not inside:
            if ink > cutoff:
                inside = True
        elif ink < cutoff:
            inside = False
            segments.append([left, col])
            left = col
    return segments

In [58]:
def identify_lines(img):
    """Pick up to five staff-line rows from the ranking given by get_staffs.

    Candidates are visited in the order ``get_staffs`` returns them. A row is
    kept when it is at least 10 rows away from every row already kept and
    lies more than 4 rows from the top edge and more than 4 rows from the
    bottom edge. The search stops once five rows are collected.

    :param img: gray (2-D) or BGR (3-D) image
    :return: ``(gray_image, lines)``; ``lines`` keeps the ranking order and is
        not sorted by position
    """
    if len(img.shape) > 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    height = img.shape[0]
    lines = []
    for candidate in get_staffs(img):
        too_close = any(abs(candidate - kept) < 10 for kept in lines)
        inside = candidate - 4 > 0 and candidate + 4 < height
        if inside and not too_close:
            lines.append(candidate)
        if len(lines) == 5:
            break
    return img, lines

In [61]:
# Scanned score to process.
# NOTE(review): absolute, machine-specific path; consider making it relative
# to a configurable input directory.
INPUT_PATH = '/home/kamel/Desktop/Image/Project/OMR_IP/input/01.PNG'

img = cv2.imread(INPUT_PATH)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, which would otherwise surface later as an unrelated error.
if img is None:
    raise FileNotFoundError("could not read image: " + INPUT_PATH)
img = preprocess_img(img,gaussian_kernel=1,show_steps=1,img_size=1)

# Locate the staff lines, then erase them wherever no symbol crosses them.
img,lines = identify_lines(img)
show(img,1,"Bolded Lines")

img = remove_lines(img,lines)
show(img,1,"Removed Lines")

# Display every vertical strip found by the column projection.
segments = get_notes(img)
for start, end in segments:
    show(img[:,start:end],1,"Segment")