In [1]:
from commonfunctions import *
import numpy as np
import cv2
import math
from scipy import ndimage
import math
import imutils
from skimage.filters import threshold_otsu, threshold_local
from skimage.morphology import binary_erosion, binary_dilation
%matplotlib inline
%load_ext autoreload
%autoreload 2

## -----------------------------------
# Auxilary Functions
## -----------------------------------

In [2]:
def digital_or_not(input_image):
    
    if '.png' in input_image.lower():
        return  1
    else:
        return  0

In [3]:
def show(img, factor=1,name="image"):
    """ 
    show an image until the escape key is pressed
    :param factor: scale factor (default 1, half size)
    """
    if factor != 1.0:
        img = cv2.resize(img, (0,0), fx=factor, fy=factor) 

    cv2.imshow(name,img)
    while(1):
        k = cv2.waitKey(0)
        if k==27:    # Esc key to stop
            break
    cv2.destroyAllWindows()

## -----------------------------------
# Image Preprocessing
## -----------------------------------

In [4]:
def gaussian(img,gaussian_kernel=5):
    
    if(len(img.shape)>2):
        img = cv2.GaussianBlur(img,(gaussian_kernel,gaussian_kernel),0)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        mid = 0.5
        mean = np.mean(img)
        gamma = math.log(mid*255)/math.log(mean)
        img = np.power(img, gamma).clip(0,255).astype(np.uint8)
        img = cv2.cvtColor(img,cv2.COLOR_GRAY2RGB)
    else:
        img = cv2.GaussianBlur(img,(gaussian_kernel,gaussian_kernel),0)
    
    
    return img

In [5]:

def rotate_img(img):
    if(len(img.shape)>2):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    height, width = img.shape
    im_gs = img
    max_skew = max(img.shape[0],img.shape[1])
    # Create a grayscale image and denoise it
    im_gs = cv2.fastNlMeansDenoising(im_gs, h=3)

    # Create an inverted B&W copy using Otsu (automatic) thresholding
    im_bw = cv2.threshold(im_gs, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # Detect lines in this image. Parameters here mostly arrived at by trial and error.
    lines = cv2.HoughLinesP(
        im_bw, 1, np.pi / 180, 200, minLineLength=width / 12, maxLineGap=width / 150
    )

    # Collect the angles of these lines (in radians)
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angles.append(np.arctan2(y2 - y1, x2 - x1))

    # If the majority of our lines are vertical, this is probably a landscape image
    landscape = np.sum([abs(angle) > np.pi / 4 for angle in angles]) > len(angles) / 2

    # Filter the angles to remove outliers based on max_skew
    if landscape:
        angles = [
            angle
            for angle in angles
            if np.deg2rad(90 - max_skew) < abs(angle) < np.deg2rad(90 + max_skew)
        ]
    else:
        angles = [angle for angle in angles if abs(angle) < np.deg2rad(max_skew)]

    if len(angles) < 5:
        # Insufficient data to deskew
        return img

    # Average the angles to a degree offset
    angle_deg = np.rad2deg(np.median(angles))

    # If this is landscape image, rotate the entire canvas appropriately
    if landscape:
        if angle_deg < 0:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
            angle_deg += 90
        elif angle_deg > 0:
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
            angle_deg -= 90

    # Rotate the image by the residual offset
    M = cv2.getRotationMatrix2D((width / 2, height / 2), angle_deg, 1)
    img = cv2.warpAffine(img, M, (width, height), borderMode=cv2.BORDER_REPLICATE)

    return img

In [6]:
def binarize(img,block_size=25,neighbours=7):
    if(len(img.shape)>2):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,block_size,neighbours)
    else:
        img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,block_size,neighbours)
    return img

In [7]:
def morphology(img,kernel_size=2):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(kernel_size,kernel_size))
    if(len(img.shape)>2):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        img = closing
        img = cv2.cvtColor(img,cv2.COLOR_GRAY2RGB)      
    else:
        opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        img = closing
    return img

In [8]:
def preprocess_img(img,show_steps=0,show_size=1,digital=1):
    
    if(digital != 1):
        scale_percent = 50
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)
        img = cv2.resize(img, dim, interpolation = cv2.INTER_AREA) 
        gaussian_kernel = 5
        block_size = 27
        neighbours = 3
    else:
        gaussian_kernel = 1
        block_size = 65
        neighbours = 1
        
    # rotate
    img = rotate_img(img)
    if(show_steps !=0 ):
        show(img,show_size,"Rotated")
    # Gaussian
    img = gaussian(img,gaussian_kernel)
    if(show_steps !=0 ):
        show(img,show_size,"Gaussian Filtered")
        
    # local thresh
    img = binarize(img,block_size,neighbours)
    if(show_steps !=0 ):
        show(img,show_size,"Binarized")

    # morphology to remove noise
    img = morphology(img,2)
    if(show_steps !=0 ):
        show(img,show_size,"Morphology")

    return img

## -----------------------------------
# Staff Removal
## -----------------------------------

In [9]:
def run_length_encoding(img):
        
    rows = img.shape[0]
    cols = img.shape[1]

    black_runs = []
    white_runs = []
    for i in range(cols):        
        black_run = 0
        white_run = 0

        for j in range(rows):
            if (img[j][i] == 255 and black_run == 0):
                white_run += 1
            if (img[j][i] == 0 and white_run != 0):
                white_runs.append(white_run)
                white_run = 0
            if (img[j][i] == 0 and white_run == 0):
                black_run += 1
            if (img[j][i] == 255 and black_run != 0):
                black_runs.append(black_run)
                black_run = 0

    return white_runs,black_runs

In [10]:
def get_common_run(rle):
    
    counter = np.zeros(5000)
    for i in range(len(rle)):
        counter[rle[i]] += 1
        
    return np.argmax(counter)


In [11]:
def remove_lines_non_digital(img,staff_thickness,staff_space):
    rows = img.shape[0]
    cols = img.shape[1]
    
    for i in range(cols):
        j = 0
        while j < rows:
            if img[j][i] == 0 and j + staff_thickness < rows:
                if np.sum(img[j:j+staff_thickness,i]) != 0:
                    img[j:j+staff_thickness,i] = 255
                    j +=  staff_thickness -1
            j+=1
    return img

In [12]:
def remove_staff_lines(img,show_steps=1,show_size=1,digital=1):

    white_rle,black_rle = run_length_encoding(img)
    staff_thickness = get_common_run(black_rle) + 2
    staff_space = get_common_run(white_rle) 

    img = remove_lines_non_digital(img,staff_thickness,staff_space)
    if show_steps == 1 :
        show(img,show_size,"Removed Lines")
        
    return img,staff_space

## -----------------------------------
# Segmentation Part
## -----------------------------------

In [13]:
def get_notes(img):
    """ 
    Segements the notes
    :param
    img: the img to extract notes from
    """

    if(len(img.shape)>2):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(img)
    thresh = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    proj = np.sum(thresh,axis=0)
    avg = np.mean(proj)
    start = 0
    end = 0
    started = False
    segments = []
    for i in range(len(proj)):
        if proj[i] > int(avg/10) and started == False:
            started = True
        if proj[i] < int(avg/10) and started == True:
            started = False
            end = i
            segments.append([start,end])
            start = i

    return segments

In [14]:
def segment_img(img,show_steps=1,show_size=1):
    segments = get_notes(img)
    if show_steps == 1 :
        for i in range(len(segments)):
            show(img[:,segments[i][0]:segments[i][1]],show_size,"Segment")
    return segments

## -----------------------------------
# Main Code
## -----------------------------------

In [41]:
input_image = '/media/madmax/MadMax_D/CUFE/Fall_2021/CMPN446_ImageProcessing_and_ComputerVision/Project/OMR_IP/input/29.jpg'
img = cv2.imread(input_image) 

digital = digital_or_not(input_image)
show_steps = 1
show_size = 1


img = preprocess_img(img,show_steps,show_size,digital)
img,staff_space = remove_staff_lines(img,show_steps,show_size)
segments = segment_img(img,show_steps,show_size)
print(staff_space)



17


In [38]:
test = img[:,segments[6][0]:segments[6][1]]
vertical =np.copy(test)
show(test,show_size,"Segment")
# print(test)
# plt.hist(test.ravel(),256,[0,256]); plt.show()
print(4*staff_space)
for colum in range(test.shape[1]):
    number_black_pixel=0
    for row in range(test.shape[0]):
        if test[row][colum] == 0:
            number_black_pixel+=1
#     print(number_black_pixel)
    if number_black_pixel > 4*staff_space:
        print("it's note")
        break

64


## -----------------------------------
# Not Needed
## -----------------------------------

In [None]:
def get_horizontal_projection(img,show_steps=1,show_size=1):
    if(len(img.shape)>2):
        img = rgb2gray(img)
    proj = np.sum(img,axis=1).astype(int)
    
    max_line = np.amax(proj)
    staffs = proj == (max_line)
    m = np.max(proj)
    w = 500
    result = np.zeros((proj.shape[0],500))

    if show_steps == 1 :
        for row in range(img.shape[0]):
            cv2.line(result, (0,row), (int(proj[row]*w/m),row), (255,255,255), 1)
        show(result,show_size,"Horizontal Projection")
    return np.argsort(proj)

In [None]:
def identify_lines(img,show_steps=1,show_size=1):
    """ 
    Gets horizontal projection and extracts five lines from it
    :param
    show_steps: wether show steps or not 0 dont show , 1 show
    show_size : the size of the shown image a value to factor in x and y
    """
    if(len(img.shape)>2):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    staffs = get_horizontal_projection(img,show_steps,show_size)
    lines = []
    for i in range(len(staffs)):
        is_line = True
        for j in range(len(lines)):
            if(abs(staffs[i] - lines[j]) < int(img.shape[0]/30)):
                is_line = False
        if(is_line and staffs[i] - 6 > 0 and staffs[i] + 6 < img.shape[0]):
            lines.append(staffs[i])
        if(len(lines) == 5):
            break

    return img,lines

In [None]:
def remove_lines_digital(img,lines):
    test_value = 100
    pixels_tested = 6
    for line in lines:
        for i in range(2,img.shape[1]-2):
            sum_up = 0
            sum_down = 0
            for j in range(pixels_tested):
                sum_up += img[line-j-1][i]
                sum_down += img[line+j][i]
            if(sum_up > test_value and sum_down > test_value):
                img[line][i] = 255
                for j in range(pixels_tested):
                    img[line-j][i] = 255
                    img[line+j][i] = 255
            elif (sum_up > test_value) :
                
                img[line][i] = 255
                for j in range(pixels_tested):
                    img[line-j][i] = 255
            elif sum_down > test_value:
                img[line][i] = 255
                for j in range(pixels_tested):
                    img[line+j][i] = 255
    kernel = np.ones((3,3),np.uint8)
    img = cv2.erode(img,kernel,iterations = 1)
    return img