In [25]:
# Path of the input photo; it is passed to cv2.imread in the edge-detection cell.
inputImageName = 'desk.jpg'

In [27]:
# This cell is code for image edge detection and contour finding
# Corresponding to section 2.1 and 2.2 in report
# By Xuanbin Luo, xl2806@nyu.edu

import os
import shutil

import cv2
import imutils
import numpy as np

def order_corner_points(corners):
    """Order the four corners of a quadrilateral for a perspective warp.

    The two points with the smallest y are treated as the top edge and the
    other two as the bottom edge; within each pair the point with the smaller
    x is the left one.

    Args:
        corners: Sequence of contour-style points, each read as
            ``corner[0][0]`` (x) and ``corner[0][1]`` (y). Only the first
            four points are used.

    Returns:
        Tuple ``(top_l, top_r, bottom_r, bottom_l)`` of ``(x, y)`` pairs.

    Raises:
        ValueError: If fewer than four points are supplied.
    """
    corners = [(corner[0][0], corner[0][1]) for corner in corners][:4]
    if len(corners) < 4:
        raise ValueError(
            "order_corner_points needs four corner points, got %d" % len(corners))

    # Rank the point indices by y, using x to break ties. Counting "how many
    # points are strictly below me" fails when the 2nd and 3rd smallest y are
    # equal (e.g. a diamond), leaving fewer than two top/bottom points.
    by_height = sorted(range(4), key=lambda idx: (corners[idx][1], corners[idx][0]))

    # Keep each pair in original index order so the left/right decision below
    # matches the previous behaviour for every input that used to work.
    top = [corners[idx] for idx in sorted(by_height[:2])]
    bottom = [corners[idx] for idx in sorted(by_height[2:])]

    if top[0][0] < top[1][0]:
        top_l, top_r = top
    else:
        top_r, top_l = top

    if bottom[0][0] < bottom[1][0]:
        bottom_l, bottom_r = bottom
    else:
        bottom_r, bottom_l = bottom

    print((top_l, top_r, bottom_r, bottom_l))
    return (top_l, top_r, bottom_r, bottom_l)



def perspective_transform(image, corners):
    """Warp the quadrilateral described by ``corners`` onto an upright rectangle.

    The output width is the longer of the bottom and top edge lengths and the
    output height is the longer of the right and left edge lengths, each
    truncated to an integer number of pixels.

    Args:
        image: Image handed to ``cv2.warpPerspective``.
        corners: Four contour-style points accepted by ``order_corner_points``.

    Returns:
        The warped image of size ``(width, height)``.
    """
    quad = order_corner_points(corners)
    tl, tr, br, bl = quad

    def edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    width = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
    height = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

    # Destination rectangle, listed in the same order as the source corners:
    # top-left, top-right, bottom-right, bottom-left.
    target = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32",
    )
    source = np.array(quad, dtype="float32")

    homography = cv2.getPerspectiveTransform(source, target)
    return cv2.warpPerspective(image, homography, (width, height))

# Height (in pixels) used for both the working image and the final output.
TARGET_HEIGHT = 2300

image = cv2.imread(inputImageName)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("Could not read input image: " + inputImageName)
image = imutils.resize(image, height=TARGET_HEIGHT)
realImage = image.copy()

# Edge map: grayscale -> light blur -> Canny -> dilate then erode.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (3, 3), 0)
edged = cv2.Canny(gray, 30, 200)
edged = cv2.dilate(edged, None)
edged = cv2.erode(edged, None)

# Contours, largest area first.
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)

# Reset both results so a failed search cannot silently reuse values left in
# the kernel by an earlier run of this cell.
screenCnt = None
transformed = None

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    # The first (largest) contour that simplifies to four points is used.
    if len(approx) == 4:
        screenCnt = approx
        transformed = perspective_transform(realImage, screenCnt)
        break

if screenCnt is None:
    raise RuntimeError("No four-sided contour found in " + inputImageName)

cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
transformed = imutils.resize(transformed, height=TARGET_HEIGHT)
cv2.imwrite("transformed.png", transformed)

((497, 812), (2587, 822), (2636, 1899), (442, 1913))


True

In [28]:
# This cell is code for image enhancement
# Corresponding to section 2.3
# By Shuhan Zhang, sz2898@nyu.edu


# Convert the warped image from BGR to grayscale.
# `transformed` derives from cv2.imread, which stores channels in BGR order,
# so COLOR_BGR2GRAY is the matching conversion (COLOR_RGB2GRAY would swap the
# red and blue weights).
grayimg = cv2.cvtColor(transformed, cv2.COLOR_BGR2GRAY)

# Gamma Correction
def adjust_gamma(image, gamma=1.0):
    """Remap image intensities through a 256-entry power-law lookup table.

    Level ``i`` (0..255) maps to ``((i / 255.0) ** gamma) * 255``, converted to
    uint8.

    Args:
        image: Image passed to ``cv2.LUT``.
        gamma: Exponent of the power law; 1.0 leaves levels unchanged.

    Returns:
        The remapped image as returned by ``cv2.LUT``.
    """
    lut = np.array(
        [((level / 255.0) ** gamma) * 255 for level in range(256)]
    ).astype("uint8")
    return cv2.LUT(image, lut)

gatemp = adjust_gamma(grayimg, 2.2)

# Image Sharpening
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
entemp = cv2.filter2D(gatemp, -1, kernel)

# Image Binarization
threshtemp = cv2.adaptiveThreshold(entemp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 15)

# Noise Reduction
para = threshtemp.shape
width = para[1]
height = para[0]

# Connected components are computed on the inverted image, so dark pixels of
# `threshtemp` become the foreground being labelled.
threshtemp_inv = cv2.bitwise_not(threshtemp)
num_labels, labels, status, centroids = cv2.connectedComponentsWithStats(threshtemp_inv, connectivity = 4)

# One boolean per label: True when the component covers at most 5 pixels.
tiny = status[:, cv2.CC_STAT_AREA] <= 5
empty = [i for i in range(num_labels) if tiny[i]]

# Paint every pixel that belongs to a tiny component white. Indexing the
# per-label mask with the label image yields a per-pixel mask, replacing the
# previous pixel-by-pixel Python loop with the same result.
ccnew = threshtemp.copy()
ccnew[tiny[labels]] = 255

In [29]:
# This cell is code for image segmentation
# Corresponding to section 2.4
# By Shuhan Zhang, sz2898@nyu.edu

# Crop a fixed margin (20 px top/bottom, 30 px left/right) off the cleaned image.
# Sizes are taken from the arrays themselves rather than by decrementing the
# shared `height`/`width` globals, so re-running this cell gives the same crop.
full_height, full_width = ccnew.shape
new = ccnew[20:full_height-20, 30:full_width-30]
height, width = new.shape

# Boolean mask of black (ink) pixels, shared by both projections below.
ink = (new == 0)

# Find lines
# lines[i] = number of black pixels in row i.
lines = ink.sum(axis=1).astype(float)

line_range = []
begin = 0
end = 0
for i in range(0, height):
    if lines[i] >= 6:
        # Row has enough ink: open a run if none is open (0 doubles as "no run").
        if begin == 0:
            begin = i
    elif begin != 0:
        # First sparse row after a run: keep the run only if it is tall enough.
        # NOTE: a run of 10 rows or fewer does not reset `begin`, so it is
        # merged with whatever follows (behaviour kept as-is).
        end = i
        if end - begin > 10:
            line_range.append([begin, end])
            begin = 0
            end = 0

# Find words
rect = []
for (x, y) in line_range:
    # rows[i] = number of black pixels in column i within rows x..y (inclusive).
    rows = ink[x:y+1].sum(axis=0).astype(float)
    be = 0
    en = 0
    for i in range(5, width-5):
        if rows[i] >= 1:
            if be == 0:
                be = i
        elif be != 0:
            # A word ends only at a gap of five consecutive empty columns.
            if rows[i+1] < 1 and rows[i+2] < 1 and rows[i+3] < 1 and rows[i+4] < 1:
                en = i
                if en - be > 3:
                    rect.append([be, en, x, y])
                    be = 0
                    en = 0

# Attach boxes outside words
addrect = new.copy()
for (x, y, u, v) in rect:
    cv2.rectangle(addrect, (x, u), (y, v), (0, 0, 0), 1)
cv2.imwrite("segment.png",addrect)

# Function for MSER algorithm, not used in the final design
def MSER(image):
    """Detect MSER regions and draw their bounding boxes onto ``image``.

    The rectangles are drawn directly on the array that was passed in, and the
    annotated image is written to "result.png".
    """
    detector = cv2.MSER_create(_min_area=1,)
    _, boxes = detector.detectRegions(image)

    # Convert each (x, y, w, h) box to its two opposite corners.
    corners = [[bx, by, bx + bw, by + bh] for (bx, by, bw, bh) in boxes]
    for (startX, startY, endX, endY) in corners:
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 0), 1)

    cv2.imwrite("result.png",image)
    cv2.waitKey(0)

# Create a fresh, empty folder for the results
picsDir = "words/"
if os.path.exists(picsDir):
    # rmtree removes the folder with everything in it, including nested
    # directories that a per-entry os.remove would fail on.
    shutil.rmtree(picsDir)
os.mkdir(picsDir)

# Write each accepted word box to its own PNG inside picsDir
wordCount = 0
for (x, y, u, v) in rect:
    pic = new[u:v,x:y]
    psize = pic.shape
    box_height = v - u

    pnl, pl, pstatus, pc = cv2.connectedComponentsWithStats(cv2.bitwise_not(pic), connectivity = 4)

    # Reject narrow boxes with several components, and boxes whose single
    # component is shorter than a third of the line height.
    narrow_and_fragmented = (y - x < 6) and (pnl > 3)
    lone_short_blob = (pnl == 2) and (pstatus[1][3] < box_height / 3)
    if narrow_and_fragmented or lone_short_blob:
        continue

    # vers[i] = number of black pixels in column i of the box.
    vers = (pic == 0).sum(axis=0)

    # Scan right-to-left (starting three columns in) for the first column with
    # fewer than two black pixels and inspect what lies to its right.
    save = True
    for i in range(psize[1]-3, -1, -1):
        if vers[i] >= 2:
            continue
        comma = pic[0:psize[0], i:psize[1]]
        pnl, pl, pstatus, pc = cv2.connectedComponentsWithStats(cv2.bitwise_not(comma), connectivity = 8)
        if pnl > 3:
            save = False
        if pnl == 2 and pstatus[1][3] < box_height / 3:
            # Trailing short component: cut the box at this column.
            y = x + i
        break

    if save and box_height != 0 and y - x != 0:
        word = new[u:v,x:y]
        name = picsDir + str(wordCount) + ".png"
        cv2.imwrite(name, word)
        wordCount += 1