In [1]:
import argparse
import json
import os
import glob
from typing import List
import cv2
import numpy as np


In [2]:
def read_image(img_path, show=False):
    """Reads an image into memory as a grayscale array.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    if show:
        show_image(img)

    return img

def show_image(img, delay=1000):
    """Shows an image.
    """
    cv2.namedWindow('image', cv2.WINDOW_AUTOSIZE)
    cv2.imshow('image', img)
    cv2.waitKey(delay)
    cv2.destroyAllWindows()
    
    
def parse_args():
    parser = argparse.ArgumentParser(description="cse 473/573 project 1.")
    parser.add_argument(
        "--test_img", type=str, default="./data/test_img.jpg",
        help="path to the image used for character detection (do not change this arg)")
    parser.add_argument(
        "--character_folder_path", type=str, default="./data/characters",
        help="path to the characters folder")
    parser.add_argument(
        "--result_saving_directory", dest="rs_directory", type=str, default="./",
        help="directory to which results are saved (do not change this arg)")
    args = parser.parse_args()
    return args

In [4]:
def ocr(test_img, characters):
    """Step 1 : Enroll a set of characters. Also, you may store features in an intermediate file.
       Step 2 : Use connected component labeling to detect various characters in an test_img.
       Step 3 : Taking each of the character detected from previous step,
         and your features for each of the enrolled characters, you are required to a recognition or matching.

    Args:
        test_img : image that contains character to be detected.
        characters_list: list of characters along with name for each character.

    Returns:
    a nested list, where each element is a dictionary with {"bbox" : (x(int), y (int), w (int), h (int)), "name" : (string)},
        x: row that the character appears (starts from 0).
        y: column that the character appears (starts from 0).
        w: width of the detected character.
        h: height of the detected character.
        name: name of character provided or "UNKNOWN".
        Note : the order of detected characters should follow english text reading pattern, i.e.,
            list should start from top left, then move from left to right. After finishing the first line, go to the next line and continue.
        
    """
    # TODO Add your code here. Do not modify the return and input arguments

    enrollment(characters)

    detection(test_img)
    
    recognition()

    #raise NotImplementedError

In [5]:
def enrollment(characters):
    """ Args:
        You are free to decide the input arguments.
    Returns:
    You are free to decide the return.
    """
    # TODO: Step 1 : Your Enrollment code should go here.
    #img_blur = cv2.GaussianBlur(img_gray, (3,3), 0)
    # Using SOBEL Filters for Edge Detection
    for i in range(0,5):    
        img = characters[i][1]
        sift=cv2.xfeatures2d.SIFT_create()
        kp,desc=sift.detectAndCompute(img,None)
        kp_name = "sift_keypoints_" + str(i) + ".txt"
        file1 = open(kp_name, 'w')
        if (str(kp)=='()' and str(desc) == 'None'):
            file1.write(str(img))
        else: 
            #print (np.size(desc,0),np.size(desc,1))
            for j in range(0,np.size(desc,0)):
                file1.write(str(desc[j]))
        file1.close()

In [None]:
def detection(test_img):
    """ 
    Use connected component labeling to detect various characters in an test_img.
    Args:
        You are free to decide the input arguments.
    Returns:
    You are free to decide the return.
    """
    # TODO: Step 2 : Your Detection code should go here.
    # Connected Component Implementation / Naive Template Matching Implementation
    # Splitting of Test image into windows - 
    #print (np.size(test_img,0),np.size(test_img,1))
    # 
    window_r = ()
    for r in range(0, np.size(test_img,0)):
        flag=1
        for c in range (0,np.size(test_img,1)):
            if (test_img[r][c]<240):
                flag=0
                break
            else:
                flag=1
        if (flag==1):
            window_r = window_r + (r,)
            #print (window_r)
            #print (len(window_r))
    window_row=()
    min = window_r[0]
    #print (" Window r : ")
    #print (window_r)
    count = 1
    for i in range(0,len(window_r)):
        count = count+1
        if (min+count < window_r[i]):
            min = window_r[i]
            count =1 
            window_row=window_row+(window_r[i-1],window_r[i]) 
            #print (window_r[i])
    window_row=window_row+(window_r[i],)
    #print (window_row)          
    window_c=()
    file3 = open("test_img_col_characteristics.txt",'w')
    line=1
    for c in range(0,np.size(test_img,1)):
        flag_c=1
        for r in range(window_row[line-1],window_row[line]):
            if (test_img[r][c]<240):
                flag_c=0
                break
            else:
                flag_c=1
        if (flag_c==1):
            window_c = window_c+(c,)
    window_col=()
    min = window_c[0]
    count =1
    for i in range (0, len(window_c)):
        count = count+1
        if (min+count < window_c[i]):
            min = window_c[i]
            count =1
            window_col=window_col+(window_c[i-1],window_c[i])
    window_col = window_col+(window_c[i],)
    del window_c
    del window_r
    w = window_col[1] - window_col[0]
    h= window_row[1]-window_row[0]
    test_let = [[255 for x in range (w)] for y in range (h)]
    r=0
    for i in range (window_row[0], window_row[1]):
        c=0
        for j in range(window_col[0], window_col[1]):
            test_let [r][c] = test_img[i][j]
            file3.write(str(test_let[r][c]))
            file3.write(" ")
            c=c+1
        file3.write("\n")
        r=r+1

In [None]:
def recognition():
    """ 
    Args:
        You are free to decide the input arguments.
    Returns:
    You are free to decide the return.
    """
    # TODO: Step 3 : Your Recognition code should go here.

    #raise NotImplementedError


In [None]:
def save_results(coordinates, rs_directory):
    """
    Donot modify this code
    """
    results = []
    with open(os.path.join(rs_directory, 'results.json'), "w") as file:
        json.dump(results, file)

In [6]:
def main():
    args = parse_args()
    
    characters = []

    all_character_imgs = glob.glob(args.character_folder_path+ "/*")
    
    for each_character in all_character_imgs :
        character_name = "{}".format(os.path.split(each_character)[-1].split('.')[0])
        characters.append([character_name, read_image(each_character, show=True)])

    test_img = read_image(args.test_img)

    results = ocr(test_img, characters)

    save_results(results, args.rs_directory)


if __name__ == "__main__":
    main()


usage: ipykernel_launcher.py [-h] [--test_img TEST_IMG] [--character_folder_path CHARACTER_FOLDER_PATH]
                             [--result_saving_directory RS_DIRECTORY]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\singh\AppData\Roaming\jupyter\runtime\kernel-4abf82b0-8af1-487c-9009-91358c7b212e.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
