In [1]:
# Mount Google Drive at /content/drive; the input image and the model weights
# used further down are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import imutils
import numpy as np
import cv2
from math import ceil
from collections import defaultdict
import matplotlib.pyplot as plt

In [3]:
def get_x_ver1(s):
    """Sort key for a contour.

    Returns the product ``x * y`` of the top-left corner of the contour's
    bounding rectangle.
    """
    left, top, _width, _height = cv2.boundingRect(s)
    return left * top

def get_x(s):
    """Sort key for an answer-block entry.

    Reads ``s[1][0]``, i.e. the first element of the entry's second item
    (the ``x`` of the ``[x, y, w, h]`` list for entries built by crop_image).
    """
    bounding_box = s[1]
    return bounding_box[0]

In [4]:
def pre_processing_img(img):
    """Detect edge contours in a BGR image.

    Returns a tuple ``(contours, gray_img)`` where ``gray_img`` is the
    grayscale conversion of ``img`` and ``contours`` is the contour list found
    on the Canny edge map of the blurred grayscale image.
    """
    # work on a single channel: convert BGR -> GRAY
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # denoise with a 5x5 Gaussian kernel, then run Canny (thresholds 100 / 200)
    edges = cv2.Canny(cv2.GaussianBlur(gray_img, (5, 5), 0), 100, 200)

    # collect the full contour hierarchy from a copy of the edge map;
    # imutils.grab_contours extracts the contour list from the return value
    found = cv2.findContours(edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    return imutils.grab_contours(found), gray_img

In [5]:
def crop_image(img, min_block_area=100000, min_xy_shift=20000):
    """Locate the answer blocks on a sheet image and crop them out.

    Contours come from ``pre_processing_img`` and are visited in ascending
    order of ``get_x_ver1`` (``x * y`` of the bounding box's top-left corner).
    A contour is kept when its bounding box is taller than wide, covers more
    than ``min_block_area`` pixels, and is not a near-duplicate of the block
    kept immediately before it.

    Args:
        img: BGR image, as accepted by ``pre_processing_img``.
        min_block_area: bounding-box area (``w * h``) a contour must exceed.
            The default is the value that used to be hard-coded here.
        min_xy_shift: amount by which both corner products (top-left ``x * y``
            and bottom-right ``(x + w) * (y + h)``) must exceed those of the
            previously kept block for a contour to count as a new block.
            The default is the value that used to be hard-coded here.

    Returns:
        A list of ``(gray_crop, [x, y, w, h])`` tuples sorted by ``x``.
        The list is empty when nothing qualifies; the function never returns
        ``None``, so callers can always iterate over the result.
    """
    cnts, gray_img = pre_processing_img(img)

    ans_blocks = []
    # bounding box of the most recently accepted block
    x_old, y_old, w_old, h_old = 0, 0, 0, 0

    # visit contours in ascending order of x * y of their bounding-box corner;
    # the duplicate check below relies on this ordering
    for c in sorted(cnts, key=get_x_ver1):
        x_curr, y_curr, w_curr, h_curr = cv2.boundingRect(c)

        # answer blocks are large, portrait-shaped rectangles
        if w_curr * h_curr <= min_block_area or w_curr >= h_curr:
            continue

        # how far both corners moved relative to the last accepted block
        check_xy_min = x_curr * y_curr - x_old * y_old
        check_xy_max = (x_curr + w_curr) * (y_curr + h_curr) - (x_old + w_old) * (y_old + h_old)

        # the first qualifying contour is always taken; later ones only when
        # they are clearly a different rectangle (not an overlapping contour)
        is_new_block = check_xy_min > min_xy_shift and check_xy_max > min_xy_shift
        if not ans_blocks or is_new_block:
            ans_blocks.append(
                (gray_img[y_curr:y_curr + h_curr, x_curr:x_curr + w_curr],
                 [x_curr, y_curr, w_curr, h_curr]))
            # remember this block for the next duplicate check
            x_old, y_old, w_old, h_old = x_curr, y_curr, w_curr, h_curr

    # order the blocks left to right
    return sorted(ans_blocks, key=get_x)

In [6]:
def divide_ans_blocks(ans_blocks):
    """Split every answer block into single-question image strips.

    Each block image (``ans_block[0]``) is cut vertically into 6 boxes of
    equal height; 14 pixel rows are trimmed from the top and bottom of each
    box, and the remainder is cut into 5 strips, one per question.

    Returns a flat list of strips, ordered block by block, box by box,
    top to bottom.
    """
    question_strips = []
    for block in ans_blocks:
        block_img = np.array(block[0])
        box_height = ceil(block_img.shape[0] / 6)
        for box_idx in range(6):
            top = box_idx * box_height
            box = np.array(block_img[top:top + box_height, :])
            # drop the 14-row margin above and below the box content
            box = box[14:box.shape[0] - 14, :]
            strip_height = ceil(box.shape[0] / 5)
            question_strips.extend(
                box[row * strip_height:(row + 1) * strip_height, :]
                for row in range(5)
            )
    return question_strips

In [7]:
def list_ans(list_answers):
    """Cut every question strip into its 4 choice bubbles.

    For each strip, 4 consecutive 40-pixel-wide columns are taken starting at
    column 40 (the first 40 columns, which hold the question number, are
    skipped). Each crop is binarised with inverted Otsu thresholding, resized
    to 28x28 and reshaped to ``(28, 28, 1)``.

    With 120 question strips this yields 120 * 4 = 480 bubble images.

    Args:
        list_answers: iterable of 2-D grayscale strips.

    Returns:
        A flat list of ``(28, 28, 1)`` arrays, 4 per input strip, in choice
        order within each strip.
    """
    start = 40   # first column of the first choice
    offset = 40  # width of one choice

    list_choices = []
    for answer_img in list_answers:
        for i in range(4):
            left = start + i * offset
            bubble_choice = answer_img[:, left:left + offset]
            bubble_choice = cv2.threshold(
                bubble_choice, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
            # interpolation must be passed by keyword: the third positional
            # argument of cv2.resize is `dst`, so a positional flag is not
            # used as the interpolation mode.
            # NOTE(review): if the classifier was trained on crops produced
            # with the positional form, re-validate its accuracy.
            bubble_choice = cv2.resize(
                bubble_choice, (28, 28), interpolation=cv2.INTER_AREA)
            list_choices.append(bubble_choice.reshape((28, 28, 1)))
    return list_choices

In [8]:
import tensorflow as tf

In [9]:
# model  = tf.keras.models.load_model('/content/drive/MyDrive/DA chuyen nganh/Folder Nam/Code/detect_answers/weight.h5')

In [18]:
# Sheet image to grade.
IMAGE_PATH = '/content/drive/MyDrive/DA chuyen nganh/Folder Nam/Code/detect_answers/1.jpg'

img = cv2.imread(IMAGE_PATH)
# cv2.imread signals an unreadable/missing file by returning None instead of
# raising, so fail here with a clear message rather than inside cv2.resize.
if img is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# crop_image's default size thresholds are absolute pixel counts, so the sheet
# is brought to a fixed 1100x1500 size first.
img = cv2.resize(img, (1100, 1500))
ans_blocks = crop_image(img)

In [None]:
# for i in range(len(ans_blocks)):
#   plt.figure()
#   plt.imshow(ans_blocks[i][0])

In [12]:
# answer blocks -> per-question strips -> per-choice bubble images
list_answer = list_ans(divide_ans_blocks(ans_blocks))

In [13]:
# results = defaultdict(list)
# list_answers = np.array(list_answer)
# scores = model.predict_on_batch(list_answers / 255.0)

In [15]:
def map_answer(idx):
    """Translate a flat bubble index into its choice letter.

    The letter depends only on ``idx % 4``: 0 -> "A", 1 -> "B", 2 -> "C",
    anything else -> "D".
    """
    letters_by_remainder = {0: "A", 1: "B", 2: "C"}
    return letters_by_remainder.get(idx % 4, "D")

In [16]:
def get_answers(list_answers, model=None, threshold=0.9):
    """Classify bubble images and collect the marked choices per question.

    Args:
        list_answers: sequence of bubble images, 4 consecutive entries per
            question (choices A-D in order). Pixel values are divided by
            255.0 before being passed to the model.
        model: optional object exposing ``predict_on_batch``. When omitted,
            the Keras model is loaded from the weights file on Drive, which
            is the previous behaviour; pass a preloaded model to avoid
            re-reading the file on every call.
        threshold: a bubble counts as marked when ``score[1]`` is strictly
            greater than this value. Defaults to the previously hard-coded 0.9.

    Returns:
        ``defaultdict(list)`` mapping 1-based question number to the list of
        detected choice letters. Questions with no detected choice have no
        key. Empty input yields an empty mapping without touching the model.
    """
    results = defaultdict(list)

    batch = np.array(list_answers)
    if batch.size == 0:
        # nothing to classify; skip loading the model and predicting
        return results

    if model is None:
        model = tf.keras.models.load_model('/content/drive/MyDrive/DA chuyen nganh/Folder Nam/Code/detect_answers/weight11.h5')

    scores = model.predict_on_batch(batch / 255.0)
    for idx, score in enumerate(scores):
        # score[1] is compared against the threshold
        # NOTE(review): presumably the "filled" class probability - confirm against training labels
        if score[1] > threshold:
            # every 4 consecutive bubbles belong to one question (1-based)
            results[idx // 4 + 1].append(map_answer(idx))

    return results

In [17]:
# Classify every cropped bubble; the cell output is the mapping
# {question number: [detected choice letters]}.
get_answers(list_answer)

defaultdict(list,
            {1: ['A'],
             2: ['B'],
             3: ['B'],
             5: ['A'],
             6: ['C'],
             7: ['D'],
             8: ['B'],
             9: ['A', 'D'],
             10: ['C', 'D'],
             11: ['C'],
             12: ['A'],
             13: ['D'],
             14: ['B'],
             16: ['A'],
             17: ['D'],
             18: ['B'],
             19: ['C'],
             20: ['A'],
             22: ['B'],
             25: ['C'],
             26: ['A'],
             27: ['C'],
             29: ['D'],
             30: ['B'],
             32: ['B'],
             33: ['A'],
             34: ['B'],
             35: ['C'],
             36: ['D'],
             37: ['A'],
             39: ['B'],
             40: ['C'],
             42: ['A'],
             43: ['B'],
             44: ['C'],
             45: ['D'],
             46: ['C'],
             47: ['A'],
             49: ['B'],
             51: ['B'],
             52: ['D