## โหลดรูปเบื้องต้น

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Folder that receives every intermediate image written by this notebook.
output_folder = Path("../../data/output_images/output_table")
# parents=True also creates missing intermediate folders (e.g. output_images),
# so the cell works on a fresh checkout instead of raising FileNotFoundError.
output_folder.mkdir(parents=True, exist_ok=True)

# Alternative sample inputs; swap the active line to try another scan.
#image = cv2.imread("../../data/test_images/transcript/img-1.png")
image = cv2.imread("../../data/test_images/transcript/img-2.png")
#image = cv2.imread("../../data/test_images/transcript/Image_20240817_0001.png")
if image is None:
    # Fail loudly here rather than further down with an obscure cv2 error.
    raise FileNotFoundError("ไม่พบไฟล์ภาพ กรุณาตรวจสอบเส้นทางของไฟล์")

# Remove noise before thresholding.
denoised = cv2.bilateralFilter(image, 15, 100, 100)
cv2.imwrite(str(output_folder / "transcript_denoised.png"), denoised)

gray_img = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)

# Adaptive threshold (local mean) - intended for evenly lit images.
# THRESH_BINARY_INV makes the dark ink white (255) on a black background.
binary_mean = cv2.adaptiveThreshold(
    gray_img,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
    thresholdType=cv2.THRESH_BINARY_INV,
    blockSize=21,
    C=10,
)

# Adaptive threshold (Gaussian-weighted) - intended for unevenly lit images.
binary_gaussian = cv2.adaptiveThreshold(
    gray_img,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY_INV,
    blockSize=21,
    C=10,
)

# Global Otsu threshold; the computed threshold value itself is discarded.
_, binary_otsu = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Save every stage so the thresholding variants can be compared side by side.
cv2.imwrite(str(output_folder / "transcript.png"), image)
cv2.imwrite(str(output_folder / "transcript_gray.png"), gray_img)
cv2.imwrite(str(output_folder / "transcript_binary_g.png"), binary_gaussian)
cv2.imwrite(str(output_folder / "transcript_binary_m.png"), binary_mean)
cv2.imwrite(str(output_folder / "transcript_otsu.png"), binary_otsu)




True

## CCA

In [3]:
# Label every 8-connected white region of the inverted binary image.
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
    binary_gaussian, connectivity=8
)

# Label 0 is the background, so a table can only exist if at least one more
# label was found.
if num_labels < 2:
    raise ValueError("No foreground components found; cannot locate the table")

# Area of every component, background included (kept for inspection).
areas = stats[:, cv2.CC_STAT_AREA]

# The table is taken to be the largest foreground component. Searching only
# labels 1..n avoids relying on the background being the single largest
# region, and avoids picking the wrong label when two areas are equal.
second_max_area_index = int(np.argmax(areas[1:])) + 1
second_max_area = int(areas[second_max_area_index])
print(second_max_area)

# Crop the table from the binary image and mark it on the colour image.
x, y, w, h, area = stats[second_max_area_index]
table_img = binary_gaussian[y:y+h, x:x+w]
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)

cv2.imwrite(f"{output_folder}/table.png", table_img)

# Disabled debug aid: outline every component (starting at 1, because 0 is
# the background).
# for idx, i in enumerate(range(1, num_labels)):
#     x, y, w, h, area = stats[i]
#
#     cca_img = image[y:y+h, x:x+w]
#     cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)

# `image` now carries the green rectangle drawn above.
cv2.imwrite(f"{output_folder}/transcript_cca.png", image)

63804


True

## table

In [4]:

# Use morphological operations to detect the table's ruling lines.
# Horizontal lines (rows): opening with a 25x1 rectangular kernel.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
horizontal_lines = cv2.morphologyEx(table_img, cv2.MORPH_OPEN, horizontal_kernel)

# Vertical lines (columns): opening with a 1x25 rectangular kernel.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 25))
vertical_lines = cv2.morphologyEx(table_img, cv2.MORPH_OPEN, vertical_kernel)

# Merge both line images into one picture of the table structure.
# NOTE(review): table_structure is only referenced by the disabled preview
# below in the visible code.
table_structure = cv2.add(horizontal_lines, vertical_lines)

# Disabled matplotlib preview of table_structure (kept as a bare string).
'''
plt.figure(figsize=(10,10))
plt.imshow(table_structure, cmap='gray')
plt.axis("off")
plt.show()
'''

# Combine lines: equal-weight blend of the horizontal and vertical images.
# This blend is the image handed to findContours on the next statement.
combined_lines = cv2.addWeighted(horizontal_lines, 0.5, vertical_lines, 0.5, 0)

# Find contours with the full nesting hierarchy (RETR_TREE); the hierarchy
# return value is discarded.
contours, _ = cv2.findContours(combined_lines, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

def get_y_position(contour):
    """Return the top edge (y) of the contour's bounding rectangle.

    Serves as the sort key that orders contours from top to bottom so they
    can be grouped into rows.
    """
    _, top, _, _ = cv2.boundingRect(contour)
    return top

def get_x_position(cell):
    """Return the left edge (x) of a cell given as an (x, y, w, h) box.

    Serves as the sort key that orders the cells of one row from left to
    right, i.e. by column.
    """
    left, _top, _width, _height = cell
    return left

# Order the contours top-to-bottom so rows can be collected in a single pass.
sorted_contours = sorted(contours, key=get_y_position)

# Group bounding boxes into rows: a box opens a new row when its y differs
# from the previously accepted box by 10 px or more.
rows = []
current_row = []
previous_y = -1  # sentinel: no box accepted yet
for x, y, w, h in map(cv2.boundingRect, sorted_contours):
    # Ignore boxes that are small in both dimensions.
    if w < 30 and h < 30:
        continue
    starts_new_row = previous_y != -1 and abs(y - previous_y) >= 10
    if starts_new_row:
        # Close the finished row, ordering its cells left-to-right.
        rows.append(sorted(current_row, key=get_x_position))
        current_row = []
    current_row.append((x, y, w, h))
    previous_y = y

# Flush the final row.
if current_row:
    rows.append(sorted(current_row, key=get_x_position))

# Crop every cell (the first row is skipped) and write it to disk; the crops
# are also collected in order for later processing.
table_cell_images = []
for row_index, row in enumerate(rows[1:]):
    for col_index, (x, y, w, h) in enumerate(row):
        cell_image = table_img[y:y+h, x:x+w]
        table_cell_images.append(cell_image)
        cv2.imwrite(f"{output_folder}/Row_{row_index}_Col_{col_index}.png", cell_image)



In [13]:
# Work on the first extracted cell; fail with a clear message if the
# previous step produced no cells.
if not table_cell_images:
    raise ValueError("No table cells were extracted; nothing to process")
img = table_cell_images[0]

# Detect horizontal and vertical lines: opening with a long, 1-pixel-thick
# kernel keeps only strokes at least 50 px long in that direction.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
horizontal_lines = cv2.morphologyEx(img, cv2.MORPH_OPEN, horizontal_kernel)

vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 50))
vertical_lines = cv2.morphologyEx(img, cv2.MORPH_OPEN, vertical_kernel)

# Dilate lines along their own direction to make them more continuous.
dilated_horizontal = cv2.dilate(horizontal_lines, cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1)))
dilated_vertical = cv2.dilate(vertical_lines, cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3)))

# Combine horizontal and vertical lines into a single mask.
table_lines = cv2.add(dilated_horizontal, dilated_vertical)
grid_lines = table_lines  # same mask under its second historical name

# Remove lines from the image. The mask has to be inverted: masking with
# table_lines itself would keep only the line pixels and discard the content.
text_only = cv2.bitwise_and(img, img, mask=cv2.bitwise_not(table_lines))
# Invert so the result has dark content on a white background.
tableless_image = cv2.bitwise_not(text_only)

# Clean small noise. On a white-background image a closing (dilate, then
# erode) removes dark specks smaller than the kernel; a plain erode would
# enlarge them instead.
cleaned_image = cv2.morphologyEx(
    tableless_image,
    cv2.MORPH_CLOSE,
    cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)),
)

# Optional: inpaint the masked line pixels from their surroundings
# (radius 3, Telea method).
final_image = cv2.inpaint(img, table_lines, 3, cv2.INPAINT_TELEA)

# Save results (interactive previews left disabled).
#cv2.imshow("Original", img)
#cv2.imshow("Tableless Binary", tableless_image)
#cv2.imshow("Final Image", final_image)
cv2.imwrite(f"{output_folder}/cell_original.png", img)
cv2.imwrite(f"{output_folder}/cell_tableless.png", cleaned_image)
cv2.imwrite(f"{output_folder}/cell_Final.png", final_image)



True