In [1]:
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from imutils.object_detection import non_max_suppression
import os
import sys
net = cv2.dnn.readNet('frozen_east_text_detection.pb')


In [19]:
def to_knn(image):
    data = np.reshape(image,(image.shape[0]*image.shape[1],3))
    data = np.float32(data)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    K = 2
    ret,label,center=cv2.kmeans(data,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)
    res = center[label.flatten()]
    res2 = res.reshape(image.shape)
    res3 = cv2.cvtColor(res2,cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(res3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    output = cv2.merge([threshold,threshold,threshold])


    return output

In [25]:
def detect_text_region(image):
 
	orig = image.copy()
	(H, W) = image.shape[:2]

# set the new width and height and then determine the ratio in change
# for both the width and height
	newW, newH = 320, 320
	rW = W / float(newW)
	rH = H / float(newH)

# resize the image and grab the new image dimensions
	image = cv2.resize(image, (newW, newH))
	(H, W) = image.shape[:2]
	layerNames = [
	"feature_fusion/Conv_7/Sigmoid",
	"feature_fusion/concat_3"]
	blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
	(123.68, 116.78, 103.94), swapRB=True, crop=False)
	net.setInput(blob)
	(scores, geometry) = net.forward(layerNames)
	(numRows, numCols) = scores.shape[2:4]
	rects = []
	confidences = []
	detections =[]


	for y in range(0, numRows):
		# extract the scores (probabilities), followed by the geometrical
		# data used to derive potential bounding box coordinates that
		# surround text
		scoresData = scores[0, 0, y]
		xData0 = geometry[0, 0, y]
		xData1 = geometry[0, 1, y]
		xData2 = geometry[0, 2, y]
		xData3 = geometry[0, 3, y]
		anglesData = geometry[0, 4, y]

		# loop over the number of columns
		for x in range(0, numCols):
			# if our score does not have sufficient probability, ignore it
			if scoresData[x] < 0.5:
				continue

			# compute the offset factor as our resulting feature maps will
			# be 4x smaller than the input image
			(offsetX, offsetY) = (x * 4.0, y * 4.0)

			# extract the rotation angle for the prediction and then
			# compute the sin and cosine
			angle = anglesData[x]
			cos = np.cos(angle)
			sin = np.sin(angle)

			# use the geometry volume to derive the width and height of
			# the bounding box
			h = xData0[x] + xData2[x]
			w = xData1[x] + xData3[x]

			# compute both the starting and ending (x, y)-coordinates for
			# the text prediction bounding box
			endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
			endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
			startX = int(endX - w)
			startY = int(endY - h)

			# add the bounding box coordinates and probability score to
			# our respective lists
			rects.append((startX, startY, endX, endY))
			confidences.append(scoresData[x])

	# apply non-maxima suppression to suppress weak, overlapping bounding
	# boxes
	if len(rects)==0:
		return None
	boxes = non_max_suppression(np.array(rects), probs=confidences)
	
	for (startX, startY, endX, endY) in boxes:
	# scale the bounding box coordinates based on the respective
	# ratios
		startX = int(startX * rW)
		startY = int(startY * rH)
		endX = int(endX * rW)
		endY = int(endY * rH)

		detections.append([startY,endY,startX,endX])
  
	return detections


In [32]:
data_east = pd.DataFrame(columns=["filename","folder","imagename","cropped"])

In [5]:
subfolders =[]
for folder_name, subs, filenames in os.walk('./dataset/train/'):
        # Print the subfolder name
        # print(f"Subfolder: {subfolders}")
        subfolders.extend(subs)
        break

## Preprocessing the Train folder

In [None]:
total = 0
count1 = 0
count2 = 0
input_path = './dataset/train/'
output_path = './hand_east/train/'
for x in tqdm(subfolders):
    file_path = input_path + x
    for _, _, filenames in os.walk(file_path):
        for f in filenames:
            total += 1
            img = file_path+'/'+f
            image1 = cv2.imread(img)
            image_knn = to_knn(image1)
            ci = detect_text_region(image_knn)
            if ci:
                count1 += 1
                arr = np.array(ci)
                m_max = arr.max(axis=0)
                m_min = arr.min(axis=0)
                min_y = m_min[0]
                max_y = m_max[1]
                min_x = m_min[2]
                max_x = m_max[3]
                cropped = image_knn[min_y:max_y,min_x:max_x]
                if(cropped.shape[0]>50 and cropped.shape[1]>500):
                    count2 += 1
                    outimg = cropped[0:,100:500]
                    resized = cv2.resize(outimg,(256,256))
                    newdir = output_path + x
                    if not os.path.exists(newdir):
                        os.makedirs(newdir)
                    opg = newdir + "/" + f
                    cv2.imwrite(opg,resized)
                    data_east.loc[len(data_east)] = [img,x,f,opg]
            if(total%100==0):
                print(f"total: {total}, count1: {count1}, count2: {count2}")

In [None]:
for x in tqdm(subfolders):
    file_path = input_path + x
    for y, z, filenames in os.walk(file_path):
        print(filenames)
        break

In [39]:
sub2 =[]
for folder_name, subs, filenames in os.walk('./hand_east/train/'):
        # Print the subfolder name
        # print(f"Subfolder: {subfolders}")
        sub2.extend(subs)
        break

In [47]:
sub3 =[]
for x in tqdm(sub2):
    file_path = output_path + x
    for y, z, filenames in os.walk(file_path):
        sub3.append(filenames)
        break

100%|██████████| 1328/1328 [00:00<00:00, 34437.36it/s]


In [48]:
c =0
temp = []
for x in sub3:
    if(len(x)==1):
        c += 1

In [49]:
c

152

In [51]:
pd.DataFrame.to_csv(data_east,'data_east.scv')

In [57]:
dd = pd.read_csv('data_east.scv')


In [54]:
dd.head()

Unnamed: 0.1,Unnamed: 0,filename,folder,imagename,cropped
0,0,./dataset/train/P2369/B4.jpg,P2369,B4.jpg,./hand_east/train/P2369/B4.jpg
1,1,./dataset/train/P2369/B6.jpg,P2369,B6.jpg,./hand_east/train/P2369/B6.jpg
2,2,./dataset/train/P2369/B2.jpg,P2369,B2.jpg,./hand_east/train/P2369/B2.jpg
3,3,./dataset/train/P2369/B0.jpg,P2369,B0.jpg,./hand_east/train/P2369/B0.jpg
4,4,./dataset/train/P2369/B1.jpg,P2369,B1.jpg,./hand_east/train/P2369/B1.jpg


## Processing the Validation folder

In [60]:
val_files = []
for _, _, filenames in os.walk('./dataset/val/'):
    val_files.extend(filenames)
    break

In [61]:
len(val_files)

730

In [66]:

def crop_rectangular_box(image):
    
    

    
    # Find contours in the binary image
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the contour with the largest area
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Get the bounding rectangle of the largest contour
    x, y, w, h = cv2.boundingRect(largest_contour)
    
    # Crop the image based on the bounding rectangle
    cropped_image = image[y:y+h, x:x+w]
    #cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    #output = cv2.merge([cropped_image,cropped_image,cropped_image])
    
    return cropped_image


In [73]:
total = 0
count1 = 0
count2 = 0
temp =[]
input_path = './dataset/val/'
output_path = './hand_east/val/'

for f in tqdm(val_files):
    img = input_path + f
    image1 = cv2.imread(img)
    image_knn = to_knn(image1)
    ci = detect_text_region(image_knn)
    if ci:
        arr = np.array(ci)
        m_max = arr.max(axis=0)
        m_min = arr.min(axis=0)
        min_y = m_min[0]
        max_y = m_max[1]
        min_x = m_min[2]
        max_x = m_max[3]
        cropped = image_knn[min_y:max_y,min_x:max_x]
        outimg = cropped[0:,100:500]
        temp.append(outimg.shape)
        if(outimg.shape[0]==0 or outimg.shape[1]==0):
            cimage = crop_rectangular_box(image_knn[:,:,0])
            outimg = cimage[0:,200:600]
            resized = cv2.resize(outimg,(256,256))
            newdir = output_path
            if not os.path.exists(newdir):
                os.makedirs(newdir)
            opg = newdir + f
            cv2.imwrite(opg,resized)
            continue
        resized = cv2.resize(outimg,(256,256))
        newdir = output_path
        if not os.path.exists(newdir):
            os.makedirs(newdir)
        opg = newdir + f
        cv2.imwrite(opg,resized)
    else:

        cimage = crop_rectangular_box(image_knn[:,:,0])
        outimg = cimage[0:,200:600]
        resized = cv2.resize(outimg,(256,256))
        newdir = output_path
        if not os.path.exists(newdir):
            os.makedirs(newdir)
        opg = newdir + f
        cv2.imwrite(opg,resized)

100%|██████████| 730/730 [07:35<00:00,  1.60it/s]


In [71]:
temp

[(80, 0, 3)]

In [2]:
data_east = pd.read_csv('hand_east/data_east.csv')

In [30]:
data_pairs = pd.DataFrame(columns=['img0','img1','label'])

In [None]:
count = 0
for x in subfolders:
    df1 = data_east[data_east['folder']==x]
    if(len(df1)>1):
        for i in range(len(df1)):
            for j in range(len(df1)):
                if(i!=j):
                    xi = df1['cropped'].iloc[i]
                    xj = df1['cropped'].iloc[j]
                    data_pairs.loc[len(data_pairs)] = [xi,xj,1]
                    count += 1
                    if(count%1000==0):
                        print(count)
    df2 = data_east[data_east['folder']!=x]
    for i in range(len(df1)):
        for j in range(len(df2)):
            xi = df1['cropped'].iloc[i]
            xj = df2['cropped'].iloc[j]
            dx = (data_pairs['img0'] == xj) & (data_pairs['img1'] == xi)
            if not dx.any():
                data_pairs.loc[len(data_pairs)] = [xi,xj,0]
                count += 1
                if(count%1000==0):
                    print(count)

In [24]:
np.array([data_east['folder']=='P2369']).any()

True

In [93]:
dd = data_east[data_east['folder']!='P2369']

In [10]:
df2['cropped'].iloc[0]

'./hand_east/train/M1042/B3.jpg'

In [33]:
data_pairs.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1124455 entries, 0 to 1124454
Data columns (total 3 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   img0    1124455 non-null  object
 1   img1    1124455 non-null  object
 2   label   1124455 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 34.3+ MB
