In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, AveragePooling2D, BatchNormalization
#import l2-norm pooling
from tensorflow.keras.layers import Lambda
from tensorflow.keras import backend as K


def l2_norm(x):
    x = x**2
    x = K.sum(x, axis=3, keepdims=True)
    x= K.sqrt(x)
    return x
l2_norm_pooling = Lambda(lambda x: l2_norm(x))
from tensorflow.keras.metrics import RootMeanSquaredError





In [2]:
# preprocessing
def preprocess(img_path, show=False):
    image = cv2.imread(img_path)
    image = cv2.resize(image, (256, 256))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.equalizeHist(image)
    # find 200 keypoints with SIFT
    sift = cv2.SIFT_create(nfeatures=200, contrastThreshold=0.005)
    kp = sift.detect(image, None)
    if len(kp) < 200:
        kp += cv2.SIFT_create(nfeatures=len(kp)-200, contrastThreshold=0.0001).detect(image, None)
    if len(kp) > 200:
        kp = kp[:200]
    if show:
        print(len(kp))
        # show the keypoints
        image = cv2.drawKeypoints(image, kp, None)
        cv2.imshow('image', image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    
    # cast to float32, normalize and reshape
    kp = np.array([k.pt for k in kp], dtype=np.float32)
    kp /= 255.0
    #kp = kp.reshape(1, 200, 2)
    return kp


In [147]:
print(preprocess('scrapped_images1/0_7.jpg', show=True))

200
[[0.1132782  0.0374416 ]
 [0.9074974  0.73773307]
 [0.3219107  0.84775263]
 [0.80422735 0.36091012]
 [0.80349237 0.57291746]
 [0.3219107  0.84775263]
 [0.3219107  0.84775263]
 [0.80349237 0.57291746]
 [0.80349237 0.57291746]
 [0.9257316  0.07567762]
 [0.31583455 0.44811073]
 [0.9257316  0.07567762]
 [0.78208923 0.24280223]
 [0.9259598  0.7238679 ]
 [0.36774865 0.07065852]
 [0.06343682 0.9497703 ]
 [0.06343682 0.9497703 ]
 [0.06343682 0.9497703 ]
 [0.7761342  0.5354224 ]
 [0.3688654  0.4449142 ]
 [0.3709435  0.23929033]
 [0.76935726 0.3081576 ]
 [0.76935726 0.3081576 ]
 [0.93135977 0.14530882]
 [0.9399474  0.929657  ]
 [0.7441984  0.52816254]
 [0.38188708 0.27087492]
 [0.7441984  0.52816254]
 [0.6859887  0.07070081]
 [0.1132782  0.0374416 ]
 [0.94659543 0.05097001]
 [0.9522392  0.91778696]
 [0.11412621 0.92937624]
 [0.7306844  0.5507156 ]
 [0.29836485 0.85146755]
 [0.9730122  0.34126824]
 [0.28782192 0.8427817 ]
 [0.70770824 0.5372724 ]
 [0.6998225  0.43552384]
 [0.12648557 0.046582

In [3]:
#local imports
from local_package.Cropping import trim_to_edges
from local_package.Cropping import crop_with_perspective
#from local_package.ConvNN import preproces

In [4]:
#prepare a dataset for training the model
def prepare_dataset():
    directory = './scrapped_images1/'
    images = []
    label_values = []
    for file_name in os.listdir(directory):
        # 5-13, 18-20
        if all([not file_name.startswith(f'{str(i)}_') for i in range(5, 14)] + [not file_name.startswith(f'{str(i)}_') for i in range(18, 21)]):
            continue
        file_path = os.path.join(directory, file_name)
        try:
            image = cv2.imread(file_path)
            top_left, top_right, bottom_left, bottom_right = trim_to_edges(image, file_name)
            # normalize the points
            # top_left = [top_left[0] / image.shape[1]*255, top_left[1] / image.shape[0]*255]
            # top_right = [top_right[0] / image.shape[1]*255, top_right[1] / image.shape[0]*255]
            # bottom_left = [bottom_left[0] / image.shape[1]*255, bottom_left[1] / image.shape[0]*255]
            # bottom_right = [bottom_right[0] / image.shape[1]*255, bottom_right[1] / image.shape[0]*255]

            top_left = [top_left[0] / image.shape[1], top_left[1] / image.shape[0]]
            top_right = [top_right[0] / image.shape[1], top_right[1] / image.shape[0]]
            bottom_left = [bottom_left[0] / image.shape[1], bottom_left[1] / image.shape[0]]
            bottom_right = [bottom_right[0] / image.shape[1], bottom_right[1] / image.shape[0]]

            images.append(preprocess(file_path))
            label_values.append([top_left, top_right, bottom_left, bottom_right])
        except Exception as e:
            print(f"Exception for {file_name}:", e)
    return images, label_values

In [2]:
def preprocess(image):
    image = cv2.resize(image, (320, 320))
    # converto to 3 channel if gray or 4 channel
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    return image


In [3]:
#prepare a dataset for fine-tuning the u2net model
def u2net_prepare_dataset():
    directory = './Scraping_images/generated_with_coordinates(fixed_0)'
    images = []
    label_masks = []
    with open('./Scraping_images/generated_with_coordinates(fixed_0)/coordinates.txt', 'r') as f:
        lines = f.readlines()
        coord_dict = {}
        for line in lines:
            line = line.split()
            coord_dict[int(line[0].split('_')[1].split('.')[0])] = [float(x) for x in line[1:]]
            
        for file_name in os.listdir(directory):
            file_path = os.path.join(directory, file_name)
            try:
                #print(int(file_name.split('_')[1].split('.')[0]))
                #print(coord_dict[int(file_name.split('_')[1].split('.')[0])])
                image = cv2.imread(file_path)
                top_left_x, top_left_y, top_right_x, top_right_y, bottom_left_x, bottom_left_y, bottom_right_x, bottom_right_y = coord_dict[int(file_name.split('_')[1].split('.')[0])]
                top_left = [top_left_x, top_left_y] 
                top_right = [top_right_x, top_right_y]
                bottom_left = [bottom_left_x, bottom_left_y] if bottom_left_x < bottom_right_x else [bottom_right_x, bottom_right_y]
                bottom_right = [bottom_right_x, bottom_right_y] if bottom_left_x < bottom_right_x else [bottom_left_x, bottom_left_y]
                # normalize the points
                top_left = [top_left[0] / image.shape[1]*320, top_left[1] / image.shape[0]*320]
                top_right = [top_right[0] / image.shape[1]*320, top_right[1] / image.shape[0]*320]
                bottom_left = [bottom_left[0] / image.shape[1]*320, bottom_left[1] / image.shape[0]*320]
                bottom_right = [bottom_right[0] / image.shape[1]*320, bottom_right[1] / image.shape[0]*320]
                
                label_mask = np.zeros((320, 320, 1), dtype=np.uint8)
                label_mask = cv2.fillConvexPoly(label_mask, np.array([top_left, top_right, bottom_right, bottom_left], dtype=np.int32), 255)
                
                images.append(preprocess(image))
                label_masks.append(label_mask)
                
            except Exception as e:
                print(f"Exception for {file_name}:", e)
    return images, label_masks

In [4]:
u2net_images, u2net_label_masks = u2net_prepare_dataset()

Exception for coordinates.txt: list index out of range


In [5]:
print(len(u2net_images), len(u2net_label_masks))

2789 2789


In [6]:
# save the dataset as images
for i, (img, mask) in enumerate(zip(u2net_images, u2net_label_masks)):
    cv2.imwrite(f'Cropping/U-2-Net/train_data/DUTS/DUTS-TR/DUTS-TR/im_aug/25_{i}.jpg', img)
    cv2.imwrite(f'Cropping/U-2-Net/train_data/DUTS/DUTS-TR/DUTS-TR/gt_aug/25_{i}.png', mask)

In [9]:
images, label_values = prepare_dataset()

Exception for 10_0.jpg: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'

Exception for 10_1.jpg: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'

Exception for 10_10.jpg: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'

Exception for 10_100.jpg: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'

Exception for 10_101.jpg: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not 

KeyboardInterrupt: 

In [165]:
images1 = []
for img in images:
    points = []
    for point in img:
        points.append(point[0])
        points.append(point[1])
    images1.append(points)
print(images1[:10])

[[0.08965749, 0.16262488, 0.10964388, 0.48641077, 0.10991519, 0.6557058, 0.13135603, 0.21011873, 0.13135603, 0.21011873, 0.13199008, 0.7846878, 0.17263263, 0.25095648, 0.17263263, 0.25095648, 0.2177844, 0.29640305, 0.2177844, 0.29640305, 0.21789008, 0.18847257, 0.26287317, 0.38496637, 0.26291466, 0.33757666, 0.26291466, 0.33757666, 0.26630318, 0.6449963, 0.26630318, 0.6449963, 0.67171186, 0.59596294, 0.67171186, 0.59596294, 0.30072716, 0.648775, 0.30100578, 0.37459448, 0.30100578, 0.37459448, 0.4872577, 0.6488536, 0.6616981, 0.47102264, 0.92976946, 0.8033959, 0.6318425, 0.63853854, 0.38839474, 0.63811475, 0.31162575, 0.5633372, 0.62235856, 0.3450105, 0.49489132, 0.3422241, 0.62008286, 0.1786581, 0.32780567, 0.6493351, 0.3334319, 0.5322725, 0.3334319, 0.5322725, 0.5857795, 0.7724321, 0.57808834, 0.33797324, 0.57620907, 0.3420842, 0.3432632, 0.49857536, 0.3432632, 0.49857536, 0.3432632, 0.49857536, 0.5720435, 0.4485537, 0.34940434, 0.6483741, 0.5069478, 0.29335088, 0.56859654, 0.6483788,

In [168]:
print(len(images1))
label_values1 = np.array(label_values).reshape(-1, 8)
print(len(label_values1))

4165
4165


In [170]:
# save to csv
import pandas as pd
# merge the images and label_values
df = pd.DataFrame(images1)
df1 = pd.DataFrame(label_values1, columns=['top_left_x', 'top_left_y', 'top_right_x', 'top_right_y', 'bottom_left_x', 'bottom_left_y', 'bottom_right_x', 'bottom_right_y'])
df = pd.concat([df, df1], axis=1)
df.to_csv('Datasets/images_with_labels.csv', index=False)


In [5]:
images = np.array(images).reshape(-1, 200)
# write label_values into one dimensional array
label_values = np.array(label_values).reshape(-1, 8)

images = images / 255

In [6]:
images.shape, label_values.shape

((4165, 256, 256), (4165, 8))

In [7]:
np.save('Datasets/images_norm.npy', images)
np.save('Datasets/label_values_norm.npy', label_values)

In [3]:
# load the half of the dataset from a file
#images = np.load('Datasets/images.npy')[:int(len(images)/4)]
#label_values = np.load('Datasets/label_values.npy')[:int(len(label_values)/4)]

images = np.load('Datasets/images_norm.npy')
label_values = np.load('Datasets/label_values_norm.npy')
splited_into_train_test = False


In [4]:
# normalize the label values
# remove bottom left and top right corners (save only 0,1,6,7 indexes)
# label_values = label_values[:, [0, 1, 6, 7]]

In [4]:
# compress the dataset by changing the types of the values
images = images.astype('bool')
label_values = label_values.astype('float16')

In [5]:
images.shape, label_values.shape

((4165, 256, 256), (4165, 8))

In [5]:
# split the dataset into training and testing
from sklearn.model_selection import train_test_split
splited_into_train_test = True
X_train, X_test, y_train, y_test = train_test_split(images, label_values, test_size=0.3, random_state=42)

In [6]:
from tensorflow import convert_to_tensor
if splited_into_train_test:
    X_test = convert_to_tensor(X_test)
    y_test = convert_to_tensor(y_test)
else:
    images = convert_to_tensor(images)
    label_values = convert_to_tensor(label_values)

In [6]:
#create a model with self-attention layer
from tensorflow.keras.layers import Layer
from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras import activations
from tensorflow.keras import Input
from tensorflow.keras import Model

class SelfAttention(Layer):
    def __init__(self, units=32, **kwargs):
        super(SelfAttention, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.W = self.add_weight(name="W", shape=(input_shape[-1], self.units), initializer="random_normal", trainable=True)
        self.V = self.add_weight(name="V", shape=(self.units, 1), initializer="random_normal", trainable=True)
        self.U = self.add_weight(name="U", shape=(input_shape[-1], self.units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(name="b", shape=(self.units,), initializer="random_normal", trainable=True)
        super(SelfAttention, self).build(input_shape)

    def call(self, inputs):
        # calculate the attention weights
        u = K.tanh(K.dot(inputs, self.W) + self.b)
        u = K.dot(u, self.V)
        u = K.softmax(u, axis=1)
        # calculate the context vector
        c = K.dot(inputs, self.U)
        c = K.batch_dot(u, c, axes=[1, 1])
        return c

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        return super(SelfAttention, self).get_config()
    
    @classmethod
    def from_config(cls, config):
        return cls(**config)
    
    def get_config(self):
        return super().get_config()


In [7]:
#initialize the model with the self-attention layer
model = Sequential()
model.add(Conv2D(32, (3, 3), activation="relu", input_shape=(256, 256, 1), padding="same"))
model.add(Conv2D(64, (3, 3), activation="relu", padding="same"))
model.add(SelfAttention(64))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(8, activation='linear'))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=[RootMeanSquaredError()])
if splited_into_train_test:
    model.fit(X_train, y_train, epochs=15, batch_size=32, validation_data=(X_test, y_test))
else:
    model.fit(images, label_values, epochs=15, batch_size=32)


Epoch 1/15

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15

KeyboardInterrupt: 

In [13]:
# Implement the CNN regression model for predicting the four points (x,y) for cropping the 256x256 image
# MaxPooling2D((2,2)) # AveragePooling2D((2,2)) # Lambda(lambda x: l2_norm(x))

model = Sequential()
model.add(Conv2D(32, (5, 5), activation="relu", input_shape=(256, 256, 1), padding="same", strides=(2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(32, (5, 5), activation="relu", padding="same", strides=(2, 2)))
model.add(BatchNormalization())
model.add(AveragePooling2D((2, 2), strides=2, padding='valid'))
model.add(Dropout(0.75))
model.add(Conv2D(64, (3, 3), activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(AveragePooling2D((2, 2), strides=2, padding='valid'))
model.add(Dropout(0.75))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.75))
model.add(Dense(8, activation='linear'))


model.compile(loss='mean_squared_error', optimizer='adam', metrics=[RootMeanSquaredError()])
if splited_into_train_test:
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))
else:
    model.fit(images, label_values, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [19]:
# save the model
from tensorflow.keras.models import save_model
save_model(model, 'CNN_cropping_model_4.keras', save_format='h5')

v2 - preproces: exp_interp=True

v3 - preproces: exp_interp=False, GPU

In [21]:
from local_package.ConvNN import preproces
from tensorflow.keras.models import load_model
loaded_model = load_model('CNN_cropping_model_4.keras', custom_objects={'SelfAttention': SelfAttention})

In [23]:
#test the model
img_path = './scrapped_images1/8_4.jpg'
image = cv2.imread(img_path)
preprocessed_image = preproces(img_path)
predicted_points = loaded_model.predict((np.array([preprocessed_image]) / 255).astype('bool'))
#predicted_points = np.where(predicted_points < 0.5, predicted_points*2, (predicted_points-0.5)*2)
#predicted_points = np.array([0,0,1,0,0,1,1,1])
predicted_points = predicted_points.reshape(-1, 2)
print(predicted_points)
# denormalize the points
'''
print(predicted_points)

left_top = [predicted_points[0][0] / 2, predicted_points[0][1] / 2]
right_top = [(predicted_points[1][0]/2)+0.5, predicted_points[1][1] / 2]
left_bottom = [predicted_points[2][0] / 2, (predicted_points[2][1]/2)+0.5]
right_bottom = [(predicted_points[3][0]/2)+0.5, (predicted_points[3][1]/2)+0.5]


predicted_points = np.array([left_top, right_top, left_bottom, right_bottom])
print(predicted_points)
predicted_points[:, 0] *= image.shape[1]
predicted_points[:, 1] *= image.shape[0]
predicted_points = predicted_points.astype(np.int32)
'''
if max(image.shape) > 1000:
    image = cv2.resize(image, (int(image.shape[1]*0.25), int(image.shape[0]*0.25)))
#predicted_points[:, 0] /= 255
predicted_points[:, 0] *= image.shape[1]
#predicted_points[:, 1] /= 255
predicted_points[:, 1] *= image.shape[0]
predicted_points = predicted_points.astype(np.int32)
print(predicted_points)
print(image.shape)

preprocessed_image = cv2.cvtColor(preprocessed_image, cv2.COLOR_GRAY2BGR)
preprocessed_image = cv2.resize(preprocessed_image, (image.shape[1], image.shape[0]))
# draw the points on the image
for i, point in enumerate(predicted_points):
    cv2.circle(image, (point[0], point[1]), 5, (0, 0, 255), -1)
    cv2.circle(preprocessed_image, (point[0], point[1]), 5, (0, 0, 255), -1)


cv2.imshow('test_image', image)
cv2.imshow('prerocessed_image', preprocessed_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


[[0.29192832 0.23424691]
 [0.73803234 0.23270541]
 [0.28690824 0.73582864]
 [0.7333375  0.7354136 ]]
[[175 140]
 [442 139]
 [172 441]
 [440 441]]
(600, 600, 3)


In [12]:

image = cv2.imread(img_path)
predicted_points = model.predict(np.array([preproces(img_path, expodential_interpolation=False)]) / 255.0)
predicted_points = predicted_points.reshape(-1, 2)
print(predicted_points)
# denormalize the x points
predicted_points[:, 0] /= 255.0
predicted_points[:, 0] *= image.shape[1]
# denormalize the y points
predicted_points[:, 1] /= 255.0
predicted_points[:, 1] *= image.shape[0]
# convert the points to integer
predicted_points = predicted_points.astype(np.int32)

print(predicted_points)
# draw the points on the image
for point in predicted_points:
    cv2.circle(image, (point[0], point[1]), 5, (0, 0, 255), -1)

# # trim the image to the edges
# image = image[predicted_points[0][1]:predicted_points[3][1], predicted_points[0][0]:predicted_points[1][0]]
# cv2.imshow('test_image', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# top_left, top_right, bottom_left, bottom_right = trim_to_edges(image,'20_')
# # move points from the center by a margin
# # margin = 60
# # top_left = (top_left[0] + margin, top_left[1] + margin)
# # top_right = (top_right[0] - margin, top_right[1] + margin)
# # bottom_left = (bottom_left[0] + margin, bottom_left[1] - margin)
# # bottom_right = (bottom_right[0] - margin, bottom_right[1] - margin)

# image = crop_with_perspective(image, top_left, top_right, bottom_left, bottom_right)

# # return the image to the original aspect ratio
# max_height = predicted_points[3][1] - predicted_points[0][1]
# max_width = predicted_points[1][0] - predicted_points[0][0]
# image = cv2.resize(image, (0,0), fx=1, fy=max_height/max_width if max_height > max_width else max_width/max_height)
cv2.imshow('test_image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

[[0.07005253 0.05350221]
 [0.8757026  0.0530272 ]
 [0.07094659 0.89496064]
 [0.871609   0.8950962 ]]
[[0 0]
 [2 0]
 [0 2]
 [2 2]]


In [126]:
img_path = './scrapped_images1/8_12.jpg'
import time
# count the time for preprocessing the image
#preprocessed_image = preproces(image_path, expodential_interpolation=False)
source_image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
time_start = time.time()
# # remove jpg noise
# image = cv2.Canny(image, 8, 32, L2gradient=False)
# image = cv2.fastNlMeansDenoising(source_image, None, 10, 7, 21)
# #image = cv2.GaussianBlur(image,(3,3),20)
# # cv2.imshow('preprocessed_image', image)
# # cv2.waitKey(0)
# # cv2.destroyAllWindows()
# se = cv2.getStructuringElement(cv2.MORPH_RECT, (8,8))
# bg = cv2.morphologyEx(image, cv2.MORPH_DILATE, se)
# image = cv2.divide(image, bg, scale=255)
#image = cv2.equalizeHist(image)
# cv2.imshow('preprocessed_image', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
#image = cv2.threshold(source_image, 0, 255, cv2.THRESH_OTSU)[1]
#image = cv2.GaussianBlur(image,(5,5),1)
# make one more canny edge detection with different parameters
image = source_image
image = cv2.equalizeHist(image)
image = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
image = cv2.Canny(image, 8, 32, L2gradient=False)
image2 = cv2.Canny(source_image, 50, 150, L2gradient=False)

#combine the two edge detection results
image = cv2.bitwise_or(image, image2)
# new_image = np.zeros((256, 256), dtype=np.uint8)
# for x in range(128):
#     for y in range(128):
#         #write the pixels with quad growth so for the 128 pixel was the same as len(image)/2 pixels
#         half_height = len(image)//2
#         half_width = len(image[0])//2
#         a_x = (half_height*11.25-half_height*8)/(54720)
#         b_x = half_height/180-a_x*90
#         old_x = int(a_x*x**2+b_x*x)
#         a_y = (half_width*11.25-half_width*8)/(54720)
#         b_y = half_width/180-a_y*90
#         old_y = int(a_y*y**2+b_y*y)

#         # calculate the new pixels color using interpolation
#         new_image[x][y] = image[old_x][old_y]
#         new_image[255-x][255-y] = image[len(image)-1-old_x][len(image[0])-1-old_y]
#         new_image[255-x][y] = image[len(image)-1-old_x][old_y]
#         new_image[x][255-y] = image[old_x][len(image[0])-1-old_y]
# image = new_image
# lines = cv2.HoughLinesP(image, 1, np.pi/180, threshold=1, minLineLength=1, maxLineGap=100)
# canvas_edges = []
# for line in lines:
#     x1, y1, x2, y2 = line[0]
#     canvas_edges.append((x1, y1))
#     canvas_edges.append((x2, y2))
# x, y, w, h = cv2.boundingRect(np.array(canvas_edges))
# padding = 40
# image = cv2.rectangle(image, (x+padding, y+padding), (x+w-padding, y+h-padding), (0,0,0), -1)
time = time.time() - time_start
print(time)
cv2.imshow('preprocessed_image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()



0.154557466506958


In [16]:

image = crop_with_perspective(image, predicted_points[0], predicted_points[1], predicted_points[2], predicted_points[3])

# return the image to the original aspect ratio
max_height = predicted_points[3][1] - predicted_points[0][1]
max_width = predicted_points[1][0] - predicted_points[0][0]
image = cv2.resize(image, (0,0), fx=1, fy=max_height/max_width if max_height > max_width else max_width/max_height)
#image = cv2.resize(image, (0,0), fx=0.5, fy=0.5)
cv2.imshow('image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Cropping test

In [79]:
from local_package.Cropping import trim_to_edges, crop_with_perspective

In [28]:
test_image_path = './scrapped_images1/19_12.jpg'
test_image = cv2.imread(test_image_path)
if max(test_image.shape) > 1000:
    test_image = cv2.resize(test_image, (0,0), fx=0.25, fy=0.25)
points = trim_to_edges(test_image, os.path.basename(test_image_path))
for point in points:
    cv2.circle(test_image, (point[0], point[1]), 5, (0, 0, 255), -1)
test_image = crop_with_perspective(test_image, points[0], points[1], points[2], points[3])
cv2.imshow('test_image', test_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [25]:
def preproces_(img_path, expodential_interpolation=True):
    source_image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if not expodential_interpolation:
        source_image = cv2.resize(source_image, (256, 256))

    #create a checker texture
    checker = np.zeros((256, 256), dtype=np.uint8)
    for x in range(256):
        for y in range(256):
            if (x//16+y//16)%2==0:
                checker[x][y] = 255
    image = checker
    image = source_image

    # image = cv2.equalizeHist(source_image)
    # image = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
    # image = cv2.Canny(image, 8, 32, L2gradient=False)
    # image2 = cv2.Canny(source_image, 50, 150, L2gradient=False)
    # #combine the two edge detection results
    # image = cv2.bitwise_or(image, image2)
    
    #write into a new 256x256 image pixels from the original image starting from the edges on the image
    if expodential_interpolation:
        new_image = np.zeros((256, 256), dtype=np.uint8)
        for x in range(128):
            for y in range(128):
                #write the pixels with quad growth so for the 128 pixel was the same as len(image)/2 pixels
                half_height = len(image)//2
                half_width = len(image[0])//2
                a_x = (half_height*11.25-half_height*8)/(54720)
                b_x = half_height/180-a_x*90
                old_x = int(a_x*x**2+b_x*x)
                a_y = (half_width*11.25-half_width*8)/(54720)
                b_y = half_width/180-a_y*90
                old_y = int(a_y*y**2+b_y*y)

                # calculate the new pixels color using interpolation
                new_image[x][y] = image[old_x][old_y]
                new_image[255-x][255-y] = image[len(image)-1-old_x][len(image[0])-1-old_y]
                new_image[255-x][y] = image[len(image)-1-old_x][old_y]
                new_image[x][255-y] = image[old_x][len(image[0])-1-old_y]
        image = new_image
    return image 

In [27]:
orig_image = cv2.imread('./scrapped_images1/19_12.jpg')
image = preproces_("./scrapped_images1/19_12.jpg", expodential_interpolation=True)
image = cv2.resize(image, (0,0), fx=2, fy=2)
if max(orig_image.shape) > 1000:
    orig_image = cv2.resize(orig_image, (0,0), fx=0.25, fy=0.25)
cv2.imshow('image', image)
cv2.imshow('orig_image', orig_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [54]:

IMG_RESOLUTION = (256, 256)
def preproces(img_path, show=False):
    source_image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # create a checker texture
    # checker = np.zeros(IMG_RESOLUTION, dtype=np.uint8)
    # for x in range(IMG_RESOLUTION[0]):
    #     for y in range(IMG_RESOLUTION[1]):
    #         if (x//16+y//16)%2==0:
    #             checker[x][y] = 255
    # source_image = checker

    image = cv2.equalizeHist(source_image)

    #write into a new 256x256 image pixels from the original image starting from the edges on the image
    new_image = np.zeros((256, 256), dtype=np.uint8)
    for x in range(128):
        for y in range(128):
            #write the pixels with quad growth so for the 128 pixel was the same as len(image)/2 pixels
            half_height = len(image)//2
            half_width = len(image[0])//2
            a_x = (half_height*11.25-half_height*8)/(54720)
            b_x = half_height/180-a_x*90
            old_x = int(a_x*x**2+b_x*x)
            a_y = (half_width*11.25-half_width*8)/(54720)
            b_y = half_width/180-a_y*90
            old_y = int(a_y*y**2+b_y*y)

            # calculate the new pixels color using interpolation
            new_image[x][y] = image[old_x][old_y]
            new_image[255-x][255-y] = image[len(image)-1-old_x][len(image[0])-1-old_y]
            new_image[255-x][y] = image[len(image)-1-old_x][old_y]
            new_image[x][255-y] = image[old_x][len(image[0])-1-old_y]
    image = new_image
    #show image
    if show:
        cv2.imshow("image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return image

In [61]:

img_path = './scrapped_images1/19_29.jpg'
image = preproces(img_path,True)

In [60]:
img = cv2.imread(img_path)
img = cv2.resize(img, (0,0), fx=0.25, fy=0.25)
cv2.imshow('image', img)
cv2.waitKey(0)

-1