In [None]:
#GENERAL
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
#PATH PROCESS
import os
import os.path
from pathlib import Path
import glob
# IMAGE PROCESS
import cv2
# MACHINE LEARNING
from sklearn.model_selection import train_test_split
import pennylane as qml
# Render every figure in this notebook with matplotlib's "dark_background" style.
plt.style.use("dark_background")

# Pre Processing

To make the 227 x 227 pixel images compatible with quantum computing, we apply a threshold and downsample the image to be 8 x 8 pixels.


We have modified the preprocessing from: https://www.kaggle.com/brsdincer/surface-crack-detection-end-to-end-process by Baris Dincer.

In [None]:
# Dataset root, given as a relative path (resolved against the current working directory).
Surface_Data = Path("crack_dataset")

In [None]:
# Collect every .jpg exactly one folder below the dataset root.
# glob yields entries in a filesystem-dependent order, so sort them to give
# every run the same starting order.
Surface_JPG_Path = sorted(Surface_Data.glob(r"*/*.jpg"))

In [None]:
# The label of each image is the name of the folder that directly contains it.
Surface_Labels = [
    os.path.basename(os.path.dirname(jpg_path)) for jpg_path in Surface_JPG_Path
]

TO SERIES

In [None]:
# Wrap paths and labels in named Series; the paths are converted to strings.
jpg_paths = pd.Series(data=Surface_JPG_Path, name="JPG")
Surface_JPG_Path_Series = jpg_paths.astype(str)
Surface_Labels_Series = pd.Series(data=Surface_Labels, name="CATEGORY")

TO DATAFRAME

In [None]:
# Place each image path next to its label: one row per image.
Main_Surface_Data = pd.concat(
    objs=[Surface_JPG_Path_Series, Surface_Labels_Series],
    axis="columns",
)

In [None]:
# Preview the first rows. head(-1) returned every row except the last one,
# and print() bypassed the notebook's rich table rendering.
Main_Surface_Data.head()

TO SHUFFLE

In [None]:
# Shuffle all rows and renumber them from 0. The fixed seed keeps the row order,
# and therefore every index-based preview, identical across kernel restarts.
Main_Surface_Data = Main_Surface_Data.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Preview the first rows after shuffling. head(-1) returned every row except
# the last one, and print() bypassed the notebook's rich table rendering.
Main_Surface_Data.head()

# VISUALIZATION

LABELS

In [None]:
# Split the table by label. reset_index() renumbers each subset from 0 and
# keeps the previous row labels in an "index" column.
is_positive = Main_Surface_Data["CATEGORY"] == "Positive"
is_negative = Main_Surface_Data["CATEGORY"] == "Negative"

Positive_Surface = Main_Surface_Data.loc[is_positive].reset_index()
Negative_Surface = Main_Surface_Data.loc[is_negative].reset_index()

In [None]:
def simple_vision(path):
    """Show the image stored at ``path`` in its original colours.

    The x-axis label shows the pixel array's shape and the y-axis label its
    total number of elements.

    Args:
        path: Location of the image file, as a string.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an unrelated error inside cvtColor.
    Reading_Img = cv2.imread(path)
    if Reading_Img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV decodes to BGR channel order; matplotlib expects RGB.
    Reading_Img = cv2.cvtColor(Reading_Img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(8, 8))
    plt.xlabel(Reading_Img.shape)
    plt.ylabel(Reading_Img.size)
    plt.imshow(Reading_Img)


In [None]:
def greyscale_vision(path):
    """Show the image stored at ``path`` in greyscale.

    The x-axis label shows the pixel array's shape and the y-axis label its
    total number of elements.

    Args:
        path: Location of the image file, as a string.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    # Decode straight to a single channel. cv2.IMREAD_GRAYSCALE is an imread
    # flag, not a colour-conversion code: handing it to cvtColor (as this
    # function used to) does not produce a greyscale image.
    Reading_Img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if Reading_Img is None:
        raise FileNotFoundError(f"Could not read image: {path}")

    plt.figure(figsize=(8, 8))
    plt.xlabel(Reading_Img.shape)
    plt.ylabel(Reading_Img.size)
    # imshow colour-maps single-channel data; pin a grey ramp over the 8-bit range.
    plt.imshow(Reading_Img, cmap="gray", vmin=0, vmax=255)


In [None]:
def threshold_vision(path):
    """Show the inverse-binary-thresholded RGB image stored at ``path``.

    The threshold (130, max value 255, THRESH_BINARY_INV) is applied to the
    RGB array as loaded. The x-axis label shows the result's shape and the
    y-axis label its total number of elements.

    Args:
        path: Location of the image file, as a string.
    """
    plt.figure(figsize=(8, 8))

    # OpenCV decodes to BGR channel order; convert to RGB before thresholding.
    rgb_pixels = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    thresholded = cv2.threshold(rgb_pixels, 130, 255, cv2.THRESH_BINARY_INV)[1]

    plt.imshow(thresholded)
    plt.xlabel(thresholded.shape)
    plt.ylabel(thresholded.size)

## SIMPLE VISION

In [None]:
# Show the image in the row labelled 4 of the shuffled table.
simple_vision(Main_Surface_Data.loc[4, "JPG"])

In [None]:


# Show the image in the row labelled 2 of the shuffled table.
simple_vision(Main_Surface_Data.loc[2, "JPG"])



### Positive Surfaces

In [None]:
# 4x4 gallery of the first 16 images labelled "Positive", in RGB.
figure, axis = plt.subplots(4, 4, figsize=(10, 10))

for position, panel in enumerate(axis.flat):
    # OpenCV decodes to BGR channel order; matplotlib expects RGB.
    rgb_pixels = cv2.cvtColor(
        cv2.imread(Positive_Surface["JPG"][position]), cv2.COLOR_BGR2RGB
    )

    panel.imshow(rgb_pixels)
    panel.set_xlabel(rgb_pixels.shape)
    panel.set_ylabel(rgb_pixels.size)

plt.tight_layout()
plt.show()

### Negative Surfaces

In [None]:
# 4x4 gallery of the first 16 images labelled "Negative", in RGB.
figure, axis = plt.subplots(4, 4, figsize=(10, 10))

for position, panel in enumerate(axis.flat):
    # OpenCV decodes to BGR channel order; matplotlib expects RGB.
    rgb_pixels = cv2.cvtColor(
        cv2.imread(Negative_Surface["JPG"][position]), cv2.COLOR_BGR2RGB
    )

    panel.imshow(rgb_pixels)
    panel.set_xlabel(rgb_pixels.shape)
    panel.set_ylabel(rgb_pixels.size)

plt.tight_layout()
plt.show()

## THRESHOLD VISION

We'll use threshold vision.

In [None]:
# Show the thresholded version of the image in the row labelled 4.
threshold_vision(Main_Surface_Data.loc[4, "JPG"])

### Positive Surfaces

In [None]:
# 5x5 gallery of the first 25 "Positive" images after thresholding and
# downsampling to 8x8.
figure, axis = plt.subplots(5, 5, figsize=(10, 10))

# dsize must be an 8x8 for the PQC.
dsize = (8, 8)

for indexing, operations in enumerate(axis.flat):

    Reading_Img = cv2.imread(Positive_Surface["JPG"][indexing], cv2.IMREAD_GRAYSCALE)
    _, Threshold_Img = cv2.threshold(Reading_Img, 100, 255, cv2.THRESH_BINARY_INV)

    # resize image
    # interpolation must be given by keyword: the third positional parameter of
    # cv2.resize is dst, so a positional INTER_AREA never selected the
    # interpolation mode.
    Threshold_Img = cv2.resize(Threshold_Img, dsize, interpolation=cv2.INTER_AREA)

    operations.set_xlabel(Threshold_Img.shape)
    operations.set_ylabel(Threshold_Img.size)
    operations.imshow(Threshold_Img)

plt.tight_layout()
plt.show()

### Negative Surfaces

In [None]:
# 5x5 gallery of the first 25 "Negative" images after thresholding and
# downsampling to 8x8.
figure, axis = plt.subplots(5, 5, figsize=(10, 10))

# dsize must be an 8x8 for the PQC.
dsize = (8, 8)

for indexing, operations in enumerate(axis.flat):
    Reading_Img = cv2.imread(Negative_Surface["JPG"][indexing], cv2.IMREAD_GRAYSCALE)
    _, Threshold_Img = cv2.threshold(Reading_Img, 100, 255, cv2.THRESH_BINARY_INV)

    # resize image
    # interpolation must be given by keyword: the third positional parameter of
    # cv2.resize is dst, so a positional INTER_AREA never selected the
    # interpolation mode.
    Threshold_Img = cv2.resize(Threshold_Img, dsize, interpolation=cv2.INTER_AREA)
    operations.set_xlabel(Threshold_Img.shape)
    operations.set_ylabel(Threshold_Img.size)
    operations.imshow(Threshold_Img)

plt.tight_layout()
plt.show()

We note approximately 5% of the images as having contradicting labels when downsampled.

Following Farhi et al., we could clean the data further by discarding all positive and negative images that are misclassified by the downsampling.

In [None]:
def normalize_data(data):
    """Flatten an image and rescale it to a vector of unit Euclidean norm.

    The pixels are divided by the largest pixel value, rounded to six decimal
    places, and then divided by the Euclidean norm of the rounded values.

    Args:
        data: Numeric array of pixel values, of any shape.

    Returns:
        A 1-D float array with ``data.size`` entries. It has unit Euclidean
        norm, except for an all-zero input, which yields an all-zero vector.
    """
    pixels = np.asarray(data, dtype=float).ravel()

    # An all-zero image cannot be scaled (division by zero), so hand back
    # zeros in the same flat float layout as every other result.
    # The previous guard, `np.all(data) == 0`, was true whenever ANY pixel was
    # zero, so almost every thresholded image was returned unnormalized.
    if not pixels.any():
        return pixels

    scaled = np.round(pixels / pixels.max(), 6)
    return scaled / np.sqrt(np.sum(scaled * scaled))

In [None]:
# Check the pipeline on one "Negative" image: threshold, downsample to 8x8,
# show the result, then normalize it.
Reading_Img = cv2.imread(Negative_Surface["JPG"][1], cv2.IMREAD_GRAYSCALE)
_, Threshold_Img = cv2.threshold(Reading_Img, 100, 255, cv2.THRESH_BINARY_INV)
# dsize must be an 8x8 for the PQC.
dsize = (8, 8)

# resize image
# interpolation must be given by keyword: the third positional parameter of
# cv2.resize is dst, so a positional INTER_AREA never selected the
# interpolation mode.
Threshold_Img = cv2.resize(Threshold_Img, dsize, interpolation=cv2.INTER_AREA)
print(Threshold_Img)
# A single image needs a single axes; the earlier 5x5 grid left 24 panels empty.
figure, axis = plt.subplots(figsize=(10, 10))
axis.imshow(Threshold_Img)
normalize_data(Threshold_Img)

In [None]:
def preprocess_image(img_path: str) -> np.ndarray:
    """Turn an image file into the normalized 8x8 input used downstream.

    Steps: read as greyscale, apply an inverse binary threshold at 100,
    downsample to 8x8 with area interpolation, then pass the result to
    ``normalize_data``.

    Args:
        img_path: Image file path.

    Returns:
        Whatever ``normalize_data`` returns for the thresholded 8x8 image.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``img_path``.
    """
    # read image; cv2.imread reports failure by returning None, not by raising
    Reading_Img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if Reading_Img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # apply threshold
    _, Threshold_Img = cv2.threshold(Reading_Img, 100, 255, cv2.THRESH_BINARY_INV)
    # dsize must be an 8x8 for the PQC.
    dsize = (8, 8)
    # resize image; interpolation must be given by keyword because the third
    # positional parameter of cv2.resize is dst, so a positional INTER_AREA
    # never selected the interpolation mode.
    Threshold_Img = cv2.resize(Threshold_Img, dsize, interpolation=cv2.INTER_AREA)
    # normalize image
    return normalize_data(Threshold_Img)

We will apply this to both the negative and positive surface datasets.


In [None]:
# Preprocess every "Negative" image and store the result in a new column.
negative_features = Negative_Surface["JPG"].apply(preprocess_image)
Negative_Surface["Image_Array"] = negative_features

In [None]:
# Preprocess every "Positive" image and store the result in a new column.
positive_features = Positive_Surface["JPG"].apply(preprocess_image)
Positive_Surface["Image_Array"] = positive_features

In [None]:
# Preprocess every image in the full table and store the result in a new column.
all_features = Main_Surface_Data["JPG"].apply(preprocess_image)
Main_Surface_Data["Image_Array"] = all_features

We will use the resized arrays as our training features and the categories as our targets.

In [None]:
# Features are the preprocessed image arrays; targets are the label strings.
X, Y = Main_Surface_Data["Image_Array"], Main_Surface_Data["CATEGORY"]

Split the data into test and training sets

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
trainX, testX, trainy, testy = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [None]:

# One-hot encode the string labels with NumPy/pandas. The previous cell called
# tf.one_hot, but `tf` is never imported in this notebook, and tf.one_hot needs
# integer class indices rather than label strings.
# Column 0 is "Negative" and column 1 is "Positive".
CLASS_NAMES = ["Negative", "Positive"]


def _one_hot(labels):
    """Return a (len(labels), 2) float32 one-hot array for the given label strings."""
    codes = pd.Categorical(labels, categories=CLASS_NAMES).codes
    # pandas marks labels outside CLASS_NAMES with -1, which would silently
    # select the last class when used as an index.
    if (codes < 0).any():
        raise ValueError(f"Labels must be one of {CLASS_NAMES}")
    return np.eye(len(CLASS_NAMES), dtype=np.float32)[codes]


trainy = _one_hot(trainy)
testy = _one_hot(testy)

In [None]:
# Display the one-hot encoded training targets.
trainy