In [60]:
# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd

In [61]:
# Image filenames to look for: 1_dot_gray.bmp, 2_dot_gray.bmp, ..., 100_dot_gray.bmp
name = [f'{idx}_dot_gray.bmp' for idx in range(1, 101)]

In [62]:
# Root folder: the working directory at the moment this cell is executed.
path0 = os.getcwd()
i = 8 # pattern number
cf = str(i)

# One "config<pattern number>" sub-folder per dataset directory.
path1 = f"{path0}/conventional_coated_1200dpi/config{cf}"
# NOTE(review): "unocated" looks like a typo for "uncoated" -- confirm against
# the folder name on disk before changing it.
path2 = f"{path0}/conventional_unocated_1200dpi/config{cf}"
path3 = f"{path0}/Waterless_coated_1200dpi/config{cf}"
path4 = f"{path0}/Waterless_uncoated_1200dpi/config{cf}"
path5 = f"{path0}/laser_coated_600dpi/config{cf}"
path6 = f"{path0}/laser_uncoated_600dpi/config{cf}"

# Order matters: the position in this list becomes the class label later on.
PATHS = [
    path1,
    path2,
    path3,
    path4,
    path5,
    path6,
]

In [63]:
# Build the feature extractor once, up front, so it can be passed to
# extract_features: start from the stock VGG16 network and expose the output
# of its second-to-last layer instead of the final output layer.
vgg16_full = VGG16()
model = Model(
    inputs=vgg16_full.inputs,
    outputs=vgg16_full.layers[-2].output,
)

def extract_features(file, model):
    """Run one image file through ``model`` and return the prediction.

    Parameters
    ----------
    file
        Path of the image to load; it is resized to 224x224 with bicubic
        interpolation while loading.
    model
        Object exposing ``predict``; it receives a single-image batch of
        shape (1, 224, 224, 3) after ``preprocess_input`` has been applied.

    Returns
    -------
    The value returned by ``model.predict`` for that one-image batch.
    """
    # load the image as a 224x224 array
    img = load_img(file, target_size=(224, 224), interpolation='bicubic')
    # convert to numpy array
    pixels = np.array(img)
    # add the batch axis: reshape(num_of_samples, dim 1, dim 2, channels);
    # this also fails loudly if the image is not 224x224x3
    batch = pixels.reshape(1, 224, 224, 3)
    # prepare image for model
    batch = preprocess_input(batch)
    # get the feature vector.
    # `use_multiprocessing` is not passed: it only applies to generator /
    # Sequence inputs and Keras 3's predict() no longer accepts it.
    # verbose=0 avoids printing one progress bar per image.
    features = model.predict(batch, verbose=0)
    return features

# ------------- FEATURE EXTRACTION ------------

In [64]:
# Run every expected image of every folder in PATHS through the network and
# stack the 4096-long feature vectors into one table. Each row carries a
# 'label' column holding the position of its folder in PATHS (0, 1, 2, ...).
expected_names = set(name)
frames = []
for label, path in enumerate(PATHS):
    # Find which of the expected filenames exist in this folder. Files are
    # addressed by their full path instead of via os.chdir, so the process
    # working directory (and therefore os.getcwd() in the path cell) is not
    # changed by running this cell.
    with os.scandir(path) as entries:
        present = {entry.name for entry in entries if entry.name in expected_names}

    # os.scandir yields entries in arbitrary order; follow the order of `name`
    # so that row order -- and the seeded train/test split built on it -- is
    # reproducible across machines and filesystems.
    images = [fname for fname in name if fname in present]
#     print(f'number of images:{len(images)}')

    # one prediction per image; each is reshaped into a 4096-long row below
    vectors = [extract_features(os.path.join(path, fname), model) for fname in images]

    # reshape so that there are N images of 4096 vectors
    # (an empty folder gives a 0 x 4096 table)
    feat = pd.DataFrame(data=np.array(vectors).reshape(-1, 4096))
    feat['label'] = label
    frames.append(feat)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copies the whole table on every iteration.
CONFIG = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [65]:
# Keep only the columns in which at least one row differs from the first row
# (columns that are identical in every row are dropped), then renumber the
# rows 0..N-1.
column_varies = CONFIG.ne(CONFIG.iloc[0]).any()
data = CONFIG.loc[:, column_varies].reset_index(drop=True)

# ----------- CLASSIFICATION ------------

### split train_data and test_data

In [81]:
seed = 101

# Stratified 60/40 split, done class by class.
# Rows are grouped by the 'label' column rather than by fixed row ranges
# (0-99, 100-199, ...): fixed ranges silently mix classes as soon as a folder
# contributes anything other than exactly 100 images.
train_parts = []
test_parts = []
for _, class_rows in data.groupby('label', sort=True):
    # The same random_state is used for every class, so each class is sampled
    # independently with an identical seed.
    picked = class_rows.sample(frac=0.60, random_state=seed)
    train_parts.append(picked)
    # everything not picked for training becomes test data
    # (relies on `data` having a unique index)
    test_parts.append(class_rows.drop(picked.index))

train = pd.concat(train_parts)
test = pd.concat(test_parts)

# Features are every column except 'label'; targets are the label values
# as a flat numpy array.
X_train = train.drop(columns='label')
y_train = train['label'].to_numpy()

X_test = test.drop(columns='label')
y_test = test['label'].to_numpy()

### SVM classification

In [82]:
from sklearn import svm, datasets
from sklearn.metrics import confusion_matrix

# Support vector classifier with an RBF kernel and a one-vs-one decision
# function, fitted on the training split.
rbf = svm.SVC(
    kernel='rbf',
    gamma='scale',
    C=1,
    decision_function_shape='ovo',
)
rbf.fit(X_train, y_train)

# Evaluate on the held-out split: predicted labels, score, confusion matrix
# (true labels are passed first, predictions second).
rbf_pred = rbf.predict(X_test)
accuracy_rbf = rbf.score(X_test, y_test)
cm_rbf = confusion_matrix(y_test, rbf_pred)

print('Accuracy Radial Basis Kernel:', accuracy_rbf)
print(cm_rbf)

Accuracy Radial Basis Kernel: 1.0
[[40  0  0  0  0  0]
 [ 0 40  0  0  0  0]
 [ 0  0 40  0  0  0]
 [ 0  0  0 40  0  0]
 [ 0  0  0  0 40  0]
 [ 0  0  0  0  0 40]]


### Random Forest classification

In [83]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees, seeded for reproducibility.
# max_features='sqrt' replaces 'auto': for classifiers 'auto' meant 'sqrt',
# it was deprecated in scikit-learn 1.1 and is rejected from 1.3 onwards.
clf = RandomForestClassifier(n_estimators=100, random_state=seed,
                             max_features='sqrt', min_samples_split=2)
# Train the model using the training sets
clf.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)
print('Accuracy:', accuracy)

# confusion_matrix is imported in the SVM cell above
# (true labels first, predictions second)
cm = confusion_matrix(y_test, y_pred)
print(cm)

Accuracy: 1.0
[[40  0  0  0  0  0]
 [ 0 40  0  0  0  0]
 [ 0  0 40  0  0  0]
 [ 0  0  0 40  0  0]
 [ 0  0  0  0 40  0]
 [ 0  0  0  0  0 40]]
