In [66]:
# from google.colab import drive
# drive.mount('/content/drive/')

In [67]:
SIZE = 256
EPOCH = 200
BATCH_SIZE = 16
test_ratio = 0.2
INPUT_SHAPE = (SIZE, SIZE, 3) 
MODEL_NAME = '21_1121.h5'
TB_FILE = '21_1121'

In [68]:
from keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir="logs\\{}".format(TB_FILE))

In [69]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.utils import np_utils
plt.style.use('classic')

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Activation, Dropout, Flatten, Dense

import os
import cv2
from PIL import Image
import numpy as np

In [70]:
# load train data

# image_directory = '/content/drive/MyDrive/Dataset/'
image_directory = 'archive/train/'
dataset = []
label = [] 

badimgs = os.listdir(image_directory + 'not-good/')
for i, image_name in enumerate(badimgs):   
    if (image_name.split('.')[1] == 'png'):
        image = cv2.imread(image_directory + 'not-good/' + image_name)
        image = cv2.resize(image , (SIZE, SIZE))
        image = image/255.0
        dataset.append(image)
        label.append(1)


goodimgs = os.listdir(image_directory + 'good/')
for i, image_name in enumerate(goodimgs):
    if (image_name.split('.')[1] == 'png'):
        image = cv2.imread(image_directory + 'good/' + image_name)
        image = cv2.resize(image , (SIZE, SIZE))
        image = image/255.0
        dataset.append(image)
        label.append(0)


dataset = np.array(dataset)
label = np.array(label)

print(dataset.shape)
print(label.shape)

(300, 256, 256, 3)
(300,)


In [71]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(dataset, label, test_size = test_ratio, random_state = 0) 

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test)

(240, 256, 256, 3)
(240,)
(60, 256, 256, 3)
[0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0]


In [72]:
# oversampling
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import RandomOverSampler

# Create an oversampler
# smk = SMOTETomek(random_state=42)
ovs = RandomOverSampler(random_state=0)

dataset2d = X_train.reshape(X_train.shape[0], -1)

# Fit and transform the data
dataset1, y_train = ovs.fit_resample(dataset2d, y_train)
X_train = dataset1.reshape(dataset1.shape[0], SIZE, SIZE, 3)

print(X_train.shape)
print(y_train.shape)
# print(y_train)

(398, 256, 256, 3)
(398,)


In [73]:
# data augmentation
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=360)
datagen.fit(X_train)

In [74]:
import tensorflow as tf

# base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(
#     weights='imagenet',
#     include_top=False,
#     pooling='avg'
# )

base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg'
)

base_model.trainable = False 

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Dense(1024, activation='relu'), 
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu'),
    # add regularization l2 to dense layer
    # tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),        
    tf.keras.layers.Dense(1, activation='sigmoid')
])


model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense_11 (Dense)            (None, 1024)              2098176   
                                                                 
 dropout_2 (Dropout)         (None, 1024)              0         
                                                                 
 dense_12 (Dense)            (None, 128)               131200    
                                                                 
 dense_13 (Dense)            (None, 1)                 129       
                                                                 
Total params: 25,817,217
Trainable params: 2,229,505
Non-trainable params: 23,587,712
_________________________________________________________________


In [75]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [76]:
history = model.fit(datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
                    steps_per_epoch=len(X_train) / BATCH_SIZE,
                    epochs=EPOCH,
                    validation_data=(X_test, y_test),
                    callbacks=[tensorboard],
                    )


model.save(MODEL_NAME)

train_score = model.evaluate(X_train, y_train, verbose=0)
test_score = model.evaluate(X_test, y_test, verbose=0)
print('Train Loss:{0:.3f}'.format(train_score[0]))
print('Train accuracy:{0:.3}'.format(train_score[1]))
print('Test Loss:{0:.3f}'.format(test_score[0]))
print('Test accuracy:{0:.3}'.format(test_score[1]))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

KeyboardInterrupt: 

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'y', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
plt.plot(epochs, acc, 'y', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# calculate precision and recall
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, confusion_matrix

y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)

print('Accuracy: {:.2f}'.format(accuracy_score(y_test, y_pred)))
print('Precision: {:.2f}'.format(precision_score(y_test, y_pred)))
print('Recall: {:.2f}'.format(recall_score(y_test, y_pred)))
print('F1: {:.2f}'.format(f1_score(y_test, y_pred)))
print('ROC AUC: {:.2f}'.format(roc_auc_score(y_test, y_pred)))
print('Confusion Matrix: ', confusion_matrix(y_test, y_pred))

In [None]:
mythreshold=0.5
from sklearn.metrics import confusion_matrix

y_pred = (model.predict(X_test)>= mythreshold).astype(int)
cm=confusion_matrix(y_test, y_pred)  
print(cm)

y_pred = (model.predict(dataset)>= mythreshold).astype(int)
cm=confusion_matrix(label, y_pred)  
print(cm)

In [None]:
test_image_directory = 'archive/'
test_dataset = []

testimg = os.listdir(test_image_directory + 'test/')
for i, image_name in enumerate(testimg):   
    if (image_name.split('.')[1] == 'png'):
        image = cv2.imread(test_image_directory + 'test/' + image_name)
        image = cv2.resize(image , (SIZE, SIZE))
        image = image/255.0
        test_dataset.append(image)

test_dataset = np.array(test_dataset)

In [None]:
from keras.models import load_model
# load model
model = load_model(MODEL_NAME)

In [None]:
mythreshold=0.5
test_y_preds = (model.predict(test_dataset)>= mythreshold).astype(int)
# save to csv
import pandas as pd
df = pd.DataFrame(test_y_preds)
df.to_csv('submission.csv', index=False)

