In [123]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
# import PIL
import matplotlib

from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.utils import np_utils
from keras.layers import BatchNormalization
from keras import regularizers
from keras.layers import ZeroPadding2D
# from keras.preprocessing import image

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img

# from PIL import Image

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import classification_report

# import matplotlib.pyplot as plt
# import matplotlib.image as mpimg

from imutils import paths

In [25]:
# set directory path
# The project root defaults to the original author's folder, but it can be
# overridden through the STAT440_ROOT environment variable so the notebook
# runs on another machine without editing this cell.
root_dir = os.path.abspath(os.environ.get(
    'STAT440_ROOT',
    'C:\\Users\\LYC\\Desktop\\SFU\\Fall 2020 Courses\\STAT 440\\Module 3'))
data_dir = os.path.join(root_dir, 'tr')  # <root_dir>/tr, parent of the img/ and lbl/ folders read below

In [62]:
train = os.path.join(data_dir, 'img')  # folder holding the training images; passed as `directory` to flow_from_dataframe

In [28]:
def loadImages(path):
    '''Return the sorted paths of every JPEG file in ``<path>/img``.

    Parameters
    ----------
    path : str
        Directory that contains the ``img`` sub-folder.

    Returns
    -------
    list of str
        ``os.path.join(path, 'img', name)`` for each regular file whose name
        ends in ``.jpg`` (compared case-insensitively, so ``.JPG`` is also
        picked up), in ascending lexicographic order. Empty if the folder
        holds no such file.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``<path>/img`` cannot be read.
    '''
    img_dir = os.path.join(path, 'img')
    return sorted(
        os.path.join(img_dir, name)
        for name in os.listdir(img_dir)
        # isfile() skips sub-directories that merely happen to end in ".jpg"
        if name.lower().endswith('.jpg')
        and os.path.isfile(os.path.join(img_dir, name))
    )

img = loadImages(data_dir)  # sorted file paths (not directories) of the training images under <data_dir>/img

def load_test_Images(path):
    '''Return the sorted paths of every JPEG file in ``<path>/te``.

    Parameters
    ----------
    path : str
        Directory that contains the ``te`` (test images) sub-folder.

    Returns
    -------
    list of str
        ``os.path.join(path, 'te', name)`` for each regular file whose name
        ends in ``.jpg`` (case-insensitive), in ascending lexicographic
        order. Note that lexicographic order equals numeric order only when
        the file names are zero-padded to the same width.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``<path>/te`` cannot be read.
    '''
    test_dir = os.path.join(path, 'te')
    jpeg_paths = []
    for name in os.listdir(test_dir):
        full_path = os.path.join(test_dir, name)
        # keep regular files only; a directory called "x.jpg" is not an image
        if name.lower().endswith('.jpg') and os.path.isfile(full_path):
            jpeg_paths.append(full_path)
    jpeg_paths.sort()
    return jpeg_paths

In [29]:
def loadLbl(path):
    '''Return the sorted paths of every label text file in ``<path>/lbl``.

    Parameters
    ----------
    path : str
        Directory that contains the ``lbl`` sub-folder.

    Returns
    -------
    list of str
        ``os.path.join(path, 'lbl', name)`` for each regular file whose name
        ends in ``.txt`` (case-insensitive), in ascending lexicographic
        order. Empty if the folder holds no such file.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``<path>/lbl`` cannot be read.
    '''
    lbl_dir = os.path.join(path, 'lbl')
    return sorted(
        os.path.join(lbl_dir, name)
        for name in os.listdir(lbl_dir)
        # isfile() skips sub-directories that merely happen to end in ".txt"
        if name.lower().endswith('.txt')
        and os.path.isfile(os.path.join(lbl_dir, name))
    )

lbl = loadLbl(data_dir)  # sorted file paths of the label .txt files under <data_dir>/lbl (one per training image, presumably — verify)

In [7]:
# create csv file, name column = image file names, label column = corresponding label

# Images and labels are paired purely by their position in the two sorted
# lists, so a length mismatch means at least one pair would be wrong.
if len(img) != len(lbl):
    raise ValueError(
        'found %d images but %d label files; cannot pair them by position'
        % (len(img), len(lbl)))

label_rows = []
for image_path, label_path in zip(img, lbl):
    # `with` closes each label file right away instead of leaking the handle
    with open(label_path, "r") as label_file:
        label = label_file.read().replace("\n", "").strip()
    # basename() yields the bare file name whatever its length
    # (the previous fixed 11-character slice only worked for one name width)
    label_rows.append((os.path.basename(image_path), label))

labels = pd.DataFrame(label_rows, columns=['name', 'label'])
# Written next to the data so the cell that reads <root_dir>/labels.csv finds
# it regardless of the kernel's current working directory.
labels.to_csv(os.path.join(root_dir, 'labels.csv'), index=False)

In [110]:
# Load the name/label table; every column is read as text so the labels stay
# strings, which is what class_mode='categorical' is fed further down.
labels_csv_path = os.path.join(root_dir, 'labels.csv')
train_data = pd.read_csv(labels_csv_path, dtype='str')

In [111]:
# Initialize ImageDataGenerator.
# It pulls images from the source folder, applies the random transformations
# configured here, then hands the transformed images to the next step.
# NOTE(review): every flow created from this generator receives these random
# transforms, including one created with subset='validation'.
augmentation_options = dict(
    validation_split=0.2,    # hold out 20% of the rows, so no train_test_split is needed
    rescale=1. / 255.,       # scale pixel values by 1/255
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = ImageDataGenerator(**augmentation_options)

In [112]:
# Pipeline that streams augmented training batches from the image folder
# into the model.
train_generator = aug.flow_from_dataframe(
    dataframe=train_data,
    directory=train,            # folder the 'name' column is resolved against -- IMPORTANT
    x_col='name',
    y_col='label',
    subset='training',          # the part of the frame not held out for validation
    class_mode='categorical',
    target_size=(56, 56),       # same 56x56 size as the model's input_shape
    batch_size=32,
    shuffle=True,
    seed=42,
)

Found 1011 validated image filenames belonging to 6 classes.


In [113]:
# create pipeline for feeding validation data from source folder to training model
# the validation data will be split from training data automatically
#
# Validation images should not be randomly rotated/shifted/flipped, so they
# come from their own generator that only rescales. Its validation_split must
# equal the one given to `aug`: Keras cuts the subsets by row position in the
# dataframe (not randomly -- see the flow_from_dataframe docs), so equal
# fractions keep the training and validation rows disjoint.
valid_aug = ImageDataGenerator(validation_split=0.2, rescale=1./255.)

valid_generator = valid_aug.flow_from_dataframe(
    dataframe=train_data,
    directory=train,            # directory for training images, IMPORTANT
    x_col='name',
    y_col='label',
    subset='validation',        # identify as validation data
    batch_size=32,
    seed=42,
    shuffle=False,              # fixed order -> the same images are scored every epoch
    class_mode='categorical',
    target_size=(56, 56))

Found 252 validated image filenames belonging to 6 classes.


In [218]:
# model architecture
# Output shapes in the comments assume Keras' default 'valid' padding.
# NOTE(review): the two hidden Dense layers sit *before* Flatten, so they act
# on the channel axis of each spatial position rather than on a flat feature
# vector -- confirm this is intended.
model = Sequential([
    Conv2D(32, kernel_size=(2, 2), strides=2, activation='relu',
           input_shape=(56, 56, 3)),                                  # -> 28x28x32
    Conv2D(64, (3, 3), activation='relu'),                            # -> 26x26x64
    MaxPool2D(pool_size=(2, 2)),                                      # -> 13x13x64
    Dense(128, activation='relu'),                                    # -> 13x13x128
    Dropout(0.25),
    Conv2D(32, kernel_size=(2, 2), strides=2, activation='relu'),     # -> 6x6x32
    Conv2D(64, (2, 2), activation='relu'),                            # -> 5x5x64
    Dense(64, activation='relu'),                                     # -> 5x5x64
    Flatten(),                                                        # -> 1600
    Dropout(0.2),
    Dense(6, activation='softmax'),                                   # one probability per class
])

In [219]:
# Multi-class setup: one-hot targets from class_mode='categorical' pair with
# categorical cross-entropy. `learning_rate` replaces the deprecated `lr`
# keyword of the Adam optimizer.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])

In [246]:
# Batches per epoch, rounded UP so the final, smaller batch is included.
# Floor division silently skipped the remainder (e.g. 252 validation images
# with batch size 32 -> only 7 * 32 = 224 were ever evaluated).
STEP_SIZE_TRAIN = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
STEP_SIZE_VALID = (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size

# Keep the History object so the loss/accuracy curves can be inspected later.
history = model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x333029d0>

In [247]:
# prepare test data
# load test set: sorted paths of the test images under <root_dir>/te
test_paths = load_test_Images(root_dir)
if not test_paths:
    raise FileNotFoundError(
        'no .jpg test images found in ' + os.path.join(root_dir, 'te'))

# read and store test images, preprocessed like the training images:
# RGB, resized to 56x56, pixel values multiplied by 1/255
test_image = []
for image_path in test_paths:
    # target_size is (height, width); the channel count comes from color_mode
    pic = load_img(image_path, target_size=(56, 56), color_mode='rgb')
    a = img_to_array(pic).astype('float32') / 255.
    test_image.append(a)

test = np.stack(test_image)  # one array with the images stacked along a new first axis

# Sequential.predict_classes() is deprecated/removed in newer TensorFlow;
# the argmax over the softmax outputs gives the same class indices.
prediction = np.argmax(model.predict(test), axis=-1)

In [248]:
# The network outputs 0-based class indices (0-5), while the submission uses
# labels 1-6, so every prediction is shifted up by one.
# NOTE(review): this presumes the training labels are exactly '1'..'6' and that
# the generator numbered them in that order -- check train_generator.class_indices.
# Re-running this cell shifts the values again; re-run the prediction cell first.
prediction = np.add(prediction, 1)

In [249]:
# create dataframe with prediction and test picture IDs
# (uses the `pd` alias imported at the top instead of a wildcard pandas import)

# Test picture IDs start at 1264 -- presumably they continue after the 1263
# training images (1011 + 252); confirm against the submission format.
FIRST_TEST_ID = 1264

# One Id per prediction, so the two columns can never be misaligned or padded
# with NaN if the number of test images changes.
idx = pd.Series(range(FIRST_TEST_ID, FIRST_TEST_ID + len(prediction)), dtype='int64')
prediction = pd.Series(prediction)

data = pd.concat([idx, prediction], axis=1)
data.columns = ['Id', "Prediction"]

In [250]:
# Write the submission file (header row included, no index column) into the
# current working directory.
data.to_csv(path_or_buf='kaggle_submission.csv', index=False, header=True)

In [251]:
# Save the trained model to disk.
# NOTE(review): the target is the project root itself, so the saved-model
# files (e.g. the assets/ folder) land directly beside the data folders;
# a dedicated sub-folder may be tidier.
model.save(filepath=root_dir)

INFO:tensorflow:Assets written to: C:\Users\LYC\Desktop\SFU\Fall 2020 Courses\STAT 440\Module 3\assets
