In [1]:
import shutil
import os

# Load the images dataset

In [2]:
# Folder that holds every raw image (the training, validation and test files are all read from here)
source = "images"

# Data Processing

In [None]:
# Prepare the directory structure for the algorithm.
# Layout is <root>/<split>/<class>: one sub-folder per class inside each split,
# which is the layout the directory-based Keras generators further down read from.
train_dest1 = 'MLU_MPC_Data_With_Dropout/training_set/Class0'
train_dest2 = 'MLU_MPC_Data_With_Dropout/training_set/Class1'
valid_dest1 = 'MLU_MPC_Data_With_Dropout/validation_set/Class0'
valid_dest2 = 'MLU_MPC_Data_With_Dropout/validation_set/Class1'
test_dest1 = 'MLU_MPC_Data_With_Dropout/test_set/Class0'
test_dest2 = 'MLU_MPC_Data_With_Dropout/test_set/Class1'

# Actually create the folders. Without this, shutil.copy() either raises (parent folder
# missing) or silently writes a single *file* named "Class0"/"Class1" instead of filling
# a directory. exist_ok=True keeps the cell safe to re-run.
for dest_dir in (train_dest1, train_dest2,
                 valid_dest1, valid_dest2,
                 test_dest1, test_dest2):
    os.makedirs(dest_dir, exist_ok=True)

In [16]:
import pandas as pd
# Load the CSV file that lists the image IDs and their labels (columns 'ID' and 'class').
# This single table is later split into the training and the validation subsets.
training_original_df = pd.read_csv('training.csv')

In [4]:
# Load the CSV file that lists only the image IDs of the test data set
test_df = pd.read_csv('public_test_features.csv')

In [5]:
# Snapshot of the file names currently present in the raw image folder
imagefiles = os.listdir(source)

In [6]:
from sklearn.model_selection import train_test_split

In [None]:
# Preview the first rows of the labelled table
training_original_df.head()

In [None]:
# Hold out 25% of the labelled rows for validation; the fixed seed makes the split repeatable
img_train, img_valid = train_test_split(
    training_original_df,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Preview the first rows of the training split
img_train.head()

In [None]:
# Preview the first rows of the validation split
img_valid.head()

In [None]:
# (rows, columns) of the training split
img_train.shape


In [None]:
# (rows, columns) of the validation split
img_valid.shape

In [None]:
# Copy the training image set into its per-class folder structure.
# (Files are copied, not moved, so the raw `source` folder stays intact.)
available_images = set(imagefiles)  # O(1) lookup instead of rescanning the whole listing for every row
train_dest_by_class = {0: train_dest1, 1: train_dest2}

# Make sure the targets are real directories; otherwise shutil.copy() would fail or
# write one file literally named "Class0"/"Class1".
for class_dir in train_dest_by_class.values():
    os.makedirs(class_dir, exist_ok=True)

for imgid, imgclass in zip(img_train['ID'], img_train['class']):
    filename = str(imgid) + '.png'
    dest_dir = train_dest_by_class.get(imgclass)
    # Skip rows whose label is neither 0 nor 1, or whose image is not on disk.
    if dest_dir is None or filename not in available_images:
        continue
    shutil.copy(os.path.join(source, filename), dest_dir)
        

In [None]:
# Copy the validation image set into its per-class folder structure.
# (Files are copied, not moved, so the raw `source` folder stays intact.)
available_images_v = set(imagefiles)  # O(1) lookup instead of rescanning the whole listing for every row
valid_dest_by_class = {0: valid_dest1, 1: valid_dest2}

# Make sure the targets are real directories; otherwise shutil.copy() would fail or
# write one file literally named "Class0"/"Class1".
for class_dir_v in valid_dest_by_class.values():
    os.makedirs(class_dir_v, exist_ok=True)

for imgvid, imgvclass in zip(img_valid['ID'], img_valid['class']):
    filenamev = str(imgvid) + '.png'
    dest_dir_v = valid_dest_by_class.get(imgvclass)
    # Skip rows whose label is neither 0 nor 1, or whose image is not on disk.
    if dest_dir_v is None or filenamev not in available_images_v:
        continue
    shutil.copy(os.path.join(source, filenamev), dest_dir_v)
        

In [1]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# The Network

In [4]:
# Build the network: four convolutional blocks followed by a small dense head

classifier = Sequential()

# Every block is Conv(3x3, same padding) -> Conv(3x3) -> 2x2 max-pool -> 25% dropout.
# The blocks use 32, 64, 64 and 64 filters; only the very first layer declares the
# 64x64x3 input shape.
for block_no, n_filters in enumerate((32, 64, 64, 64)):
    first_layer_kwargs = {'input_shape': (64, 64, 3)} if block_no == 0 else {}
    classifier.add(Conv2D(n_filters, (3, 3), padding='same', activation='relu', **first_layer_kwargs))
    classifier.add(Conv2D(n_filters, (3, 3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2)))
    classifier.add(Dropout(0.25))

# Head: flatten, one hidden dense layer with 50% dropout, then a single sigmoid unit
# that produces the binary prediction.
classifier.add(Flatten())
classifier.add(Dense(units=256, activation='relu'))
classifier.add(Dropout(0.5))
classifier.add(Dense(units=1, activation='sigmoid'))

In [5]:
# Compile the CNN: Adam optimizer, binary cross-entropy loss, accuracy reported as the metric
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [6]:
# Visualize the network: print every layer with its output shape and parameter count
classifier.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 62, 62, 32)        9248      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 31, 31, 64)        18496     
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 29, 29, 64)        36928     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 14, 14, 64)        0         
__________

In [7]:
# Part 2 - Fitting the CNN to the images
from keras.preprocessing.image import ImageDataGenerator

# Data Augmentation

In [8]:

# Training generator: rescale pixel values by 1/255 and augment with small
# horizontal/vertical shifts plus random horizontal flips (no vertical flips).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)

# Evaluation generator: same rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
# Stream augmented training batches (32 images, resized to 64x64) from the per-class folders
training_set = train_datagen.flow_from_directory(
    'MLU_MPC_Data_With_Dropout/training_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
)

Found 7519 images belonging to 2 classes.


In [10]:
# Stream un-augmented batches from the validation folders.
# NOTE: despite its name, `test_set` holds the *validation* split.
test_set = test_datagen.flow_from_directory(
    'MLU_MPC_Data_With_Dropout/validation_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
)

Found 2507 images belonging to 2 classes.


In [11]:
# Train the CNN for 25 epochs, validating after each one.
# `steps_per_epoch` and `validation_steps` count *batches*, not images. len(iterator)
# is the number of batches needed to cover the folder once, so one epoch is exactly
# one pass over the data. The previous values (7519 / 2507) were the image counts,
# which with batch_size=32 made every epoch and every validation run ~32 passes long.
classifier.fit_generator(training_set,
                         steps_per_epoch=len(training_set),
                         epochs=25,
                         validation_data=test_set,
                         validation_steps=len(test_set))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x14bdc1f2e48>

In [12]:
# Save/Load the model
import datetime

# Tag the saved file with today's date (add %H:%M to the format for a time-of-day stamp)
now = datetime.datetime.now()
curdate = "{:%Y-%m-%d}".format(now)
print(curdate)
classifier.save("CNN-Classifier-Dropout{}.h5".format(curdate))
# To restore a saved model later: keras.models.load_model("file_name.h5")

2018-05-28


# Test images

In [13]:
import numpy as np
from keras.preprocessing import image


In [14]:
# Prepare the submission file: create it and write the header row.
# The handle is intentionally left open; the prediction cell appends one row per
# test image and closes it afterwards.
submission_file = "submission6_porvakan.csv"
subcolumnTitleRow = "ID,class\n"
subcsv = open(submission_file, mode="w", newline='')
subcsv.write(subcolumnTitleRow)

9

In [None]:
# Predict the class for each individual image in the test data set and append
# one "ID,class" row per image to the already-open submission file.
test_df = pd.read_csv('public_test_features.csv')

try:
    for img_id in test_df['ID']:
        sub_test_image = image.load_img('images/'+str(img_id)+'.png', target_size = (64, 64))
        sub_test_image = image.img_to_array(sub_test_image)
        # Apply the same 1/255 rescaling the training/validation generators used;
        # feeding raw 0-255 pixels to a network trained on rescaled input skews predictions.
        sub_test_image = sub_test_image / 255.
        # The model expects a batch dimension: (1, 64, 64, 3)
        sub_test_image = np.expand_dims(sub_test_image, axis = 0)
        subresult = classifier.predict(sub_test_image)
        # The sigmoid output is a probability in [0, 1]; int() on it truncates to 0 for
        # anything below 1.0, so threshold at 0.5 instead. Folder names are indexed
        # alphabetically, so the output is the probability of "Class1".
        subClass = int(subresult[0][0] > 0.5)
        subcsv.write(str(img_id) + "," + str(subClass) + "\n")
finally:
    # Always release the file handle, even if loading or predicting an image fails.
    subcsv.close()