In [1]:
import cv2
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm_notebook
from random import shuffle

import os
import shutil

In [2]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras_tqdm import TQDMNotebookCallback
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.callbacks import Callback

Using TensorFlow backend.


In [3]:
# Image generators: the training generator augments (shear, zoom, horizontal
# flip) on top of rescaling; the validation generator only rescales.
pixel_scale = 1.0 / 255
train_datagen = ImageDataGenerator(
    rescale=pixel_scale,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_datagen = ImageDataGenerator(rescale=pixel_scale)

In [4]:
# Stream training images from disk in batches of `batch_size`, resized to
# 56x56, with one binary label per image.
batch_size = 24
train_generator = train_datagen.flow_from_directory(
    directory=r'G:\Machine Learning\git_code\data\lunar-rock\DataSet\TrainImages',
    target_size=(56, 56),
    batch_size=batch_size,
    class_mode='binary',
)

# validation_generator = val_datagen.flow_from_directory(
#         r'G:\Machine Learning\git_code\data\lunar-rock\DataSet\ValImages',
#         target_size=(150, 150),
#         batch_size=batch_size,
#         class_mode='categorical')

##Found 20000 images belonging to 2 classes.
##Found 5000 images belonging to 2 classes.

Found 11998 images belonging to 2 classes.


In [5]:
# CNN for 56x56 RGB input: three conv + max-pool stages, then a small dense
# head ending in a single sigmoid unit.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=(56, 56, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head with dropout before and after the hidden dense layer
    Dropout(0.25),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [6]:
# Print the layer-by-layer summary of `model`.
# NOTE(review): the saved output below reports 64 and 128 filters for the
# second and third conv layers, while the model cell defines 32 and 64 -
# the output looks stale; re-run after a kernel restart to confirm.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 56, 56, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 7, 7, 128)         0         
__________

In [7]:
epochs = 2
lrate = 0.01
decay = lrate/epochs
# SGD (stochastic gradient descent) optimizer with a learning rate of 0.01 and
# a decay of lrate/epochs. No momentum argument is passed to SGD here.
# NOTE(review): this comment previously mentioned a momentum of 0.9 - confirm
# whether momentum=0.9 was meant to be passed.
# The model ends in a single sigmoid unit (binary classification), so the loss
# is 'binary_crossentropy'.
sgd = SGD(lr=lrate, decay=decay, nesterov=False)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [8]:
## Callback for loss logging per epoch
class LossHistory(Callback):
    """Record the training and validation loss reported at the end of each epoch.

    After training, ``losses`` and ``val_losses`` hold one entry per finished
    epoch. An entry is ``None`` when the corresponding key is missing from the
    logs (e.g. ``val_loss`` when no validation data is supplied).
    """

    def on_train_begin(self, logs=None):
        """Reset both histories at the start of a training run."""
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's 'loss' and 'val_loss' from ``logs``."""
        # Use None as the default instead of a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

# Per-epoch loss recorder
history = LossHistory()

# Early stopping driven by the training loss ('loss'), with a patience of 5
early_stopping = keras.callbacks.EarlyStopping(
    monitor='loss',
    min_delta=0,
    patience=5,
    verbose=0,
    mode='auto',
)

In [9]:
ratio = 0.2  # not referenced by any visible cell
# Take the sample count from the generator instead of hard-coding it, so the
# step count stays correct if the training directory changes.
n = train_generator.samples
fitted_model = model.fit_generator(
        train_generator,
        # Floor division: a trailing partial batch is not counted as a step.
        steps_per_epoch=n // batch_size,
        # Reuse `epochs` so the run length matches the value used for `decay`.
        epochs=epochs,
        callbacks=[TQDMNotebookCallback(leave_inner=True, leave_outer=True), early_stopping, history],
        verbose=0)

HBox(children=(IntProgress(value=0, description='Training', max=2, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=499, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='Epoch 1', max=499, style=ProgressStyle(description_width='ini…




In [10]:
# Test images are only rescaled (no augmentation).
test_datagen = ImageDataGenerator(rescale=1/255.)
test_generator = test_datagen.flow_from_directory(
       r'G:\Machine Learning\git_code\data\lunar-rock\DataSet\Test',
       target_size=(56, 56),
       batch_size=batch_size,
       # No labels for the test set: the generator yields image batches only.
       class_mode=None,
       # Keep a fixed file order so predictions can be matched to file names.
       shuffle=False)

Found 7534 images belonging to 1 classes.


In [11]:
test_generator.reset()
# Exactly one pass over the test set: ceil(samples / batch_size) as an integer.
# A fractional or oversized step count makes the generator wrap around and
# return more predictions than there are images.
test_steps = (test_generator.samples + batch_size - 1) // batch_size
pred = model.predict_generator(test_generator, steps=test_steps, verbose=0)

In [12]:
# The model has a single sigmoid output, so `pred` has one column holding the
# probability of class index 1. argmax over that single column is always 0;
# threshold at 0.5 instead to get a 0/1 class index per image.
predicted_class_indices = (pred.ravel() > 0.5).astype(int)

In [13]:
# Invert the generator's name -> index mapping to index -> name, then look up
# the class name for every predicted index.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = list(map(labels.__getitem__, predicted_class_indices))

In [14]:
# Raw string so the Windows backslashes are never read as escape sequences.
src_directory = r'G:\Machine Learning\git_code\data\lunar-rock\DataSet\Test\TestImages'
# Take the file names from the generator rather than os.listdir(): the
# generator's list contains only the images it actually loaded, in its own
# index order. Row i of the predictions matches filenames[i] as long as the
# generator yields in index order (shuffle=False).
filenames = [os.path.basename(path) for path in test_generator.filenames]

In [15]:
# Pair each file name with its predicted class, one row per image.
submission_rows = list(zip(filenames, predictions))
result = pd.DataFrame(submission_rows, columns=['Image_File', 'Class'])

In [16]:
# Index by file name so the CSV has the columns Image_File, Class.
# Guarded so the cell can be re-run: a second set_index('Image_File') would
# raise KeyError because the column has already become the index.
if result.index.name != 'Image_File':
    result = result.set_index('Image_File')
result.to_csv(r'lunar-rock/lunar_rock_submission_e2_56_new.csv')