In [1]:
'''For preprocessing images'''
import numpy as np
from PIL import Image
import scipy
import matplotlib.pyplot as plt
import csv
import glob
'''For CNN'''
import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, MaxPool2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import SGD, Adam, RMSprop
from keras.models import Model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

import keras.backend as K
K.set_image_data_format('channels_last')
from matplotlib.pyplot import imshow

Using TensorFlow backend.


In [2]:
# Side length, in pixels, of the square images produced by standardize() and fed to the network.
image_size = 96
# Expected number of training images (used by load_train()).
train_data_size = 4000
# Expected number of held-out test images (used by load_test()).
test_data_size = 1000

In [3]:
def standardize(img):
    """Pad a PIL image to a centred square, resize it and return it as an array.

    The shorter side is padded symmetrically (cropping outside the image
    bounds makes PIL fill the new area with zeros) so the aspect ratio is
    preserved, then the square canvas is resampled to
    ``image_size`` x ``image_size``.

    Parameters
    ----------
    img : PIL.Image.Image
        Source image of any size.

    Returns
    -------
    numpy.ndarray
        ``int32`` array of the resized image; for an RGB input the shape is
        ``(image_size, image_size, 3)``.
    """
    width, height = img.size
    longer_side = max(width, height)

    # Integer padding: when the size difference is odd the extra pixel goes to
    # the right/bottom edge, so the canvas is always exactly
    # longer_side x longer_side (float halves relied on PIL's rounding and
    # could produce a canvas that was one pixel off from square).
    left_padding = (longer_side - width) // 2
    top_padding = (longer_side - height) // 2
    img = img.crop(
        (
            -left_padding,
            -top_padding,
            longer_side - left_padding,
            longer_side - top_padding,
        )
    )

    # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias, which
    # was removed in Pillow 10.
    img = img.resize((image_size, image_size), Image.LANCZOS)
    # plt.imshow(img)  # Uncomment to see the image being standardized.

    # Converting the image to a numpy array.
    return np.asarray(img, dtype="int32")

In [4]:
def function():
    """Smoke test: standardize the first training image found and print its shape.

    Does nothing when no ``.tif`` file matches the training glob pattern.
    """
    matches = glob.glob('input/subset_data/train/*.tif')
    if not matches:
        return
    first_image = standardize(Image.open(matches[0]))
    print(first_image.shape)

In [5]:
# Smoke test: standardize one training image and print the shape of the resulting array.
function()

(96, 96, 3)


In [6]:
'''Loading data'''
def get_id_from_filename(filename):
    """Return the image id encoded in a file path.

    The id is the final path component up to (not including) its first dot,
    e.g. ``'input/subset_data/train/abc123.tif'`` -> ``'abc123'``.

    Parameters
    ----------
    filename : str
        Path to an image file; forward slashes and backslashes are both
        accepted as directory separators.

    Returns
    -------
    str
        The id used to look the image up in the labels CSV.
    """
    # glob returns OS-native separators, so on Windows the file name is joined
    # with a backslash; normalise before taking the last component.
    basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
    # Everything after the first dot is treated as the extension.
    return basename.split(".", 1)[0]

In [7]:
def load_train():
    """Load the training images and their one-hot labels.

    Images are read from ``input/subset_data/train/*.tif`` (at most
    ``train_data_size`` of them, in sorted filename order) and passed through
    ``standardize``. Labels are looked up by image id in
    ``input/subset_data/train_labels_full.csv`` (first column: id, second
    column: 0 or 1; the first row is a header).

    Returns
    -------
    X_train : numpy.ndarray
        ``int32`` array of shape ``(n, image_size, image_size, 3)``.
    Y_train : numpy.ndarray
        ``int32`` array of shape ``(n, 2)``; ``[1, 0]`` means label 0 and
        ``[0, 1]`` means label 1. Row ``k`` is the label of image ``k``.

    Raises
    ------
    ValueError
        If any loaded image has no 0/1 label in the CSV.
    """
    # Sorted so the row order is reproducible; capped at train_data_size.
    filenames = sorted(glob.glob('input/subset_data/train/*.tif'))[:train_data_size]
    names = [get_id_from_filename(filename) for filename in filenames]
    n_images = len(filenames)

    # Sized from the files actually found so no row is left uninitialised.
    X_train = np.empty((n_images, image_size, image_size, 3), dtype="int32")
    Y_train = np.zeros(shape=(n_images, 2), dtype="int32")

    # Image k is stored in row k, so X_train[k] and Y_train[k] describe the same file.
    for i, filename in enumerate(filenames):
        with Image.open(filename) as img:
            X_train[i] = standardize(img)

    # id -> row lookup (first occurrence wins) instead of repeated list scans.
    row_of = {}
    for i, name in enumerate(names):
        row_of.setdefault(name, i)
    labelled = np.zeros(n_images, dtype=bool)

    with open('input/subset_data/train_labels_full.csv') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV, None)  # skip the header row
        for row in readCSV:
            if not row:
                continue  # tolerate blank lines
            i = row_of.get(row[0])
            if i is None:
                continue  # label for an image that was not loaded
            label = int(row[1])
            if label in (0, 1):
                Y_train[i] = (1, 0) if label == 0 else (0, 1)
                labelled[i] = True

    if not labelled.all():
        missing = [name for name, has_label in zip(names, labelled) if not has_label]
        raise ValueError(
            "No 0/1 label found for %d training image(s), e.g. %s"
            % (len(missing), missing[:5])
        )
    return X_train, Y_train

In [8]:
def load_test():
    """Load the held-out test images and their one-hot labels.

    Images are read from ``input/subset_data/test_with_outputs/*.tif`` (at
    most ``test_data_size`` of them, in sorted filename order) and passed
    through ``standardize``. Labels are looked up by image id in
    ``input/subset_data/train_labels_full.csv`` (first column: id, second
    column: 0 or 1; the first row is a header).

    Returns
    -------
    X_test : numpy.ndarray
        ``int32`` array of shape ``(n, image_size, image_size, 3)``.
    Y_test : numpy.ndarray
        ``int32`` array of shape ``(n, 2)``; ``[1, 0]`` means label 0 and
        ``[0, 1]`` means label 1. Row ``k`` is the label of image ``k``.

    Raises
    ------
    ValueError
        If any loaded image has no 0/1 label in the CSV.
    """
    # Sorted so the row order is reproducible; capped at test_data_size.
    filenames = sorted(glob.glob('input/subset_data/test_with_outputs/*.tif'))[:test_data_size]
    names = [get_id_from_filename(filename) for filename in filenames]
    n_images = len(filenames)

    # Sized from the files actually found so no row is left uninitialised.
    X_test = np.empty((n_images, image_size, image_size, 3), dtype="int32")
    Y_test = np.zeros(shape=(n_images, 2), dtype="int32")

    # Image k is stored in row k, so X_test[k] and Y_test[k] describe the same file.
    for i, filename in enumerate(filenames):
        with Image.open(filename) as img:
            X_test[i] = standardize(img)

    # id -> row lookup (first occurrence wins) instead of repeated list scans.
    row_of = {}
    for i, name in enumerate(names):
        row_of.setdefault(name, i)
    labelled = np.zeros(n_images, dtype=bool)

    with open('input/subset_data/train_labels_full.csv') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV, None)  # skip the header row
        for row in readCSV:
            if not row:
                continue  # tolerate blank lines
            i = row_of.get(row[0])
            if i is None:
                continue  # label for an image that was not loaded
            label = int(row[1])
            if label in (0, 1):
                Y_test[i] = (1, 0) if label == 0 else (0, 1)
                labelled[i] = True

    if not labelled.all():
        missing = [name for name, has_label in zip(names, labelled) if not has_label]
        raise ValueError(
            "No 0/1 label found for %d test image(s), e.g. %s"
            % (len(missing), missing[:5])
        )
    return X_test, Y_test

In [9]:
# Load the raw integer pixel arrays and one-hot labels for both splits.
X_train_orig,Y_train_orig = load_train()
X_test_orig,Y_test_orig = load_test()

# Normalizing for faster convergence
# Dividing by 255. yields new float arrays; the *_orig arrays are left unchanged.
# (assumes 8-bit pixel values, i.e. inputs in [0, 255] -> [0, 1])
X_train = X_train_orig/255.
X_test = X_test_orig/255.
# Labels are already one-hot encoded, so they are used as-is.
Y_train = Y_train_orig
Y_test = Y_test_orig

# Sanity check: the first dimension of X and Y must match within each split.
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)

X_train shape:  (4000, 96, 96, 3)
Y_train shape:  (4000, 2)
X_test shape:  (1000, 96, 96, 3)
Y_test shape:  (1000, 2)


In [10]:
# Display the normalized training array (NumPy abbreviates the large array with "...").
X_train

array([[[[0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         ...,
         [0.9254902 , 0.8627451 , 0.90588235],
         [0.88627451, 0.80392157, 0.8627451 ],
         [0.85098039, 0.76078431, 0.83137255]],

        [[0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         ...,
         [0.83921569, 0.76862745, 0.83137255],
         [0.84313725, 0.76078431, 0.82745098],
         [0.83529412, 0.75294118, 0.82745098]],

        [[0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         [0.89019608, 0.89411765, 0.90196078],
         ...,
         [0.82745098, 0.75294118, 0.82745098],
         [0.82745098, 0.75294118, 0.83137255],
         [0.83529412, 0.76078431, 0.83921569]],

        ...,

        [[1.        , 0.96470588, 0.97254902],
         [1.        , 0.96470588, 1.        ]

In [12]:
# To check values inside.
# print(X_train)
# print(Y_train)
# print(X_test)
# print(Y_test)
# NOTE(review): this replaces the X_test_orig returned by load_test() with values
# reconstructed from the normalized array (float instead of int32), yet the array
# printed below is X_train_orig - confirm which one was meant to be inspected.
X_test_orig = X_test*255.0
print(X_train_orig)

[[[[227. 228. 230.]
   [227. 228. 230.]
   [227. 228. 230.]
   ...
   [236. 220. 231.]
   [226. 205. 220.]
   [217. 194. 212.]]

  [[227. 228. 230.]
   [227. 228. 230.]
   [227. 228. 230.]
   ...
   [214. 196. 212.]
   [215. 194. 211.]
   [213. 192. 211.]]

  [[227. 228. 230.]
   [227. 228. 230.]
   [227. 228. 230.]
   ...
   [211. 192. 211.]
   [211. 192. 212.]
   [213. 194. 214.]]

  ...

  [[255. 246. 248.]
   [255. 246. 255.]
   [255. 233. 255.]
   ...
   [216. 186. 214.]
   [218. 188. 214.]
   [222. 190. 214.]]

  [[255. 243. 245.]
   [242. 215. 230.]
   [159. 124. 156.]
   ...
   [233. 202. 233.]
   [223. 189. 216.]
   [227. 191. 219.]]

  [[236. 211. 215.]
   [120.  89. 105.]
   [126.  84. 122.]
   ...
   [224. 193. 224.]
   [222. 187. 217.]
   [232. 193. 222.]]]


 [[[146. 118. 130.]
   [217. 189. 201.]
   [243. 215. 227.]
   ...
   [216. 195. 202.]
   [255. 248. 254.]
   [230. 213. 219.]]

  [[251. 225. 236.]
   [255. 239. 250.]
   [187. 161. 172.]
   ...
   [241. 220. 227.]
 

## Model (tried and tested before in the tutorial linked below):
https://towardsdatascience.com/image-classification-python-keras-tutorial-kaggle-challenge-45a6332a58b8

In [10]:
# Every stage is Conv2D(3x3, relu) -> MaxPooling2D(2x2) -> BatchNormalization.
model = Sequential()

# The first stage also fixes the input shape of the network.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(image_size, image_size, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

# Remaining stages differ only in their number of filters.
for n_filters in (64, 64, 96, 32):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())

# Dropout is applied once, after the last convolutional stage.
model.add(Dropout(0.2))

# Classifier head: flatten, one hidden dense layer, 2-way softmax output.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
#model.add(Dropout(0.3))
model.add(Dense(2, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [11]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 94, 94, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 47, 47, 32)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 47, 47, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 45, 45, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 22, 22, 64)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 22, 22, 64)        256       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 20, 64)        36928     
__________

In [12]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
# NOTE(review): the output layer is a 2-unit softmax trained on one-hot labels;
# 'categorical_crossentropy' is the conventional loss for that setup - confirm
# that 'binary_crossentropy' is intended.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy'])

In [None]:
# Train for 20 epochs in mini-batches of 32 with progress output enabled.
# No validation data is passed, so only training metrics are reported.
model.fit(X_train, Y_train, batch_size = 32, epochs = 20, verbose = 1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

In [None]:
# Evaluate on the held-out test set; preds[0] is printed as the loss and
# preds[1] as the accuracy.
preds = model.evaluate(x = X_test, y = Y_test)
print()
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))