In [None]:
#Load necessary libraries
from glob import glob 
import numpy as np
import pandas as pd
import keras,cv2,os

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.layers import Conv2D, MaxPool2D

from tqdm import tqdm_notebook,trange
import matplotlib.pyplot as plt

import gc #garbage collection, we need to save all the RAM we can

In [None]:
#paths to training and test data
path = "../input/" #adapt this path, when running locally
# os.path.join with a trailing '' keeps the trailing separator and also works
# when `path` is adapted to a value that does not end in a slash.
train_path = os.path.join(path, 'train', '')
test_path = os.path.join(path, 'test', '')

# One row per training image: the full path of every *.tif file under train_path.
df = pd.DataFrame({'path': glob(os.path.join(train_path,'*.tif'))}) # load the filenames
# The id is the file name without directory and extension. It is derived with
# os.path instead of taking a fixed position after splitting on '/', so it does
# not depend on how many directory levels `path` contains.
df['id'] = df.path.map(lambda x: os.path.splitext(os.path.basename(x))[0]) # keep only the file names in 'id'
labels = pd.read_csv(os.path.join(path, "train_labels.csv")) # reading the provided labels
# Inner join on 'id': only images that have a row in the labels file are kept.
df = df.merge(labels, on = "id") # merging labels and filepaths
df.head()

In [None]:
#https://www.kaggle.com/gomezp/complete-beginner-s-guide-eda-keras-lb-0-93
def load_data(N,df):
    """Load the first N images listed in df into memory.

    Parameters
    ----------
    N : int
        Number of images to load. If N is larger than the number of rows in
        df, it is reduced to len(df) so that X and y always have the same
        length.
    df : pandas.DataFrame
        Needs a 'path' column (image file path passed to cv2.imread) and a
        'label' column.

    Returns
    -------
    X : numpy.ndarray
        uint8 array of shape (N, 96, 96, 3) holding the decoded images
        exactly as cv2.imread returns them (OpenCV decodes colour images in
        BGR channel order).
    y : numpy.ndarray
        The first N values of df['label'].

    Raises
    ------
    IOError
        If cv2.imread cannot read one of the files (it returns None then).
    """
    # never ask for more rows than df has, otherwise X and y would disagree
    N = min(N, len(df))
    # allocate a numpy array for the images (N, 96x96px, 3 channels, values 0 - 255)
    X = np.zeros([N,96,96,3],dtype=np.uint8)
    # positional slice: independent of whatever index labels df carries
    subset = df.iloc[:N]
    #convert the labels to a numpy array too
    # (.values replaces DataFrame.as_matrix, which newer pandas no longer has)
    y = subset['label'].values
    #read images one by one, tdqm notebook displays a progress bar
    for i, img_path in enumerate(tqdm_notebook(subset['path'], total=N)):
        img = cv2.imread(img_path)
        if img is None:
            # fail with the offending path instead of an opaque assignment error
            raise IOError("Could not read image: {}".format(img_path))
        X[i] = img

    return X,y

In [None]:
#N = df["path"].size # get the number of images in the training data set
# Read only a sample of the data to keep RAM usage low. The sample size is
# capped at the number of rows in df so it can never exceed what is available.
N = min(10000, len(df))
X,y = load_data(N=N,df=df)

In [None]:
#Collect garbage
# Rebind both names to None (dropping whatever they referenced, if anything)
# and then run a full collection pass. The trailing ';' hides the return
# value of gc.collect() in the notebook output.
positives_samples = negative_samples = None
gc.collect();

In [None]:
#Train test(validate) split [70:30]
from sklearn.model_selection import train_test_split
# 30% of the samples are held out for validation; the fixed random_state
# makes the shuffle (and therefore the split) repeatable between runs.
split_options = dict(test_size=0.3, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [None]:
# Sanity check: shapes of the training/validation images and labels, on one line.
print(*(arr.shape for arr in (X_train, X_val, y_train, y_val)))

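A very simple, LeNet-style network. The classic LeNet-5 has only 7 layers: 3 convolutional layers (C1, C3 and C5), 2 sub-sampling (pooling) layers (S2 and S4), and 1 fully connected layer (F6), followed by the output layer. The model built below is a simplified variant of that idea: 2 convolution + pooling blocks, 1 fully connected layer, and a sigmoid output layer.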

In [None]:
#build Model
# The whole stack is passed to Sequential as one list; layers are added in
# exactly the order they appear here.
model = Sequential([
    # Block 1: 64 filters with a 3x3 kernel on the 96x96x3 input,
    # batch-norm -> ReLU -> 2x2 max-pooling -> dropout
    Conv2D(64, 3, input_shape = (96, 96, 3)),
    BatchNormalization(),
    Activation("relu"),
    MaxPool2D(pool_size =(2,2)),
    Dropout(0.25),

    # Block 2: same layout with 32 filters
    Conv2D(32, 3),
    BatchNormalization(),
    Activation("relu"),
    MaxPool2D(pool_size =(2,2)),
    Dropout(0.25),

    # Classifier head: flatten the feature maps, then one 28-unit dense layer
    Flatten(),
    Dense(28),
    BatchNormalization(),
    Activation("relu"),
    Dropout(0.25),

    # Output: a single sigmoid unit, i.e. a value between 0 and 1
    Dense(1, activation = "sigmoid"),
])
          


In [None]:
#Compile model
# Binary cross-entropy matches the single sigmoid output; the optimizer is
# Adam constructed with 0.001 as its first argument; accuracy is tracked.
adam = keras.optimizers.Adam(0.001)
model.compile(
    optimizer=adam,
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [None]:
#train the model
# Keep the History object that fit() returns: the plotting cell reads the
# per-epoch metrics from history.history and fails with a NameError if the
# result is not bound to `history`.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=3,batch_size=50)


In [None]:
# Plot training & validation accuracy values
# `history` must be the History object returned by model.fit().
# Older Keras releases store the metric under 'acc'/'val_acc', newer ones under
# 'accuracy'/'val_accuracy'; use whichever key this run actually recorded.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# the second curve comes from validation_data, so label it as validation
plt.legend(['Train', 'Validation'], loc='best')
plt.show()