In [1]:
import numpy as np
import pandas as pd
from keras.applications.vgg16 import VGG16
from keras.utils.vis_utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.models import Model
from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn.metrics import classification_report

Using TensorFlow backend.


In [4]:
# Download the training data from Google Drive (this cell relies on google.colab,
# so it is expected to run inside a Colab runtime).
! pip install pydrive
# PyDrive classes used to talk to the Google Drive API, plus the Colab auth helper
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive 
from google.colab import auth 
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# 2. Reference the Drive file by its ID.
file_id = '1CZ96d936zVKksp2tlEyITAE9jdnjybnL'
downloaded = drive.CreateFile({'id': file_id})

# 3. Write the file's contents to train.json (relative path, i.e. the current
#    working directory) so it can be read with pandas afterwards.
downloaded.GetContentFile('train.json')



In [0]:
# Parse train.json into a pandas DataFrame
data = pd.read_json(path_or_buf="train.json")

In [0]:
# Build 3-channel 75x75 images from the two bands stored in the DataFrame:
#   channel 0 = band_1, channel 1 = band_2, channel 2 = band_1 + band_2.
# Labels are in the "is_iceberg" column where 0 indicates a ship and 1 an iceberg.
X_band_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in data["band_1"]])
X_band_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in data["band_2"]])
channel_3 = X_band_1 + X_band_2
# Stack along a new trailing axis -> (n_samples, 75, 75, 3)
new_data = np.stack([X_band_1, X_band_2, channel_3], axis=-1)

targets = data["is_iceberg"]

# Split into train and test: cut the data into 10 consecutive chunks (no
# shuffling), the first 8 go to training and the last 2 to testing.
# The labels are converted to a plain ndarray and chunked once, with the same
# number of sections as the images, so both splits share identical boundaries.
split = np.array_split(new_data, 10, axis=0)
target_split = np.array_split(np.asarray(targets), 10, axis=0)
X_train = np.concatenate(split[0:8], axis=0)
X_test = np.concatenate(split[8:10], axis=0)
y_train = np.concatenate(target_split[0:8], axis=0)
# Y_test keeps the integer class labels (used for the classification report);
# y_test below is the one-hot version fed to Keras.
Y_test = np.concatenate(target_split[8:10], axis=0)

# Guard against images and labels getting out of step.
assert len(X_train) == len(y_train), "train images/labels length mismatch"
assert len(X_test) == len(Y_test), "test images/labels length mismatch"

# to one-hot vectors
y_train = np_utils.to_categorical(y_train, num_classes=2)
y_test = np_utils.to_categorical(Y_test, num_classes=2)


In [7]:
# sanity check: shape of a single input image (everything after the sample axis)
print("Shape of the input image : ", new_data.shape[1:])

Shape of the input image :  (75, 75, 3)


In [0]:
# Augmenting generator for the training images: random flips, shifts, zoom and
# rotation are applied to each batch as it is produced.
# NOTE(review): whether a shift range of 2 means pixels or a fraction of the
# image size depends on the installed Keras version — confirm.
gen = ImageDataGenerator(horizontal_flip=True,
                         vertical_flip=True,
                         width_shift_range=2,
                         height_shift_range=2,
                         channel_shift_range=0,
                         zoom_range=0.2,
                         rotation_range=10)

gen_op = gen.flow(x=X_train, y=y_train, batch_size=10, seed=10)

# Validation batches must be the untouched images: feeding them through the
# augmenting generator would make the validation metrics random and not
# comparable with predictions made on X_test directly. Use a plain generator
# and keep the sample order fixed.
val_gen = ImageDataGenerator()
gen_val = val_gen.flow(x=X_test, y=y_test, batch_size=10, shuffle=False)

In [0]:
def getModel(input_shape=None):
    """Build and compile a 2-class classifier on top of a frozen VGG16 base.

    The ImageNet-pretrained VGG16 convolutional stack (no top) is followed by
    global average pooling, two 512-unit ReLU layers with dropout, and a
    2-unit softmax output. All VGG16 layers are frozen so only the new head
    is trained.

    Args:
        input_shape: shape of one input image, e.g. (75, 75, 3). Defaults to
            ``X_train.shape[1:]`` (the notebook-level training array), which
            is what the function used before this parameter existed.

    Returns:
        A compiled keras ``Model`` (SGD with Nesterov momentum, accuracy metric).
    """
    if input_shape is None:
        input_shape = X_train.shape[1:]

    # `classes` is not passed: the classification head is added below because
    # include_top=False.
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=input_shape)
    x = base_model.get_layer("block5_pool").output
    x = GlobalAveragePooling2D()(x)

    # add fully connected layers with dropout in between
    x = Dense(512, activation='relu', name="dense1")(x)
    x = Dropout(rate=0.2)(x)
    x = Dense(512, activation="relu", name="dense2")(x)
    x = Dropout(rate=0.3)(x)
    predictions = Dense(2, activation='softmax', name="output")(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    # train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional VGG16 layers; must happen before compile
    for layer in base_model.layers:
        layer.trainable = False

    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    # categorical_crossentropy is the loss that matches a softmax output with
    # one-hot targets. For exactly two classes it yields the same value as
    # the previously used binary_crossentropy, but states the intent and
    # stays correct if the number of classes ever changes.
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    return model

In [38]:
model = getModel()

# Number of batches needed to see every validation sample once. With a fixed
# validation_steps=2 only two batches were scored per epoch, which makes the
# reported validation metrics very noisy.
val_steps = int(np.ceil(gen_val.n / float(gen_val.batch_size)))

# fit the data on model
# NOTE: steps_per_epoch=50 means each "epoch" draws 50 batches from gen_op,
# not necessarily one full pass over the training data.
history = model.fit_generator(generator=gen_op,
                              steps_per_epoch=50,
                              epochs=5,
                              validation_data=gen_val,
                              validation_steps=val_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f80672246d8>

In [0]:
# per-class scores for every test image, then the index of the highest score
predictions = model.predict(x=X_test, batch_size=10)
y_pred = predictions.argmax(axis=1)

In [40]:
# precision / recall / f1 per class, comparing integer labels with predictions
print(classification_report(y_true=Y_test, y_pred=y_pred))

             precision    recall  f1-score   support

          0       0.81      0.85      0.83       211
          1       0.68      0.61      0.65       109

avg / total       0.77      0.77      0.77       320

