In [1]:
import numpy as np
import pandas as pd
import sys; sys.path.append('../courses/deeplearning1/nbs/')
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# if keras giving hard time
# !pip install --upgrade keras --user
# !pip install --upgrade tensorflow --user

## to do:
1. load into data frame
2. convert to numpy array
3. average images to make 3rd band
4. stack to array shape(#_ims, 75,75,3)
5. make train, test

In [3]:
# Read the training set from a JSON file (relative path) into a DataFrame
train = pd.read_json('data/train.json')

In [4]:
# Summarise the columns: dtypes, non-null counts and memory usage
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1604 entries, 0 to 1603
Data columns (total 5 columns):
band_1        1604 non-null object
band_2        1604 non-null object
id            1604 non-null object
inc_angle     1604 non-null object
is_iceberg    1604 non-null int64
dtypes: int64(1), object(4)
memory usage: 75.2+ KB


In [5]:
# Preview the first rows of the training frame
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878361, -27.15416, -28.668615, -29.537971...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920305, -14.920363, -12.66633...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [6]:
# convert images to numpy array
def img_to_array(img_list, shape=(75, 75)):
    """Convert a flat sequence of pixel values into a 2-D float32 image.

    Parameters
    ----------
    img_list : array-like
        Flattened pixel values; must hold exactly as many elements as
        ``shape`` describes.
    shape : tuple of int, optional
        Target image shape. Defaults to ``(75, 75)``, the size used
        throughout this notebook, so existing single-argument callers
        (e.g. ``Series.apply(img_to_array)``) behave as before.

    Returns
    -------
    numpy.ndarray
        Array of dtype ``float32`` with the requested shape.

    Raises
    ------
    ValueError
        If the number of elements does not match ``shape`` (raised by
        ``reshape``).
    """
    return np.asanyarray(img_list, dtype=np.float32).reshape(shape)
# swap each flattened band column for its 2-D array form
for band in ('band_1', 'band_2'):
    train[band] = train[band].apply(img_to_array)

In [7]:
# create one_hot response: column 0 <-> is_iceberg == 0, column 1 <-> is_iceberg == 1
# (`to_categorical` is imported from `keras.utils`; `keras.utils.np_utils` is a legacy module path)
from keras.utils import to_categorical
Y_tr = to_categorical(train.is_iceberg.values, 2)

Using TensorFlow backend.


In [8]:
# dataset sizes plus training hyper-parameters for quick iteration (values not tuned yet)
tr_len = len(train.index)
nb_classes = train['is_iceberg'].unique().size
batch_size, epoch, lr = 16, 500, 1e-2

In [9]:
# Build the training tensor in channels-last layout: (tr_len, 75, 75, 3)
#   channel 0 = band_1, channel 1 = band_2, channel 2 = per-pixel mean of both bands
ch_1 = np.vstack(train.band_1.values).reshape(tr_len, 75, 75)
# second channel must come from band_2 (it previously re-read band_1, which made
# all three channels identical and discarded band_2 entirely)
ch_2 = np.vstack(train.band_2.values).reshape(tr_len, 75, 75)
# add 3rd channel of average input
ch_3 = (ch_1 + ch_2) / 2

# stack along a new last axis so channels come after height and width
X_tr = np.stack((ch_1, ch_2, ch_3), axis=3)

In [10]:
# carve a hold-out validation set out of the training data
import random

VAL_SIZE = 300
VAL_SEED = 42

# This cell overwrites X_tr / Y_tr, so running it twice would split an already
# split array. Fail loudly instead of indexing out of range or shrinking again.
assert len(X_tr) == tr_len, "X_tr was already split; rebuild X_tr and Y_tr before re-running this cell"

# Seeded RNG: the same rows are held out on every run. Weights are reloaded from
# earlier runs further down, so a split that changed between runs would let the
# model be validated on samples it had already been trained on.
ind = random.Random(VAL_SEED).sample(range(tr_len), VAL_SIZE)

X_val = X_tr[ind]
Y_val = Y_tr[ind]

# make sure to remove the validation samples from the training set
X_tr = np.delete(X_tr, ind, 0)
Y_tr = np.delete(Y_tr, ind, 0)

# Arrays stay channels-last (N, 75, 75, 3). For a channels-first backend use
# np.transpose(X, (0, 3, 1, 2)); np.reshape would keep the memory order and
# scramble pixels across channels.

In [11]:
# sanity-check the shapes of the train / validation arrays
[arr.shape for arr in (X_tr, Y_tr, X_val, Y_val)]

[(1304, 75, 75, 3), (1304, 2), (300, 75, 75, 3), (300, 2)]

## Load VGG16 from keras
- iterate through layers to find best representation of dataset

In [12]:
# use functional api to work with applications
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Dropout, Flatten, Input
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint,EarlyStopping


def FCBlock(x):
    """Append a fully connected stage to tensor ``x`` and return the result.

    The stage is a 4096-unit ReLU ``Dense`` layer followed by ``Dropout``
    with rate 0.5.
    """
    hidden = Dense(4096, activation='relu')(x)
    return Dropout(0.5)(hidden)

def build_model():
    """Build a VGG16-based transfer-learning classifier for 75x75x3 inputs.

    The ImageNet-pretrained VGG16 convolutional base (``include_top=False``)
    is frozen and followed by ``Flatten``, two ``FCBlock`` stages and a
    2-unit softmax output layer.

    Returns
    -------
    keras.models.Model
        Uncompiled functional model from the VGG16 input tensor to the
        2-way softmax predictions.

    Notes
    -----
    A later cell in this notebook defines another ``build_model``; once that
    cell has run, the name refers to that definition instead of this one.
    """
    # channels-last input: (height, width, channels)
    inp = Input(shape=(75,75,3))
    base_model = VGG16(weights='imagenet', include_top=False, input_tensor=inp)

    # freeze the pretrained base so only the newly added head gets trained
    for layer in base_model.layers:
        layer.trainable = False

    # flatten last conv layer
    x = Flatten()(base_model.output)

    # add fully connected layers
    for _ in range(2):
        x = FCBlock(x)

    # add fully connected output
    predictions = Dense(2, activation='softmax')(x)

    # Keras 2 names these arguments `inputs` / `outputs`; the singular
    # `input` / `output` spellings are the deprecated Keras 1 form.
    return Model(inputs=base_model.input, outputs=predictions)

![vgg-structure](data/vgg16.png)

> Using this network structure, but instead of the 1000 ImageNet classes the output layer predicts the 2 `is_iceberg` classes.

In [13]:
# # compile model to train last layers
# model = build_model()
# model.compile(optimizer=SGD(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])

# # fit model on sample data
# checkpointer = ModelCheckpoint(filepath='data/model/weights.hdf5',verbose=1, save_best_only=True)
# early_stop = EarlyStopping(monitor='val_loss', patience = 3)
# model.fit(X_tr, Y_tr,
#           batch_size=batch_size,
#           nb_epoch=epoch, 
#           validation_data=(X_val,Y_val), 
#           callbacks=[checkpointer])

In [21]:
# create model from scratch
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras.optimizers import Adam

#Building the model
def build_model():
    """Assemble the from-scratch CNN used for iceberg classification.

    Architecture, in order:
      * four conv stages, each ``Conv2D(3x3, relu) -> MaxPooling2D(stride 2)
        -> Dropout(0.2)`` with 64, 128, 128 and 64 filters; the first stage
        pools over 3x3 windows, the remaining ones over 2x2 windows
      * ``Flatten``
      * two dense stages, each ``Dense -> relu -> Dropout(0.2)`` with 512 and
        256 units
      * a 2-unit ``Dense`` layer followed by a softmax activation

    Returns the uncompiled ``Sequential`` model; input shape is (75, 75, 3).
    """
    # (number of filters, pooling window) for each conv stage
    conv_stages = [(64, (3, 3)), (128, (2, 2)), (128, (2, 2)), (64, (2, 2))]
    dense_units = (512, 256)

    net = Sequential()

    for stage, (filters, pool) in enumerate(conv_stages):
        # only the very first layer declares the input shape
        extra = {'input_shape': (75, 75, 3)} if stage == 0 else {}
        net.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', **extra))
        net.add(MaxPooling2D(pool_size=pool, strides=(2, 2)))
        net.add(Dropout(0.2))

    # collapse the feature maps into a vector for the dense head
    net.add(Flatten())

    for units in dense_units:
        net.add(Dense(units))
        net.add(Activation('relu'))
        net.add(Dropout(0.2))

    # softmax output over the two classes
    net.add(Dense(2))
    net.add(Activation('softmax'))
    return net

In [22]:
model = build_model()
# The network ends in a 2-unit softmax and the labels are one-hot encoded
# (to_categorical(..., 2)), so categorical cross-entropy is the matching loss.
# For exactly two classes it yields the same value binary cross-entropy did,
# but it stays correct if the class count changes and keeps 'accuracy'
# resolved as categorical accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.001),
              metrics=['accuracy'])

In [28]:
# load the most recent weights (resume from the previous run when its checkpoint exists)
import os

prev_weights = 'data/model/weights_custom_model1_run1.hdf5'
if os.path.exists(prev_weights):
    model.load_weights(prev_weights)
else:
    # first run / fresh checkout: there is nothing to resume from, so keep the
    # freshly initialised weights instead of aborting the whole notebook
    print('No checkpoint at %s - training from scratch' % prev_weights)

In [29]:
# stop once val_loss has not improved for 3 epochs; only the best weights are written to disk
early_stop = EarlyStopping(patience=3, monitor='val_loss')
checkpointer = ModelCheckpoint(
    filepath='data/model/weights_custom_model1_run2.hdf5',
    save_best_only=True,
    verbose=1,
)
model.fit(
    X_tr, Y_tr,
    validation_data=(X_val, Y_val),
    callbacks=[checkpointer, early_stop],
    batch_size=12,
    epochs=100,
    verbose=1,
)

Train on 1304 samples, validate on 300 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


<keras.callbacks.History at 0x7fdf235b8e90>

In [30]:
# load test data and build the prediction tensor
test = pd.read_json('data/test.json')

# convert the flattened band lists to 2-D arrays
test.band_1 = test.band_1.apply(img_to_array)
test.band_2 = test.band_2.apply(img_to_array)

# channels-last layout: (tst_len, 75, 75, 3)
#   channel 0 = band_1, channel 1 = band_2, channel 2 = per-pixel mean of both bands
tst_len = test.shape[0]
ch_1 = np.vstack(test.band_1.values).reshape(tst_len, 75, 75)
# second channel must come from band_2 (it previously re-read band_1, which made
# all three channels identical and discarded band_2 entirely)
ch_2 = np.vstack(test.band_2.values).reshape(tst_len, 75, 75)
# add 3rd channel of average input
ch_3 = (ch_1 + ch_2) / 2

X_test = np.stack((ch_1, ch_2, ch_3), axis=3)

In [31]:
# restore the checkpoint chosen for submission, then score the test tensor
model.load_weights(filepath='data/model/weights_custom_model1_run2.hdf5')

preds = model.predict(x=X_test, batch_size=32)

In [40]:
# Labels were one-hot encoded with to_categorical(is_iceberg, 2), so softmax
# column 1 is P(is_iceberg == 1); column 0 is the probability of the
# non-iceberg class and would invert the submission.
is_iceberg = preds[:, 1]
# NOTE: `id` shadows the builtin; the name is kept because the next cell reads it
id = test['id'].astype(str).values

In [49]:
# Use a separate name for the submission frame: rebinding `preds` (the raw
# model output array) to a DataFrame made the preceding cell fail when re-run,
# because `preds[:, k]` is not valid DataFrame indexing.
submission = pd.DataFrame({'id':id, 'is_iceberg':is_iceberg})
submission.to_csv('data/preds1.csv', index=False)

In [50]:
# Display a link to the saved predictions file in the notebook output
from IPython.display import FileLink
FileLink('data/preds1.csv')