In [1]:
from PIL import ImageFilter, ImageStat, Image, ImageDraw
from multiprocessing import Pool, cpu_count
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import glob
import os
import cv2
import keras
print(keras.__version__)
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers import Input
from keras import optimizers
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau,ModelCheckpoint
from keras.optimizers import Adam
from keras.utils import to_categorical
print(K.backend())
print('image_dim_ordering before ',K.image_dim_ordering())


import pandas as pd
import numpy as np
np.random.seed(1969)

def limit_mem():
    K.get_session().close()
    cfg = K.tf.ConfigProto()
    cfg.gpu_options.allow_growth=True
    K.set_session(K.tf.Session(config=cfg))

limit_mem()

ROOT_DIR = 'data/cervix/'


print('load train data ...')
train_data = np.load(ROOT_DIR + 'train_basic_data_roi.npy')
train_target = np.load(ROOT_DIR + 'train_target_roi.npy')
train_target = to_categorical(train_target)

print('train shape .. ', train_data.shape)
print('target shape .. ', train_target.shape)
train_data = np.transpose(train_data, (0,2,3,1))
print('train shape after .. ', train_data.shape)

test_data = np.load(ROOT_DIR+'test_basic_data_roi.npy')
test_data = np.transpose(test_data, (0,2,3,1))

Using TensorFlow backend.


2.0.2
tensorflow
('image_dim_ordering before ', 'tf')
load train data ...
('train shape .. ', (16409, 3, 128, 128))
('target shape .. ', (16409, 3))
('train shape after .. ', (16409, 128, 128, 3))


In [2]:
print('datagen ...')
datagen = ImageDataGenerator(rotation_range=0.4, zoom_range=0.3, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True, vertical_flip=True)
print('datagen fit ...')
datagen.fit(train_data)
train_data.shape


datagen ...
datagen fit ...


(16409, 128, 128, 3)

In [3]:
def conv_block(inp, filters, size=3, strides=(1,1), act=True):
    x = Convolution2D(filters, (size, size), strides=strides, padding='same')(inp)
    x = BatchNormalization()(x)
    return Activation('relu')(x) if act else x

In [4]:
def res_block(nf, inp):
    x = conv_block(inp, nf)
    x = conv_block(x, nf, act=False)
    return keras.layers.add([inp, x])

In [5]:
def res_model(inp, opt_='adam'):
    x = conv_block(inp, 32)
    x = res_block(32, x)
    x = MaxPooling2D()(x)
    
    x = conv_block(x, 64)
    x = res_block(64, x)
    x = MaxPooling2D()(x) # 32x32x64
    
    x = conv_block(x, 128)
    x = res_block(128, x)    
    x = MaxPooling2D()(x) # 16x16x128
    
    x = conv_block(x, 256)
    x = res_block(256, x)
    x = MaxPooling2D()(x) #8x8x256
    
    x = conv_block(x, 512)
    x = res_block(512, x)
    x = MaxPooling2D()(x) #4x4x512
    
    
    x = Dropout(0.3)(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(3, activation='softmax')(x)
    return x

In [11]:
def create_model3(opt_ = 'adam'):
    model = Sequential()
    
    model.add(Convolution2D(32, (3, 3), dim_ordering='tf', input_shape=(128, 128, 3), padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Convolution2D(32, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 64x64x32
    
    
    
    model.add(Convolution2D(64, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Convolution2D(64, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) 
    
    
    
    model.add(Convolution2D(128, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Convolution2D(128, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) 
    model.add(Dropout(0.3))
    
    
    model.add(Convolution2D(256, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Convolution2D(256, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) 
    model.add(Dropout(0.3))
    
    
    model.add(Convolution2D(512, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Convolution2D(512, (3, 3), dim_ordering='tf', padding='same')) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) 
    model.add(Dropout(0.3))
 
    
    model.add(Flatten())
    model.add(Dense(512))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=opt_, loss='categorical_crossentropy', metrics=['categorical_crossentropy']) 
    return model

print('train create model  ...')




train create model  ...


In [3]:
def create_model2(opt_ = 'adam'):
    model = Sequential()
    model.add(Convolution2D(32, 3, 3, activation='relu', dim_ordering='tf', input_shape=(128, 128, 3))) #use input_shape=(3, 64, 64)
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 64x64x32
    
    model.add(Convolution2D(64, 3, 3, activation='relu', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 32x32x64
    #model.add(Dropout(0.3))
    
    model.add(Convolution2D(128, 3, 3, activation='relu', dim_ordering='tf'))
    model.add(BatchNormalization())
    
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 16x16x128
    #model.add(Dropout(0.3))
    
    model.add(Convolution2D(256, 3, 3, activation='relu', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 8x8x256
    
    model.add(Dropout(0.3))
    
    model.add(Convolution2D(512, 3, 3, activation='relu', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), dim_ordering='tf')) # 4x4x512
    
    model.add(Dropout(0.3))
 
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=opt_, loss='categorical_crossentropy', metrics=['categorical_crossentropy']) 
    return model

print('train create model  ...')




train create model  ...


In [12]:
nb_runs = 8
initial_random_seed = 1969
model_paths = []

for i in range(nb_runs):
    print('train model fit ...')
    x_train,x_val_train,y_train,y_val_train = train_test_split(train_data,train_target,test_size=0.2, random_state=initial_random_seed+i)
    model = create_model2()

    
    # create models folder in ROOT_DIR and anil_net folder in models 
    if not os.path.exists(ROOT_DIR + 'models/anil_net/run_{}'.format(i)):
        os.mkdir(ROOT_DIR + 'models/anil_net/run_{}'.format(i))
    
    saved_model_path = ROOT_DIR + 'models/anil_net/run_{}/'.format(i)
    model_fn = saved_model_path + '{val_loss:.2f}_loss_epoch_{epoch}.h5' 
    ckpt1 = ReduceLROnPlateau(monitor='val_loss', patience=10, factor=0.1, min_lr=1e-8, verbose=1) 
    ckpt2 = ModelCheckpoint(filepath=model_fn, monitor='val_loss', save_best_only=True, save_weights_only=True)
    history = model.fit_generator(datagen.flow(x_train,y_train, batch_size=128, shuffle=True), nb_epoch=300, samples_per_epoch=len(x_train), 
                              verbose=2, validation_data=(x_val_train, y_val_train), callbacks=[ckpt1,ckpt2], workers=20)
    
    model_names = sorted(glob.glob(saved_model_path+'*.h5'), key=os.path.getctime)
    model_paths.append(model_names[-1])
    for m in model_names[:-1]:os.remove(m)
    limit_mem()

train model fit ...




Epoch 1/300
47s - loss: 1.1230 - categorical_crossentropy: 1.1230 - val_loss: 1.0919 - val_categorical_crossentropy: 1.0919
Epoch 2/300
36s - loss: 1.0451 - categorical_crossentropy: 1.0451 - val_loss: 1.0786 - val_categorical_crossentropy: 1.0786
Epoch 3/300
36s - loss: 1.0106 - categorical_crossentropy: 1.0106 - val_loss: 1.0551 - val_categorical_crossentropy: 1.0551
Epoch 4/300
36s - loss: 0.9968 - categorical_crossentropy: 0.9968 - val_loss: 1.0920 - val_categorical_crossentropy: 1.0920
Epoch 5/300
36s - loss: 0.9859 - categorical_crossentropy: 0.9859 - val_loss: 0.9631 - val_categorical_crossentropy: 0.9631
Epoch 6/300
36s - loss: 0.9643 - categorical_crossentropy: 0.9643 - val_loss: 0.9789 - val_categorical_crossentropy: 0.9789
Epoch 7/300
36s - loss: 0.9670 - categorical_crossentropy: 0.9670 - val_loss: 0.9339 - val_categorical_crossentropy: 0.9339
Epoch 8/300
36s - loss: 0.9508 - categorical_crossentropy: 0.9508 - val_loss: 1.0008 - val_categorical_crossentropy: 1.0008
Epoch 9/

### Stacking Starter

In [5]:
# def get_augmented_preds(model, data,nb_aug=10):
#     print(type(data))
#     test_pred = np.zeros((data.shape[0], 3, nb_aug))
# #     #for i,img in enumerate(data):
# #         single_img_batch = datagen.flow(np.expand_dims(img,0))
# #         aug_imgs = np.concatenate([next(single_img_batch).astype(np.float32) for _ in range (nb_aug)], axis=0)
# #         batch_pred = model.predict(aug_imgs)
# #         batch_pred = np.mean(batch_pred,axis=0)
# #         test_pred[i] = batch_pred
    
#     for i in range(nb_aug):
#         batches = datagen.flow(data, batch_size=64, shuffle=False)    
#         batch_pred = model.predict_generator(batches, steps = int(np.ceil(data.shape[0]/float(64)))+1, workers=20)
#         test_pred[:, :, i] = batch_pred[:data.shape[0]]  
#     return test_pred.mean(axis=2)

In [6]:
# nb_runs = 1
# initial_random_seed = 1969
# model_paths = []
# nfolds=5
# train_stack = np.zeros((train_data.shape[0], 3))
# test_stack = np.zeros((test_data.shape[0],3, nfolds))

# for i in range(nb_runs, nb_runs+1):
#     print('.............run-{}, begins..............'.format(i))
#     kf = KFold(n_splits=5, shuffle=True,random_state=initial_random_seed+i)
    
#     if not os.path.exists(ROOT_DIR + 'models/anil_net_stack/run_{}'.format(i)):
#         os.mkdir(ROOT_DIR + 'models/anil_net_stack/run_{}'.format(i))
    
#     for fold,(train_index,test_index) in enumerate(kf.split(train_data)):
#         print('...........fold-{}, begins.........'.format(fold))
#         model = create_model2()
#         model.compile('Adam', loss='sparse_categorical_crossentropy')
        
#         if not os.path.exists(ROOT_DIR + 'models/anil_net_stack/run_{0}/fold{1}'.format(i, fold)):
#             os.mkdir(ROOT_DIR + 'models/anil_net_stack/run_{0}/fold{1}'.format(i, fold))

    
#         saved_model_path = ROOT_DIR + 'models/anil_net_stack/run_{0}/fold{1}/'.format(i, fold)
#         model_fn = saved_model_path + '{val_loss:.2f}_loss_epoch_{epoch}.h5' 
#         ckpt1 = ReduceLROnPlateau(monitor='val_loss', patience=10, factor=0.1, min_lr=1e-6, verbose=1) 
#         ckpt2 = ModelCheckpoint(filepath=model_fn, monitor='val_loss', save_best_only=True, save_weights_only=True)
#         history = model.fit_generator(datagen.flow(train_data[train_index],train_target[train_index], batch_size=128, shuffle=True), nb_epoch=300, samples_per_epoch=len(train_index), 
#                               verbose=2, validation_data=(train_data[test_index], train_target[test_index]), callbacks=[ckpt1,ckpt2], workers=20)
    
#         model_names = sorted(glob.glob(saved_model_path+'*.h5'), key=os.path.getctime)
#         model_paths.append(model_names[-1])
#         for m in model_names[:-1]:os.remove(m)
#         model.load_weights(model_paths[fold])
#         print('..... stack filling begins........')
#         train_stack[test_index] = get_augmented_preds(model, train_data[test_index])
#         test_stack[:, :, fold] = get_augmented_preds(model, test_data)
        
#         limit_mem()
    
#     train_stack = pd.DataFrame(train_stack,columns=['Type_1', 'Type_2', 'Type_3'])
#     train_stack.to_csv(ROOT_DIR + 'run_{}_train_stack'.format(i), index=None)
#     test_stack = np.mean(test_stack, axis=2)
#     test_stack = pd.DataFrame(test_stack,columns=['Type_1', 'Type_2', 'Type_3'])
#     test_stack.to_csv(ROOT_DIR + 'run_{}_test_stack'.format(i), index=None)

.............run-1, begins..............
...........fold-0, begins.........


  app.launch_new_instance()


Epoch 1/300
20s - loss: 1.2742 - val_loss: 1.0206
Epoch 2/300
16s - loss: 1.0715 - val_loss: 1.0376
Epoch 3/300
16s - loss: 1.0334 - val_loss: 1.0128
Epoch 4/300
16s - loss: 0.9993 - val_loss: 0.9719
Epoch 5/300
17s - loss: 0.9817 - val_loss: 0.9731
Epoch 6/300
16s - loss: 0.9690 - val_loss: 0.9362
Epoch 7/300
17s - loss: 0.9600 - val_loss: 0.9453
Epoch 8/300
16s - loss: 0.9396 - val_loss: 0.9635
Epoch 9/300
16s - loss: 0.9438 - val_loss: 0.9238
Epoch 10/300
16s - loss: 0.9302 - val_loss: 0.9236
Epoch 11/300
16s - loss: 0.9245 - val_loss: 0.9180
Epoch 12/300
17s - loss: 0.9234 - val_loss: 0.9286
Epoch 13/300
16s - loss: 0.9151 - val_loss: 0.9643
Epoch 14/300
16s - loss: 0.9209 - val_loss: 0.9692
Epoch 15/300
17s - loss: 0.9105 - val_loss: 0.9162
Epoch 16/300
17s - loss: 0.9084 - val_loss: 0.9800
Epoch 17/300
17s - loss: 0.9002 - val_loss: 0.9392
Epoch 18/300
17s - loss: 0.9032 - val_loss: 0.8911
Epoch 19/300
17s - loss: 0.8885 - val_loss: 0.8922
Epoch 20/300
17s - loss: 0.8894 - val_lo

In [19]:
def get_test_predictions(model_path, nb_aug):

    test_pred = np.zeros((len(test_data), 3))
    model = create_model3()
    
    model.load_weights(model_path)
    for i,img in enumerate(test_data):
        single_img_batch = datagen.flow(np.expand_dims(img,0))
        aug_imgs = np.concatenate([next(single_img_batch).astype(np.float32) for _ in range (nb_aug)], axis=0)
        
        batch_pred = model.predict(aug_imgs)
        batch_pred = np.mean(batch_pred,axis=0)
        test_pred[i] = batch_pred
    limit_mem()    
    return test_pred

In [20]:
predictions = [get_test_predictions(m,nb_aug=10) for m in model_paths]
predictions = np.stack(predictions, axis=2)
predictions = np.mean(predictions, axis=2)
predictions.shape



(512, 3)

In [21]:
filenames = glob.glob(ROOT_DIR+'test/*/*.jpg')

In [23]:
import os
submission_path = ROOT_DIR + 'submissions/'
nb_runs = 8
nb_epoch = 300
nb_aug = 10


def write_submission(predictions, filenames, clip=0.01):
    preds = np.clip(predictions, clip, 1-clip)
    
    sub_fn = submission_path + '{0}epoch_{1}aug_{2}clip_{3}runs'.format(nb_epoch,nb_aug,clip,nb_runs)
    #sub_fn = submission_path + 'anil_net_enhanced_{0}aug_{1}clip_{2}runs'.format(10,clip, nb_runs)   

    with open(sub_fn+'.csv','w') as f:
        print('Writing predictions to a csv...')
        f.write('image_name,Type_1,Type_2,Type_3\n')
        for i,image_name in enumerate(filenames):
            pred = ['%.6f' % p for p in preds[i,:]]
            f.write('%s,%s\n'% (os.path.basename(image_name),','.join(pred)))
        print('Done')


write_submission(predictions,filenames,0.05)

Writing predictions to a csv...
Done


In [11]:
a = range(12)

0
1


In [12]:
a[:-2]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [13]:
a

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]