In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import cv2 
import glob
import os 
import gc; gc.enable()
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import KFold
from sklearn.metrics import f1_score
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from subprocess import check_output
# Sanity check: show what is available in the input directory before loading anything.
print(
    check_output(['ls', 'input']).decode('utf8')
)

In [None]:
# Directories the image files are read from (file names come from the CSV tables).
train_img_path = 'input/train_/'
test_img_path = 'input/test_/'

# Every image is resized to this width/height (in pixels) before being fed to the network.
IMG_WIDTH = IMG_HIGHT = 128

# Shared label encoder: fitted on the training labels, reused later to decode predictions.
le = LabelEncoder()


In [None]:
# Metadata tables; later cells read `image_name` and `row_id` from both,
# and `detected` (the label) from the training table.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('input/train.csv', 'input/test.csv')
)

In [None]:
def get_img_cv2(path):
    """Read an image from disk in colour mode and resize it to the network input size.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    numpy.ndarray
        The image resized with ``dsize=(IMG_WIDTH, IMG_HIGHT)``.

    Raises
    ------
    IOError
        If OpenCV could not read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread does not raise on a missing/corrupt file; fail here with the
        # offending path instead of an opaque error inside cv2.resize.
        raise IOError('Could not read image: {}'.format(path))
    # The interpolation flag must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not `interpolation`.
    return cv2.resize(img, (IMG_WIDTH, IMG_HIGHT), interpolation=cv2.INTER_LINEAR)

In [None]:
def load_train():
    """Load every training image listed in the ``train`` table together with its label.

    Returns
    -------
    X_train : numpy.ndarray
        Stack of resized images, stored as ``uint8``.
    y_train : numpy.ndarray
        Integer-encoded ``train.detected`` labels (``int8``), produced by fitting
        the shared encoder ``le``.
    X_train_id : list
        The ``train.row_id`` values, in the same order as the images.
    """
    img_names = train.image_name.values.tolist()
    images = [get_img_cv2(train_img_path + img_name) for img_name in tqdm(img_names)]

    # Pixel intensities must stay unsigned: casting to int8 wraps every value
    # above 127 to a negative number, corrupting the later /255 normalisation.
    X_train = np.array(images, dtype=np.uint8)
    X_train_id = train.row_id.values.tolist()
    y_train = le.fit_transform(train.detected.values).astype(np.int8)

    return X_train, y_train, X_train_id

In [None]:
def load_test():
    """Load every test image listed in the ``test`` table.

    Returns
    -------
    X_test : numpy.ndarray
        Stack of resized images, stored as ``uint8``.
    test_id : numpy.ndarray
        The ``test.row_id`` values, in the same order as the images.
    """
    img_names = test.image_name.values.tolist()
    images = [get_img_cv2(test_img_path + img_name) for img_name in tqdm(img_names)]

    # Pixel intensities must stay unsigned: casting to int8 wraps every value
    # above 127 to a negative number, corrupting the later /255 normalisation.
    X_test = np.array(images, dtype=np.uint8)
    test_id = test.row_id.values

    return X_test, test_id

In [None]:
def read_and_normalise_train():
    """Load the training set and prepare it for the network.

    Images are converted to ``float32`` and divided by 255; the integer labels
    are one-hot encoded into 14 columns.

    Returns
    -------
    tuple
        ``(images, one_hot_targets, ids)`` as produced from ``load_train()``.
    """
    images, labels, ids = load_train()

    print('Reshape train_img for Tensorflow...')
    # The permutation (0, 1, 2, 3) is the identity, so the axis order is left as loaded.
    images = images.transpose((0, 1, 2, 3))

    print('convert to float and normalize....')
    images = images.astype(np.float32) / 255.
    labels = np_utils.to_categorical(labels, 14)

    print('Train shpae: {} Train target shape: {}'.format(images.shape, labels.shape))

    return images, labels, ids

In [None]:
def read_and_normalise_test():
    """Load the test set and prepare it for the network.

    Images are converted to ``float32`` and divided by 255.

    Returns
    -------
    tuple
        ``(images, ids)`` as produced from ``load_test()``.
    """
    images, ids = load_test()

    print('Resahpe test_img for Tesnorflow...')
    # The permutation (0, 1, 2, 3) is the identity, so the axis order is left as loaded.
    images = images.transpose((0, 1, 2, 3))

    print('convert to float and normalize...')
    images = images.astype(np.float32) / 255.

    print('Test shpae: {}'.format(images.shape))
    return images, ids
    

In [None]:
def create_model(input_shape=(128, 128, 3), n_classes=14):
    """Build and compile a small VGG-style convolutional classifier.

    Architecture: two blocks of (zero-pad, 3x3 conv, zero-pad, 3x3 conv, 2x2 max-pool)
    with 4 and 8 filters respectively, followed by two 128-unit dense layers with
    50% dropout each and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single input sample. The default matches the 128x128
        3-channel images produced by the loading cells.
    n_classes : int, optional
        Number of output classes (size of the softmax layer). Must agree with the
        width of the one-hot targets used for training (14 in this notebook).

    Returns
    -------
    keras.models.Sequential
        Model compiled with Nesterov-momentum SGD and categorical cross-entropy.
    """
    model = Sequential()

    # Convolutional block 1: 4 filters.
    model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
    model.add(Convolution2D(4, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(4, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Convolutional block 2: 8 filters.
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(8, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(8, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    sgd = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')

    return model

In [None]:
def run_cv_create_model(n_folds = 2):
    """Train one model per cross-validation fold and report the weighted F1 score.

    The training data is loaded via ``read_and_normalise_train()``, split with a
    shuffled ``KFold`` (fixed ``random_state``), and a fresh model from
    ``create_model()`` is fitted on each fold with early stopping on ``val_loss``.

    Parameters
    ----------
    n_folds : int, optional
        Number of cross-validation folds (and therefore of models trained).

    Returns
    -------
    info_str : str
        Summary tag of the form ``f1_<score>_folds_<n_folds>_ep_<nb_epoch>``.
    models : list
        The fitted models, one per fold, in fold order.

    Notes
    -----
    ``KFold`` is called with the legacy ``sklearn.cross_validation`` signature
    (sample count first, iterable splitter), matching the import used by this notebook.
    """
    batch_size = 16
    nb_epoch = 30
    random_state = 31

    train_img, train_target, train_id = read_and_normalise_train()

    kf = KFold(len(train_id), n_folds=n_folds, shuffle=True, random_state=random_state)
    sum_score = 0
    models = []

    for n_fold, (train_idx, valid_idx) in enumerate(kf, start=1):
        model = create_model()
        X_train = train_img[train_idx]
        y_train = train_target[train_idx]
        X_valid = train_img[valid_idx]
        y_valid = train_target[valid_idx]

        print('Start fold num {} from {}'.format(n_fold, n_folds))
        print('Train fold {} traget fold {}'.format(len(X_train), len(y_train)))
        print('valid fold {} traget fold {}'.format(len(X_valid), len(y_valid)))

        callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=0)]

        model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epoch, shuffle=True,
                  verbose=2, validation_data=(X_valid, y_valid), callbacks=callbacks)

        # X_valid is already float32 (converted when the training data was normalised),
        # so no extra cast/copy is needed before predicting.
        predict_valid = model.predict(X_valid, batch_size=batch_size, verbose=2)

        score = f1_score(y_valid.argmax(axis=1), predict_valid.argmax(axis=1), average='weighted')
        print('F1 score: ', score)
        # Weight each fold's score by its validation size so the final figure
        # is a per-sample average over the whole training set.
        sum_score += score * len(valid_idx)

        models.append(model)

    score = sum_score / len(train_img)
    print('F1 Score train independent avg {}'.format(score))

    info_str = 'f1_' + str(round(score, 3)) + '_folds_' + str(n_folds) + '_ep_' + str(nb_epoch)
    return info_str, models

In [None]:
def merge_several_folds_mean(data, nfolds):
    """Average the first ``nfolds`` prediction arrays element-wise.

    Parameters
    ----------
    data : sequence of array-like
        Per-fold predictions; all entries must share the same shape.
    nfolds : int
        Number of leading entries of ``data`` to average (must be >= 1).

    Returns
    -------
    list
        The element-wise mean as (nested) Python lists.

    Raises
    ------
    ValueError
        If ``nfolds`` is smaller than 1.
    """
    if nfolds < 1:
        raise ValueError('nfolds must be at least 1, got {}'.format(nfolds))

    # np.array copies, so the caller's first array is never modified in place.
    acc = np.array(data[0])
    if not np.issubdtype(acc.dtype, np.inexact):
        # In-place true division is not allowed on integer/bool arrays;
        # promote to float so the mean is computed correctly.
        acc = acc.astype(np.float64)
    for i in range(1, nfolds):
        acc += np.array(data[i])
    acc /= nfolds
    return acc.tolist()

In [None]:
def run_cv_process_test(models):
    """Predict the test set with every fold model and average the predictions.

    Parameters
    ----------
    models : list
        Fitted models, one per cross-validation fold.

    Returns
    -------
    test_res : list
        Element-wise mean of the per-model predictions, as nested lists.
    test_id : numpy.ndarray
        Row identifiers of the test samples, as returned by
        ``read_and_normalise_test()``.

    Raises
    ------
    ValueError
        If ``models`` is empty.
    """
    n_folds = len(models)
    if n_folds == 0:
        raise ValueError('models must contain at least one fitted model')

    batch_size = 16

    # The test data is identical for every model, so read it from disk only once
    # instead of reloading all images on each loop iteration.
    test_img, test_id = read_and_normalise_test()

    yfull_test = []
    for n_fold, model in enumerate(models, start=1):
        print('Start KFold number {} from {}'.format(n_fold, n_folds))
        test_pred = model.predict(test_img, batch_size=batch_size, verbose=2)
        yfull_test.append(test_pred)

    test_res = merge_several_folds_mean(yfull_test, n_folds)

    return test_res, test_id

In [None]:
# Train one model per cross-validation fold, then average their test-set predictions.
info_str, models = run_cv_create_model(n_folds=2)
test_res, test_id = run_cv_process_test(models)

In [None]:
# Take the arg-max class of each averaged prediction and decode it back to the original label.
pred = le.inverse_transform(np.argmax(np.asarray(test_res), axis=1))

# Build the submission table with the columns in the order row_id, detected.
sub = pd.DataFrame(
    {'row_id': test_id, 'detected': pred},
    columns=['row_id', 'detected'],
)
sub.head()

In [None]:
# Write the submission; the file name is the CV summary tag returned by training.
sub.to_csv(path_or_buf=info_str + '.csv', index=False)

In [None]:
# Which distinct labels actually appear among the predictions?
sub['detected'].unique()