In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
import os  # local to this cell: only the directory listing below needs it

# List the input directory with the standard library rather than shelling out
# to `ls`, so the cell also runs on systems where `ls` is not available.
print("\n".join(sorted(os.listdir("../input"))))

# Any results you write to the current directory are saved as output.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the training and test sets from the JSON files in the input directory.
train = pd.read_json('../input/train.json')
test = pd.read_json("../input/test.json")

In [None]:
# Convert inc_angle to a numeric dtype in both frames; entries that cannot be
# parsed as numbers become NaN (errors='coerce').
train['inc_angle'] = pd.to_numeric(train['inc_angle'],errors='coerce')
test['inc_angle'] = pd.to_numeric(test['inc_angle'],errors='coerce')

In [None]:
# Class balance: bar chart of the number of training rows per is_iceberg
# group (each group is counted through its non-null 'id' entries).
f,ax = plt.subplots(1,1,figsize=(15,6))
# The x labels are hand-written, so they match the bars only if the groups
# come out ordered as is_iceberg == 0 first, then is_iceberg == 1.
sns.barplot(x=['not an iceberg','iceberg'],y=train.groupby(['is_iceberg'],as_index=False).count()['id'])
plt.show()

In [None]:
train['inc_angle'].describe() # missing values (NaN) exist in this column

In [None]:
# Draw nine rows of each class with fixed seeds so the figures are reproducible.
icebergs = train[train.is_iceberg==1].sample(n=9,random_state=123)
ships = train[train.is_iceberg==0].sample(n=9,random_state=456)

#Plotting Images:Iceberg (band 1)
# The band is selected by column name instead of by position (column 0), so
# the plot does not depend on the column order pd.read_json happens to produce.
# NOTE(review): assumes positional column 0 was meant to be 'band_1' -- confirm.
fig, axes = plt.subplots(3, 3, figsize=(15, 15))
for ax, flat_band in zip(axes.ravel(), icebergs['band_1']):
    # Each band is stored flat; reshape it to the 75x75 image before drawing.
    ax.imshow(np.reshape(np.array(flat_band), (75, 75)))
plt.show()


In [None]:
#Plotting Images:Iceberg Band 2
# Uses the `icebergs` sample drawn in the previous plotting cell.
# The band is selected by column name instead of by position (column 1), so
# the plot does not depend on the column order pd.read_json happens to produce.
fig, axes = plt.subplots(3, 3, figsize=(15, 15))
for ax, flat_band in zip(axes.ravel(), icebergs['band_2']):
    # Each band is stored flat; reshape it to the 75x75 image before drawing.
    ax.imshow(np.reshape(np.array(flat_band), (75, 75)))
plt.show()

In [None]:
def get_scaled_imgs(df):
    """Build a 3-channel image array from the two bands stored in ``df``.

    Each row's ``band_1`` and ``band_2`` (flat sequences of 75*75 values) are
    reshaped to 75x75, and a third channel is formed as their element-wise
    sum. Every channel is mean-centred and divided by its own value range
    (max - min).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``band_1`` and ``band_2`` columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), 75, 75, 3)`` for non-empty input.
    """
    def _rescale(band):
        # A perfectly flat band has zero range; dividing would yield 0/0 = NaN
        # for the whole channel, so return an all-zero channel instead.
        span = band.max() - band.min()
        if span == 0:
            return np.zeros(band.shape, dtype=float)
        return (band - band.mean()) / span

    imgs = []

    # Iterate the two columns directly; the row index is not needed.
    for flat_1, flat_2 in zip(df['band_1'], df['band_2']):
        # make 75x75 images
        band_1 = np.array(flat_1).reshape(75, 75)
        band_2 = np.array(flat_2).reshape(75, 75)
        band_3 = band_1 + band_2

        imgs.append(np.dstack((_rescale(band_1), _rescale(band_2), _rescale(band_3))))

    return np.array(imgs)

In [None]:
#Image with 3 Bands
# Xtrain: scaled image array built from the training frame.
# Ytrain: the is_iceberg labels as a NumPy array, in the same row order.
Xtrain = get_scaled_imgs(train)
Ytrain = np.array(train['is_iceberg'])

In [None]:
import cv2
def transform_image(image,ang_range,shear_range,trans_range):
    """Apply a random rotation followed by a random translation to ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Image whose first two axes are (rows, cols).
    ang_range : float
        Width of the rotation interval; the angle passed to
        ``cv2.getRotationMatrix2D`` is drawn uniformly from
        ``[-ang_range/2, ang_range/2)``.
    shear_range : float
        Accepted so existing callers keep working; no shear is applied.
    trans_range : float
        Width of the shift interval; the x and y shifts are drawn
        independently and uniformly from ``[-trans_range/2, trans_range/2)``.

    Returns
    -------
    numpy.ndarray
        The image after both warps, rendered at the input's (cols, rows) size.
    """
    # Only the spatial dimensions are needed, so 2-D images are accepted too.
    rows, cols = image.shape[:2]

    # Rotation about the image centre.
    # np.random.uniform(ang_range) would pass ang_range as the *lower* bound
    # (with the default high=1.0) and skew the angle, so the symmetric
    # interval is given explicitly.
    ang_rot = np.random.uniform(-ang_range/2, ang_range/2)
    Rot_M = cv2.getRotationMatrix2D((cols/2,rows/2),ang_rot,1)

    # Translation
    tr_x = trans_range*np.random.uniform()-trans_range/2
    tr_y = trans_range*np.random.uniform()-trans_range/2
    Trans_M = np.float32([[1,0,tr_x],[0,1,tr_y]])

    # Rotate first, then shift.
    image = cv2.warpAffine(image,Rot_M,(cols,rows))
    image = cv2.warpAffine(image,Trans_M,(cols,rows))

    return image

In [None]:
# Augmentation Translation + Rotation


In [None]:
# Flip Images
def get_more_images(imgs):
    """Return ``imgs`` followed by flipped and randomly transformed copies.

    The result concatenates, along the first axis and in this order:
    the input images, copies flipped with ``cv2.flip`` code 1, copies flipped
    with ``cv2.flip`` code 0, and two passes of
    ``transform_image(img, 10, 10, 2)`` over every image. The output therefore
    holds five times as many images as the input.

    Parameters
    ----------
    imgs : numpy.ndarray
        Images indexed as ``imgs[i, :, :, channel]`` with three channels.

    Returns
    -------
    numpy.ndarray
        The concatenated original and augmented images.
    """
    n_imgs = imgs.shape[0]

    def _flip_channels(img, flip_code):
        # Flip each of the three channels on its own, then restack them.
        return np.dstack([cv2.flip(img[:, :, k], flip_code) for k in range(3)])

    v = np.array([_flip_channels(imgs[i], 1) for i in range(n_imgs)])
    h = np.array([_flip_channels(imgs[i], 0) for i in range(n_imgs)])

    # Two full passes of random rotation + translation over the input, in the
    # same pass-major order as a nested loop.
    aug_image = np.array([
        transform_image(imgs[i], 10, 10, 2)
        for _ in range(2)
        for i in range(n_imgs)
    ])

    return np.concatenate((imgs, v, h, aug_image))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the images for validation. The split is seeded so that
# re-running the notebook reproduces the same partition, and stratified so
# both parts keep the class ratio of the full training set.
X_train, X_valid, y_train, y_valid = train_test_split(
    Xtrain, Ytrain, test_size=0.25, random_state=1234, stratify=Ytrain)

# get_more_images returns five blocks per input (originals, two flips, two
# random transforms), each in the input order, so the labels are repeated
# five times to stay aligned with the images.
Xtr_more = get_more_images(X_train)
Ytr_more = np.tile(y_train, 5)
X_valid_more = get_more_images(X_valid)
y_valid_more = np.tile(y_valid, 5)


In [None]:
# Sanity check: each image array should have the same first dimension as its label array.
Xtr_more.shape,Ytr_more.shape,X_valid_more.shape,y_valid_more.shape

In [None]:
# Import Keras.
from matplotlib import pyplot
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation, GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau

In [None]:
def getModel():
    """Build, compile and print a summary of the first CNN classifier.

    The network takes 75x75x3 inputs and stacks four 3x3 'same'-padded ReLU
    convolutions (64, 128, 128 and 64 filters), each followed by max pooling,
    then global average pooling, two ReLU dense layers (512 and 256 units)
    and a single sigmoid output unit. Dropout(0.2) follows the first three
    pooling layers, the global pooling and both hidden dense layers.

    Returns
    -------
    keras.models.Sequential
        The model, compiled with binary cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    stack = [
        # Conv block 1
        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same',
               input_shape=(75, 75, 3)),
        MaxPooling2D(pool_size=(3, 3)),
        Dropout(0.2),

        # Conv block 2
        Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),

        # Conv block 3
        Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),

        # Conv block 4, collapsed by global average pooling (used in place
        # of a Flatten layer)
        Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        GlobalAveragePooling2D(),
        Dropout(0.2),

        # Dense head
        Dense(512, activation='relu'),
        Dropout(0.2),
        Dense(256, activation='relu'),
        Dropout(0.2),

        # Sigmoid output
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)

    # Alternative that was tried: SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    return model

In [None]:
def get_callbacks(filepath, patience=2):
    """Return the training callbacks: early stopping and checkpointing.

    Parameters
    ----------
    filepath : str
        Where ``ModelCheckpoint`` writes the best model seen so far.
    patience : int, optional
        Patience passed to ``EarlyStopping``, which monitors ``val_loss``.

    Returns
    -------
    list
        ``[EarlyStopping, ModelCheckpoint]``, in that order.
    """
    return [
        EarlyStopping('val_loss', patience=patience, mode="min"),
        ModelCheckpoint(filepath, save_best_only=True),
    ]


# Checkpoint path and callbacks used when fitting the first model.
file_path = ".model_stack1_weights.hdf5"
callbacks = get_callbacks(filepath=file_path, patience=5)

In [None]:
import os
# Build the first CNN and train it on the augmented training images,
# validating on the augmented hold-out images. The callbacks stop training
# early on stalled val_loss and checkpoint the best model to file_path.
model=getModel()
model.fit(Xtr_more, Ytr_more,
          batch_size=24,
          epochs=20,
          verbose=1,
          validation_data=(X_valid_more, y_valid_more),
          callbacks=callbacks)

In [None]:
# Restore the checkpoint written during training and score it on the
# augmented hold-out images. The data passed to evaluate() is the validation
# split, so the printout is labelled "Validation" rather than "Test".
model.load_weights(filepath=file_path)
score = model.evaluate(X_valid_more, y_valid_more, verbose=1)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

# Output recorded from an earlier run (printed under the old "Test" labels):
# 1604/1604 [==============================] - 10s 6ms/step
# Test loss: 0.166926259783
# Test accuracy: 0.937032418953

In [None]:
#Submission
# Score the test images with the first model and write the submission file.
# predict() is used, as in the K-fold cell, because the model's sigmoid
# output is already the predicted iceberg probability; this also avoids
# predict_proba, which later Keras releases no longer provide.
Xtest = get_scaled_imgs(test)
predicted_test = model.predict(Xtest)
submission = pd.DataFrame()
submission['id']=test['id']
# Flatten the (n, 1) prediction array into one value per row.
submission['is_iceberg']=predicted_test.reshape((predicted_test.shape[0]))
submission.to_csv('submission_Stack1.csv', index=False)

In [None]:
# Model 2

In [None]:
def get_model2():
    """Build and compile the second Keras Sequential CNN.

    The network takes 75x75x3 inputs and stacks four 3x3 'same'-padded ReLU
    convolutions (64, 128, 128 and 256 filters), each followed by max
    pooling, then global average pooling, three ReLU dense layers (1024, 512
    and 256 units) with dropout, and a single sigmoid output unit.

    Returns
    -------
    keras.models.Sequential
        The model, compiled with binary cross-entropy loss, the "rmsprop"
        optimizer and the accuracy metric.
    """
    stack = [
        # Conv block 1
        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same',
               input_shape=(75, 75, 3)),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),

        # Conv block 2
        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

        # Conv block 3
        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

        # Conv block 4, collapsed by global average pooling
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        GlobalAveragePooling2D(),
        Dropout(0.2),

        # Dense 1
        Dense(1024, activation='relu'),
        Dropout(0.4),

        # Dense 2
        Dense(512, activation='relu'),
        Dropout(0.2),

        # Dense 3
        Dense(256, activation='relu'),
        Dropout(0.2),

        # Output
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(stack)

    model.compile(loss='binary_crossentropy', optimizer="rmsprop", metrics=['accuracy'])

    return model

In [None]:
from sklearn.model_selection import StratifiedKFold
seed = 1234
batch_size = 32

# The test images are the same for every fold, so read and scale them once
# here instead of re-parsing the JSON file on each iteration.
df_test = pd.read_json('../input/test.json')
df_test.inc_angle = df_test.inc_angle.replace('na',0)
Xtest = (get_scaled_imgs(df_test))

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
# The fold index arrays are named train_idx / val_idx so that they do not
# overwrite the `train` / `test` DataFrames loaded earlier in the notebook.
# NOTE(review): Xtr_more holds flipped/transformed copies of each image, so
# copies of one source image can fall on both sides of a fold; the fold
# scores are probably optimistic -- confirm before relying on them.
for fold_n, (train_idx, val_idx) in enumerate(kfold.split(Xtr_more, Ytr_more)):
    print("FOLD nr: ", fold_n)
    model2 = get_model2()

    MODEL_FILE = 'mdl_simple_k{}_wght.hdf5'.format(fold_n)
    mcp_save = ModelCheckpoint(MODEL_FILE, save_best_only=True, monitor='val_loss', mode='min')
    # NOTE(review): patience=15 equals the epoch count used below, so this
    # callback looks unable to fire unless epochs is raised -- confirm.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=15, verbose=1, epsilon=1e-4, mode='min')

    # set the epochs to 30 before training on your GPU
    model2.fit(Xtr_more[train_idx], Ytr_more[train_idx],
        batch_size=batch_size,
        epochs=15,
        verbose=1,
        validation_data=(Xtr_more[val_idx], Ytr_more[val_idx]),
        callbacks=[mcp_save, reduce_lr_loss])

    # Score the best checkpoint of this fold, not the last-epoch weights.
    model2.load_weights(filepath = MODEL_FILE)

    score = model2.evaluate(Xtr_more[val_idx], Ytr_more[val_idx], verbose=1)
    print('\n Val score:', score[0])
    print('\n Val accuracy:', score[1])

    # One submission file per fold.
    SUBMISSION = 'sub_STACK_Model2_{}.csv'.format(fold_n)

    pred_test = model2.predict(Xtest)

    submission = pd.DataFrame({'id': df_test["id"], 'is_iceberg': pred_test.reshape((pred_test.shape[0]))})
    print(submission.head(10))

    submission.to_csv(SUBMISSION, index=False)
    print("submission saved")

In [None]:
# Print a completion marker.
print("done")

In [None]:
# #load Submission File
# sub_path = "../input/statoil-iceberg-submissions"
# all_files = os.listdir(sub_path)

# # Read and concatenate submissions
# outs = [pd.read_csv(os.path.join(sub_path, f), index_col=0) for f in all_files]
# concat_sub = pd.concat(outs, axis=1)
# cols = list(map(lambda x: "is_iceberg_" + str(x), range(len(concat_sub.columns))))
# concat_sub.columns = cols
# concat_sub.reset_index(inplace=True)
# concat_sub.head()

In [None]:
# # check correlation
# concat_sub.corr()

In [None]:
# #MinMax + Median Stacking
# concat_sub['is_iceberg'] = np.where(np.all(concat_sub.iloc[:,1:6] > cutoff_lo, axis=1), 
#                                     concat_sub['is_iceberg_max'], 
#                                     np.where(np.all(concat_sub.iloc[:,1:6] < cutoff_hi, axis=1),
#                                              concat_sub['is_iceberg_min'], 
#                                              concat_sub['is_iceberg_median']))
# concat_sub[['id', 'is_iceberg']].to_csv('stack_minmax_median.csv', 
#                                         index=False, float_format='%.6f')