# Putting it all together

In [11]:
# basic high reg model
# From the training runs above: find the number of epochs (80+) at which the
# validation score stops improving, then retrain for that many epochs in one go and save the model.
from keras.models import load_model
import pandas as pd
import random
import numpy as np

# Location of the test-set JSON; the resulting frame is read by every later cell.
testing_path = "../../Test_data/test.json"
test_data = pd.read_json(testing_path)

# Fixed seed for the stdlib RNG so any random draws are repeatable.
random.seed(17332)

# for basic
def get_scaled_imgs_basic(df):
    """Build the 3-channel image array used by the basic model.

    For every row, ``band_1`` and ``band_2`` are reshaped to 75x75 and a
    third channel is formed as their element-wise sum. Each channel is then
    mean-centred and divided by its own value range (max - min).

    Args:
        df: DataFrame whose ``band_1`` / ``band_2`` cells each hold 5625
            values (anything ``np.array(...).reshape(75, 75)`` accepts).

    Returns:
        ``np.ndarray`` of shape ``(len(df), 75, 75, 3)``; an empty array
        when ``df`` has no rows.
    """

    def _rescale(band):
        # Mean-centre and divide by the value range of this band.
        spread = band.max() - band.min()
        if spread == 0:
            # A constant band has zero range; dividing would yield NaN/inf
            # and poison the model input, so emit an all-zero channel.
            return np.zeros_like(band, dtype=float)
        return (band - band.mean()) / spread

    imgs = []

    for _, row in df.iterrows():
        # make 75x75 images
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        # plus since log(x*y) = log(x) + log(y)
        # (presumably the bands are log-scaled, so the sum acts as a product)
        band_3 = band_1 + band_2

        imgs.append(np.dstack((_rescale(band_1),
                               _rescale(band_2),
                               _rescale(band_3))))

    return np.array(imgs)

# no third for inc angle
def get_scaled_imgs_inc(df):
    """Build the 2-channel image array used by the incidence-angle model.

    Despite the name, no rescaling is applied here: each row's ``band_1``
    and ``band_2`` are only reshaped to 75x75 and stacked depth-wise.

    Args:
        df: DataFrame whose ``band_1`` / ``band_2`` cells each hold 5625 values.

    Returns:
        ``np.ndarray`` of shape ``(len(df), 75, 75, 2)``; an empty array
        when ``df`` has no rows.
    """
    stacked = [
        np.dstack((
            np.array(record['band_1']).reshape(75, 75),
            np.array(record['band_2']).reshape(75, 75),
        ))
        for _, record in df.iterrows()
    ]
    return np.array(stacked)

# for transfer model
def get_scaled_imgs_trans(df):
    """Build the 3-channel image array used by the transfer-learning model.

    For every row, ``band_1`` and ``band_2`` are reshaped to 75x75 and a
    third channel is formed as their element-wise sum. Each channel is then
    mean-centred and divided by its own value range (max - min).

    Args:
        df: DataFrame whose ``band_1`` / ``band_2`` cells each hold 5625
            values (anything ``np.array(...).reshape(75, 75)`` accepts).

    Returns:
        ``np.ndarray`` of shape ``(len(df), 75, 75, 3)``; an empty array
        when ``df`` has no rows.
    """

    def _rescale(band):
        # Mean-centre and divide by the value range of this band.
        spread = band.max() - band.min()
        if spread == 0:
            # A constant band has zero range; dividing would yield NaN/inf
            # and poison the model input, so emit an all-zero channel.
            return np.zeros_like(band, dtype=float)
        return (band - band.mean()) / spread

    imgs = []

    for _, row in df.iterrows():
        # make 75x75 images
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        # plus since log(x*y) = log(x) + log(y)
        # (presumably the bands are log-scaled, so the sum acts as a product)
        band_3 = band_1 + band_2

        imgs.append(np.dstack((_rescale(band_1),
                               _rescale(band_2),
                               _rescale(band_3))))

    return np.array(imgs)


# Marker so the cell output shows that execution reached the end of the cell.
print("complete")

complete


In [5]:
# Basic model: load the saved network, score the test set, write a submission CSV.
filepath_basic = 'model_weights_basic_reg10_ensem.hdf5'
inf_model_basic = load_model(filepath_basic)

# 3-channel, per-band rescaled images.
X_sub_basic = get_scaled_imgs_basic(test_data)
predicted_test_basic = inf_model_basic.predict(X_sub_basic)

# Sanity check: one prediction per test id.
print("len of pred test", len(predicted_test_basic))
print("len of id", len(test_data['id']))

# Flatten the prediction column so it becomes a plain 1-D 'is_iceberg' series.
submission = pd.DataFrame({
    'id': test_data['id'],
    'is_iceberg': predicted_test_basic.ravel(),
})

print("sub length", len(submission))

submission.to_csv('sub_test_basic_FINAL.csv', index=False)

print("complete")

len of pred test 8424
len of id 8424
sub length 8424
complete


In [9]:
# Incidence-angle model: load the saved network, score the test set, write a submission CSV.
filepath_inc = 'model_weights_inc2_full_reg01_ensem.hdf5'
inf_model_inc = load_model(filepath_inc)

# Two inputs: the 2-band images (not rescaled) and the incidence angle per sample.
X_sub = get_scaled_imgs_inc(test_data)
inc_angle_t = test_data["inc_angle"]

# Stored under its own name: the original reused `predicted_test_basic` here,
# which silently overwrote the basic model's predictions in the notebook namespace.
predicted_test_inc = inf_model_inc.predict([X_sub, inc_angle_t])

# Sanity check: one prediction per test id.
print("len of pred test", len(predicted_test_inc))
print("len of id", len(test_data['id']))

submission = pd.DataFrame()
submission['id'] = test_data['id']
submission['is_iceberg'] = predicted_test_inc
submission.to_csv('sub_test_inc_FINAL.csv', index=False)

len of pred test 8424
len of id 8424


In [20]:
# Transfer model: load the saved network, score the test set, write a submission CSV.
import cv2

filepath_full = 'model_weights_transfer_ensem.hdf5'
inf_model_trans = load_model(filepath_full)

# 3-channel, per-band rescaled 75x75 images.
X_trans = get_scaled_imgs_trans(test_data)

# Upsample every image to 150x150 (presumably the transfer network's input size).
X_sub_trans = np.array([cv2.resize(img, (150, 150)) for img in X_trans])

predicted_test_trans = inf_model_trans.predict(X_sub_trans)

# Sanity check: one prediction per test id.
print("len of pred test", len(predicted_test_trans))
print("len of id", len(test_data['id']))

# File name is kept exactly as-is: the ensemble cell reads it back under this name.
submission_t = pd.DataFrame({
    'id': test_data['id'],
    'is_iceberg': predicted_test_trans.ravel(),
})
submission_t.to_csv('sub_trans_inc_FINAL.csv', index=False)

len of pred test 8424
len of id 8424


In [21]:
# Load the ensemble classifier, assemble its feature table from the three
# per-model submissions plus the incidence angle, and write the final submission.
import pickle

filename = 'rf_ensem_mod.sav'
# NOTE(security): unpickling can execute arbitrary code; only load a model
# file that you produced yourself or otherwise trust.
with open(filename, 'rb') as model_file:  # context manager closes the handle
    rf_model = pickle.load(model_file)

# load model submissions
basic_model = pd.read_csv("sub_test_basic_FINAL.csv")
inc_model = pd.read_csv("sub_test_inc_FINAL.csv")
trans_model = pd.read_csv("sub_trans_inc_FINAL.csv")

# ids together with their incidence angles
ids = test_data["id"]
inc_angles = test_data["inc_angle"]
inc_id = pd.concat([ids, inc_angles], axis=1)

# Name each prediction column up front instead of depending on the
# auto-generated `_x` / `_y` merge suffixes, then join everything on id.
merged = (
    basic_model.rename(columns={"is_iceberg": "basic"})
    .merge(inc_model.rename(columns={"is_iceberg": "inc"}), how='inner', on='id')
    .merge(trans_model.rename(columns={"is_iceberg": "trans"}), how='inner', on='id')
    .merge(inc_id, how='inner', on='id')
)

# An inner join drops unmatched ids and multiplies duplicated ones; either
# would yield a submission with the wrong number of rows.
if len(merged) != len(test_data):
    raise ValueError(
        "merged ensemble features have %d rows but test_data has %d"
        % (len(merged), len(test_data))
    )

# Feature order must match what the classifier was fitted on.
X = merged.loc[:, ['basic', 'inc', 'trans', 'inc_angle']]

probs = rf_model.predict_proba(X)

# Second column of predict_proba, clipped away from exactly 0 and 1.
final_probs = np.clip(np.asarray(probs)[:, 1], 0.01, 0.99).tolist()
print(final_probs)

submission_FIN = pd.DataFrame()
# Take ids from `merged` so every id is paired with the probability computed
# for that exact row, rather than relying on merge order matching test_data.
submission_FIN['id'] = merged['id'].values
submission_FIN['is_iceberg'] = final_probs
submission_FIN.to_csv('sub_ensem_FINAL.csv', index=False)

[0.040000000000000001, 0.83999999999999997, 0.31, 0.98999999999999999, 0.42999999999999999, 0.45000000000000001, 0.19, 0.98999999999999999, 0.01, 0.01, 0.01, 0.69999999999999996, 0.01, 0.98999999999999999, 0.34000000000000002, 0.01, 0.56999999999999995, 0.51000000000000001, 0.71999999999999997, 0.98999999999999999, 0.17999999999999999, 0.65000000000000002, 0.96999999999999997, 0.01, 0.01, 0.20999999999999999, 0.46999999999999997, 0.81999999999999995, 0.81999999999999995, 0.98999999999999999, 0.13, 0.82999999999999996, 0.98999999999999999, 0.90000000000000002, 0.97999999999999998, 0.98999999999999999, 0.080000000000000002, 0.90000000000000002, 0.38, 0.81999999999999995, 0.34000000000000002, 0.88, 0.12, 0.46000000000000002, 0.85999999999999999, 0.54000000000000004, 0.98999999999999999, 0.02, 0.98999999999999999, 0.97999999999999998, 0.20000000000000001, 0.85999999999999999, 0.98999999999999999, 0.89000000000000001, 0.98999999999999999, 0.98999999999999999, 0.01, 0.040000000000000001, 0.2