In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pdb
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras import backend as K
from matplotlib import pyplot as plt
%matplotlib inline
import scipy
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom
from scipy.ndimage import imread
import helpers
from models import DaveModel, DaveVGG, DaveVGG19, SimpleModel, LeNetModel
from trainer import Trainer

Using TensorFlow backend.


In [2]:
# Read the labelled training records and the unlabelled test records.
# NOTE(review): both paths contain a "*.json" directory component followed by
# "data/processed/" — confirm they match the on-disk layout.
train_json_path = "_RawData/train.json/data/processed/train.json"
test_json_path = "_RawData/test.json/data/processed/test.json"

train = pd.read_json(train_json_path)
test = pd.read_json(test_json_path)

In [3]:
# Turn both record frames into image arrays with the project helper
# (train first, then test — same order as the original two statements).
X, X_test = (helpers.get_images(frame) for frame in (train, test))

In [4]:
# One-hot encode the binary target: column 0 <-> is_iceberg == 0,
# column 1 <-> is_iceberg == 1.
iceberg_flags = train["is_iceberg"].values
y = to_categorical(iceberg_flags, num_classes=2)

In [5]:
# Unshuffled split of the labelled data; the last 20% becomes the validation part.
# Xtr / ytr are read again further down (In [13] / In [14]), so this split
# must actually run for the notebook to survive Restart Kernel -> Run All.
Xtr, Xv, ytr, yv = train_test_split(X, y, shuffle=False, test_size=0.20)

In [6]:
# Build the four-member ensemble on the full labelled set.
# NOTE(review): In [16] calls Trainer with a single list argument and passes
# the data to each model instead — confirm which call form the current
# trainer / models modules expect.
ensemble_classes = (DaveModel, DaveVGG, DaveVGG19, LeNetModel)
trainRunner = Trainer(X, y, [model_class() for model_class in ensemble_classes])

In [7]:
# Restore saved weights; the i-th file belongs to trainRunner.models[i]
# (same order in which the models were handed to the Trainer).
weight_files = (
    "davemodel20171203-181713.h5",
    "vgg20171203-182704.h5",
    "vgg1920171203-183725.h5",
    "lenet20171203-184024.h5",
)

for position, weight_file in enumerate(weight_files):
    trainRunner.models[position].model.load_weights(weight_file)

In [8]:
# Collect every ensemble member's test-set prediction, one column per model.
# Only column 0 of each prediction array is kept; with the to_categorical
# encoding used for `y` that is presumably the is_iceberg == 0 class — confirm.
results = pd.DataFrame()

for model in trainRunner.models:
    name = model.get_name()
    print(name)
    # Deliberately NOT stored in `test`: that name holds the test DataFrame
    # read from JSON in In [2], and rebinding it to a prediction array would
    # break any re-run of the image-loading cell.
    test_preds = model.model.predict(X_test)
    results[name] = test_preds[:, 0]

davemodel
vgg
vgg19
lenet


In [9]:
# Peek at the first five rows of per-model scores.
results.head(5)



Unnamed: 0,davemodel,vgg,vgg19,lenet
0,0.904778,0.50692,0.729676,0.523765
1,0.686788,0.277934,0.972092,0.611775
2,0.994779,0.248336,0.985353,0.991685
3,0.001994,0.011655,0.003505,0.00478
4,0.968177,0.280054,0.898017,0.604213


In [10]:
# Flag the rows on which all four models fall on the same side of 0.5.
# Column-wise comparisons replace the row-wise `apply(axis=1)`: same result,
# no Python-level loop over rows, and the model names are listed only once.
MODEL_COLUMNS = ["davemodel", "vgg", "vgg19", "lenet"]
model_scores = results[MODEL_COLUMNS]

# very_positive: every model's score is strictly above 0.5.
results["very_positive"] = (model_scores > 0.5).all(axis=1)
# very_negative: every model's score is at or below 0.5.
results["very_negative"] = (model_scores <= 0.5).all(axis=1)
# definitive: the ensemble is unanimous one way or the other.
results["definitive"] = results["very_positive"] | results["very_negative"]

In [11]:
# results.head()
# results[results["definitive"]]
# results[results["very_positive"] != True & results["very_negative"] != True]

def get_score(value):
    """Turn one model output into a vote.

    Returns +1 when ``value`` is strictly greater than 0.5 and -1 in every
    other case (including exactly 0.5).
    """
    return 1 if value > 0.5 else -1

def get_label(score):
    """Map a summed vote to a label.

    Returns 1 for a positive score, 0 for a negative score, and -1 when the
    score is neither (e.g. a tie at exactly 0).
    """
    if score > 0:
        return 1
    if score < 0:
        return 0
    return -1

# Majority vote across the four models: each contributes +1 / -1 via get_score,
# and the per-row sum is mapped to a label via get_label (-1 marks a tie).
# Per-column `map` replaces the row-wise `apply(axis=1)`; the values are the same.
voting_columns = ["davemodel", "vgg", "vgg19", "lenet"]
results["score"] = sum(results[column].map(get_score) for column in voting_columns)

results["label"] = results["score"].map(get_label)


In [12]:
# Keep only the test samples that received a label from the vote (label >= 0);
# rows with label -1 are left out of the pseudo-labelled set.
has_pseudo_label = results["label"] >= 0

X_train_pseudo = X_test[has_pseudo_label]
y_train_pseudo = results.loc[has_pseudo_label, "label"]
X_train_pseudo.shape

(6984, 75, 75, 3)

In [13]:
# One-hot encode the pseudo labels for training.
#
# Mapping (unchanged from the original hand-written loop):
#     label 1 -> [1., 0.]
#     label 0 -> [0., 1.]
# i.e. column 0 carries the label itself and column 1 its complement.
# pd.get_dummies is intentionally not used: it would put label 0 in column 0,
# which is the opposite column order.
pseudo_labels = results.loc[results["label"] >= 0, "label"].values

# Only 0 / 1 remain after the `>= 0` filter, so `1 - label` is the complement.
y_train_pseudo = np.column_stack([pseudo_labels, 1 - pseudo_labels]).astype(float)

In [14]:
# Compare the pseudo-labelled set with the labelled training split.
# Xtr / ytr are the training part of the split written in In [5]; that split
# must have been executed in this kernel for the last two shapes to print.
for array in (X_train_pseudo, y_train_pseudo, Xtr, ytr):
    print(array.shape)

(6984, 75, 75, 3)
(6984, 2)
(1283, 75, 75, 3)
(1283, 2)


In [15]:
# Unshuffled split of the pseudo-labelled data: the last 20% is held out
# for validation.
Xtr_p, Xv_p, ytr_p, yv_p = train_test_split(
    X_train_pseudo,
    y_train_pseudo,
    test_size=0.20,
    shuffle=False,
)

In [16]:
# Fresh ensemble built on the pseudo-labelled split; this replaces the
# trainRunner (and the loaded weights) from the first stage.
# NOTE(review): here each model receives the data and Trainer gets only the
# model list, whereas In [6] uses Trainer(X, y, [...]) with argument-less
# models — confirm both call forms exist in the current trainer / models code.
pseudo_model_classes = (DaveModel, DaveVGG, DaveVGG19, LeNetModel)
trainRunner = Trainer([
    model_class(Xtr_p, ytr_p, Xv_p, yv_p)
    for model_class in pseudo_model_classes
])

In [17]:
# Train every model held by trainRunner; the recorded log reports
# "Epochs: 50" and "Batch Size: 32" for each of them.
trainRunner.train(epoch=50)

Model: davemodel
Batch Size: 32
Epochs: 50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: vgg
Batch Size: 32
Epochs: 50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: vgg19
Batch Size: 32
Epochs: 50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50


Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: lenet
Batch Size: 32
Epochs: 50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50


Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
from keras.preprocessing.image import ImageDataGenerator

# Re-score the test images on which the first ensemble vote was tied
# (label < 0) with the retrained models, to see whether they become definitive.
PREDICT_BATCH_SIZE = 32

blah = X_test[results["label"] < 0]

# Number of batches needed to cover every sample, as an integer rounded up.
# The previous `len(blah) / 32` handed a float to predict_generator and was
# only a whole number when the sample count happened to be a multiple of 32.
predict_steps = (len(blah) + PREDICT_BATCH_SIZE - 1) // PREDICT_BATCH_SIZE

# A single generator built with default arguments is enough; it is only
# used to batch `blah` in its original order (shuffle=False).
pred_gen = ImageDataGenerator()

predict = []
for model in trainRunner.models:
    batches = pred_gen.flow(blah, batch_size=PREDICT_BATCH_SIZE, shuffle=False)
    predict.append(model.model.predict_generator(batches, predict_steps))

# TODO: also do a predict and submit.



In [23]:
# Same table as `results`, but for the retrained models and restricted to the
# previously undecided samples in `blah`: one column per model, holding
# column 0 of that model's prediction array.
results2 = pd.DataFrame()

for model2 in trainRunner.models:
    name = model2.get_name()
    print(name)

    test2 = model2.model.predict(blah)
    first_column = test2[:, 0]
    results2[name] = first_column

davemodel
vgg
vgg19
lenet


In [24]:
# First five rows of the retrained models' scores on the undecided samples.
results2.head(5)

Unnamed: 0,davemodel,vgg,vgg19,lenet
0,0.977051,0.525496,0.897786,0.504032
1,0.753548,0.285678,0.986055,0.647878
2,0.237472,0.007978,0.062903,0.004565
3,0.901699,0.690666,0.51656,0.125253
4,0.182136,0.147875,0.963147,0.535211


In [25]:
# Flag the rows of results2 on which all four retrained models fall on the
# same side of 0.5. Column-wise comparisons replace the row-wise
# `apply(axis=1)`: same result, no Python-level loop over rows.
retrained_columns = ["davemodel", "vgg", "vgg19", "lenet"]
retrained_scores = results2[retrained_columns]

# very_positive: every model's score is strictly above 0.5.
results2["very_positive"] = (retrained_scores > 0.5).all(axis=1)
# very_negative: every model's score is at or below 0.5.
results2["very_negative"] = (retrained_scores <= 0.5).all(axis=1)
# definitive: the retrained ensemble is unanimous one way or the other.
results2["definitive"] = results2["very_positive"] | results2["very_negative"]

In [28]:
# Size of the re-scored table, followed by the rows on which the retrained
# models are now unanimous.
print(results2.shape)

now_definitive = results2["definitive"]
results2[now_definitive]

(1440, 7)


Unnamed: 0,davemodel,vgg,vgg19,lenet,very_positive,very_negative,definitive
0,0.977051,0.525496,0.897786,0.504032,True,False,True
2,0.237472,0.007978,0.062903,0.004565,False,True,True
7,0.999999,0.999595,1.000000,0.950964,True,False,True
8,0.999982,0.997978,1.000000,0.999996,True,False,True
9,1.000000,0.999806,1.000000,0.999868,True,False,True
10,0.929262,0.813650,0.999986,0.959610,True,False,True
11,0.974176,0.870007,0.997253,0.935104,True,False,True
12,0.987627,0.653146,0.647587,0.809050,True,False,True
14,0.999294,0.998570,1.000000,0.999980,True,False,True
15,1.000000,0.999618,1.000000,0.997085,True,False,True
