In [1]:
import pandas as pd
import numpy as np

from keras.layers import Input, Dense, BatchNormalization, Dropout, Activation
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import to_categorical
import keras.backend as K

from sklearn.model_selection import StratifiedKFold

import json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the pre-computed train/test splits of per-image metadata.
# JSON is UTF-8 by specification, so state the encoding explicitly instead of
# relying on the platform default.
with open("../data/metadata_splits.json", encoding="utf-8") as infile:
    data = json.load(infile)

In [3]:
# Show the first training record to see which metadata fields each image carries.
data["X_train_1"][0]

{'aspect_ratio': 1.0,
 'b_accum_err': 0.017017211562164382,
 'b_mean': 164.05480625,
 'file_size': 94648,
 'g_accum_err': 0.017224911024578352,
 'g_mean': 163.66376875,
 'h': 800,
 'path': '../data/stage3_imgs/70691_65.jpg',
 'pix_pb': 6.761896712027724,
 'pixels': 640000,
 'r_accum_err': 0.01741619515134654,
 'r_mean': 166.2122390625,
 'w': 800}

In [4]:
# List the field names of a record; these are the candidate feature columns.
data["X_train_1"][0].keys()

dict_keys(['path', 'h', 'w', 'pixels', 'aspect_ratio', 'file_size', 'pix_pb', 'r_mean', 'g_mean', 'b_mean', 'r_accum_err', 'g_accum_err', 'b_accum_err'])

In [5]:
# Columns used as model inputs. The 'path' key that is also present in every
# record is not selected. Keeping the list in one place guarantees the train
# and test frames share the same columns in the same order.
FEATURE_COLUMNS = [
    'h', 'w', 'pixels', 'aspect_ratio', 'file_size', 'pix_pb',
    'r_mean', 'g_mean', 'b_mean',
    'r_accum_err', 'g_accum_err', 'b_accum_err',
]

# One row per image, one column per selected metadata feature.
X_train = pd.DataFrame(data["X_train_1"], columns=FEATURE_COLUMNS)
X_test = pd.DataFrame(data["X_test_1"], columns=FEATURE_COLUMNS)

# Class labels as NumPy arrays (np.array copies, so `data` itself is not aliased).
y_train = np.array(data["y_train_1"])
y_test = np.array(data["y_test_1"])

In [6]:
# Preview the first rows of the training feature frame.
X_train.head()

Unnamed: 0,h,w,pixels,aspect_ratio,file_size,pix_pb,r_mean,g_mean,b_mean,r_accum_err,g_accum_err,b_accum_err
0,800,800,640000,1.0,94648,6.761897,166.212239,163.663769,164.054806,0.017416,0.017225,0.017017
1,541,590,319190,1.090573,39796,8.020655,162.796294,158.073473,153.525897,0.006082,0.006002,0.006235
2,800,800,640000,1.0,54903,11.656922,211.940197,208.954892,207.897623,0.013972,0.015094,0.016104
3,498,750,373500,1.506024,45580,8.194384,142.680809,142.629668,144.688286,0.013406,0.011876,0.010733
4,480,480,230400,1.0,15351,15.008794,239.123312,239.176745,239.901237,0.026496,0.032382,0.037846


In [7]:
# Preview the first five training labels (still raw integer class ids at this point).
y_train[0:5]

array([65, 48, 65, 48, 44])

### Create a model

In [8]:
# --- Hyper-parameters -------------------------------------------------------
LR = 0.005          # base learning rate; every mini-train runs at a fraction of it
DECAY = 1e-4        # learning-rate decay passed to Adam
DO = 0.15           # dropout rate applied in every hidden stage
EPOCHS = 5          # epochs per mini-train (250 was an earlier setting)
MINI_TRAINS = 10    # number of consecutive fit() rounds
BATCH = 1024        # batch size for both fitting and evaluation
NUM_CLASSES = 128   # width of the softmax output / one-hot targets

# --- Targets ----------------------------------------------------------------
# Shift the labels down by one (presumably they are 1-based in the JSON --
# TODO confirm) and one-hot encode them. The targets are rebuilt from `data`
# rather than mutated in place, so re-running this cell gives the same result
# instead of shifting/encoding already-encoded arrays.
y_train = to_categorical(np.array(data["y_train_1"]) - 1, NUM_CLASSES)
y_test = to_categorical(np.array(data["y_test_1"]) - 1, NUM_CLASSES)

# --- Model ------------------------------------------------------------------
# Three hidden stages of Dense -> BatchNorm -> Dropout -> ReLU, followed by a
# softmax classifier over NUM_CLASSES classes.
_input = Input(shape=(12,))
x = _input
for units in (256, 512, 256):
    x = Dense(units)(x)
    x = BatchNormalization()(x)
    x = Dropout(DO)(x)
    x = Activation("relu")(x)

output = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=_input, outputs=output)

OPTIMIZER = Adam(lr=LR, decay=DECAY)

model.compile(optimizer=OPTIMIZER, loss="categorical_crossentropy", metrics=["accuracy"])

# --- Training ---------------------------------------------------------------
# Train in MINI_TRAINS rounds; round k (1-based) runs at LR / (2 * k).
for idx in range(MINI_TRAINS):
    temp_lr = LR / (2 * (idx + 1))

    # Overwrite the optimizer's learning rate for this round.
    K.set_value(OPTIMIZER.lr, temp_lr)

    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=EPOCHS, batch_size=BATCH, verbose=0)

    # evaluate() returns [loss, accuracy] because the model was compiled with
    # metrics=["accuracy"].
    test_score = model.evaluate(X_test, y_test, batch_size=BATCH, verbose=0)
    # Not printed (the log format is kept unchanged); left available so the
    # train/test gap can be inspected after the loop.
    train_score = model.evaluate(X_train, y_train, batch_size=BATCH, verbose=0)

    print("Mini-Train: {:3d} Test Accuracy: {:2.2f}% Learning Rate: {:2.7f}".format((idx + 1), test_score[1] * 100, temp_lr))

    # Checkpoint after every round; each save overwrites the previous file.
    model.save("../src/weights/model_raven.h5")

Mini-Train:   1 Test Accuracy: 7.71% Learning Rate: 0.0025000
Mini-Train:   2 Test Accuracy: 8.56% Learning Rate: 0.0012500
Mini-Train:   3 Test Accuracy: 8.89% Learning Rate: 0.0008333
Mini-Train:   4 Test Accuracy: 9.22% Learning Rate: 0.0006250
Mini-Train:   5 Test Accuracy: 9.18% Learning Rate: 0.0005000
Mini-Train:   6 Test Accuracy: 9.27% Learning Rate: 0.0004167
Mini-Train:   7 Test Accuracy: 9.29% Learning Rate: 0.0003571
Mini-Train:   8 Test Accuracy: 9.25% Learning Rate: 0.0003125
Mini-Train:   9 Test Accuracy: 9.30% Learning Rate: 0.0002778
Mini-Train:  10 Test Accuracy: 9.34% Learning Rate: 0.0002500


In [70]:
from sklearn.linear_model import LogisticRegression

In [75]:
# Baseline: logistic regression with default settings on the same features.
logit = LogisticRegression()

# Fit on the raw integer labels. The loaded splits live in `data`;
# `data_dict` is not defined anywhere in this notebook.
logit.fit(X_train, data["y_train_1"])

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [76]:
# Mean accuracy of the baseline on the held-out split. Labels are read from
# `data`, the dict loaded from the JSON file (`data_dict` is undefined here).
score = logit.score(X_test, data["y_test_1"])

print(score)

0.04146469278134416


In [78]:
# Same baseline with a much larger C. C is the inverse regularisation strength
# in scikit-learn, so this is close to an unregularised fit.
logit2 = LogisticRegression(C=1e5)

# Labels are read from `data`, the dict loaded from the JSON file
# (`data_dict` is not defined in this notebook).
logit2.fit(X_train, data["y_train_1"])
score = logit2.score(X_test, data["y_test_1"])

print(score)


0.04100615747412551
