In [1]:
%cd /home/slav/ai/claim-rank
!export PYTHONPATH=.
%load_ext autoreload
%autoreload 2

/home/slav/ai/claim-rank


In [2]:
from tempfile import NamedTemporaryFile
import numpy as np
np.random.seed(42) # ! before importing keras!

In [3]:
from keras.layers import Input, Embedding, Conv1D, Dense
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
from keras import optimizers
from sklearn.metrics import average_precision_score

Using TensorFlow backend.


In [18]:
def load_crossvalidation_data(name='crossval'):
    """Load the array saved at ``folds/<name>.npy``.

    The path is relative to the current working directory.

    Args:
        name: File stem (without the ``.npy`` extension) inside ``folds/``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    fold_path = ''.join(('folds/', name, '.npy'))
    return np.load(fold_path)

In [5]:
def create_baseline_model(in_count, out_count):
    """Build and compile a one-hidden-layer network with ``out_count`` heads.

    Args:
        in_count: Number of input features.
        out_count: Number of independent single-unit sigmoid outputs, all
            fed from the same 100-unit ReLU hidden layer.

    Returns:
        The compiled Keras ``Model`` (adam, binary cross-entropy, accuracy).
    """
    features = Input(shape=(in_count,))
    hidden = Dense(100, kernel_initializer='normal', activation='relu')(features)

    # One sigmoid head per requested output, created in order.
    heads = [
        Dense(1, kernel_initializer='normal', activation='sigmoid')(hidden)
        for _ in range(out_count)
    ]

    network = Model(inputs=[features], outputs=heads)
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [6]:
def run_model(X_train, X_test, train_target, test_targets, create_model=create_baseline_model, epochs=25):
    """Train a single-output model and score it against each test target.

    Args:
        X_train: Training feature matrix; ``X_train.shape[1]`` is the input width.
        X_test: Test feature matrix passed to ``model.predict``.
        train_target: Training labels passed straight to ``model.fit``.
        test_targets: Sequence of label vectors; each is scored against the
            same predictions.
        create_model: Factory called as ``create_model(n_features, 1)``.
        epochs: Number of training epochs.

    Returns:
        List of average-precision scores, one per entry of ``test_targets``.
    """
    model = create_model(X_train.shape[1], 1)
    model.fit(X_train, train_target, epochs=epochs, batch_size=550, verbose=0)
    # Predictions do not depend on the target being scored, so run inference
    # once instead of once per test target.
    scores = model.predict(X_test)[:, 0]
    return [average_precision_score(target, scores) for target in test_targets]
    

In [7]:
def run_model_multiple(X_train, X_test, train_targets, test_targets, log_indexes, create_model=create_baseline_model, epochs=50):
    """Train one multi-output model and score selected outputs.

    Args:
        X_train: Training feature matrix; ``X_train.shape[1]`` is the input width.
        X_test: Test feature matrix passed to ``model.predict``.
        train_targets: Sequence of training label vectors, one per model
            output; its length sets the number of outputs.
        test_targets: Sequence of test label vectors.
        log_indexes: For each position ``j`` in ``test_targets``, the index of
            the model output to score against ``test_targets[j]``.
        create_model: Factory called as ``create_model(n_features, n_outputs)``.
        epochs: Number of training epochs.

    Returns:
        List of average-precision scores, one per entry of ``log_indexes``.
    """
    model = create_model(X_train.shape[1], len(train_targets))
    model.fit(X_train, train_targets, epochs=epochs, batch_size=550, verbose=0)
    # Run inference once; the original re-predicted for every scored output.
    predictions = model.predict(X_test)
    # A single-output model yields a bare array rather than a list of arrays;
    # wrap it so predictions[i] always selects an output, not a sample row.
    if isinstance(predictions, np.ndarray):
        predictions = [predictions]
    return [average_precision_score(test_targets[j], predictions[i][:, 0])
            for j, i in enumerate(log_indexes)]

In [8]:
def cross_validate(folds, train_targets, test_targets):
    """Print the per-target scores of ``run_model`` averaged over all folds.

    Args:
        folds: Iterable of ``(X_train, X_test, y_train, y_test)`` entries.
        train_targets: Label-column indices used for training.
        test_targets: Label-column indices used for scoring.
    """
    totals = [0] * len(test_targets)
    for X_train, X_test, labels_train, labels_test in folds:
        # Transpose so each selected label column becomes one row/vector.
        train_columns = list(labels_train[:, train_targets].T)
        test_columns = labels_test[:, test_targets].T
        fold_scores = run_model(X_train, X_test, train_columns, test_columns)
        totals = [total + score for total, score in zip(totals, fold_scores)]

    print(np.array(totals) / len(folds))

In [9]:
def cross_validate_multiple(folds, train_targets, test_targets, log_indexes):
    """Print the scores of ``run_model_multiple`` averaged over all folds.

    Args:
        folds: Iterable of ``(X_train, X_test, y_train, y_test)`` entries.
        train_targets: Label-column indices used for training.
        test_targets: Label-column indices used for scoring.
        log_indexes: Forwarded unchanged to ``run_model_multiple``.
    """
    totals = [0] * len(test_targets)
    for X_train, X_test, labels_train, labels_test in folds:
        # Transpose so each selected label column becomes one row/vector.
        train_columns = list(labels_train[:, train_targets].T)
        test_columns = labels_test[:, test_targets].T
        fold_scores = run_model_multiple(
            X_train, X_test, train_columns, test_columns, log_indexes
        )
        totals = [total + score for total, score in zip(totals, fold_scores)]

    print(np.array(totals) / len(folds))

In [10]:
# Load the folds stored in folds/experimental_no_val.npy; every experiment
# cell below reads this notebook-global `folds`.
folds = load_crossvalidation_data(name='experimental_no_val')


In [15]:
# Single-output model: train on label column 6 and score on the same column.
cross_validate(folds, [6], [6])

[ 0.20898874]


In [16]:
# One independent single-output model per label column 1..9; each printed
# line is the fold-averaged score for that column.
for i in range(1,10):
    cross_validate(folds, [i], [i])

[ 0.07586703]
[ 0.06432117]
[ 0.06344388]
[ 0.09402855]
[ 0.16960528]
[ 0.21435139]
[ 0.14386683]
[ 0.13278658]
[ 0.07033692]


In [17]:
# One model with an output per label column 0..9; score outputs 1..9 against
# label columns 1..9 (output index i is compared with column i).
cross_validate_multiple(folds, range(10), range(1, 10), range(1, 10))

[ 0.08047899  0.08417372  0.08295415  0.06489015  0.18148426  0.21316629
  0.12396332  0.15537306  0.0925475 ]


In [14]:
# Two-output model trained on label columns 0 and 6; outputs 0 and 1 are
# scored against columns 0 and 6 respectively.
cross_validate_multiple(folds, [0, 6], [0, 6], [0, 1])

[ 0.38199638  0.21197333]


In [13]:
# Ten-output model trained on all label columns 0..9, but only outputs 0 and 6
# are scored (against label columns 0 and 6).
cross_validate_multiple(folds, range(0, 10), [0, 6], [0, 6])

[ 0.39816277  0.21093241]
