In [1]:
import pandas as pd
import numpy as np

%matplotlib inline

In [2]:
data = pd.read_csv("../../data/processed/pagos_por_unidad.csv",
                  dtype = {
                                "unidad_id": "object",
                            },
                  parse_dates = ["expensa_mes_pago"])

In [3]:
pagos_test = data.loc[data.expensa_mes_pago >= "2018-09-01"]
pagos_cv = data.loc[(data.expensa_mes_pago < "2018-09-01") & (data.expensa_mes_pago >= "2018-06-01")]
pagos_train = data.loc[data.expensa_mes_pago < "2018-06-01"]

pagos_train_cv = data.loc[data.expensa_mes_pago < "2018-09-01"]

X_train = pagos_train.drop(columns=["unidad_id","expensa_mes_pago", "target"])
Y_train = pagos_train.target
X_cv = pagos_cv.drop(columns=["unidad_id","expensa_mes_pago", "target"])
Y_cv = pagos_cv.target
X_test = pagos_test.drop(columns=["unidad_id","expensa_mes_pago", "target"])
Y_test = pagos_test.target

print(X_train.shape, Y_train.shape)
print(X_cv.shape, Y_cv.shape)
print(X_test.shape, Y_test.shape)

(108621, 6) (108621,)
(23226, 6) (23226,)
(37108, 6) (37108,)


In [4]:
words_dict = {
    "Impago":0,
    "Internet":1,
    "EntePago":2,
    "Efec-Cheque":3,
    "Otro":4,
    "NS/NC":5
}

def transform_values(X_i):
    for column in X_i.columns:
        X_i[column] = X_i[column].map(lambda x: words_dict[x])
    return X_i

In [5]:
X_train = transform_values(X_train)
X_cv = transform_values(X_cv)
X_test = transform_values(X_test)

In [6]:
print(X_train.shape, Y_train.shape)
print(X_cv.shape, Y_cv.shape)
print(X_test.shape, Y_test.shape)

(108621, 6) (108621,)
(23226, 6) (23226,)
(37108, 6) (37108,)


In [7]:
from keras import backend as K

def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

Using TensorFlow backend.


In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def get_train_test_meassures(model, X_train, Y_train, X_test, Y_test):
    train_preds = model.predict(X_train)
    print("accuracy:", accuracy_score(Y_train, train_preds))
    print("precision:", precision_score(Y_train, train_preds))
    print("recall:", recall_score(Y_train, train_preds))
    print("f1:", f1_score(Y_train, train_preds))
    print("--------------------------------------------------")
    test_preds = model.predict(X_test)
    print("accuracy:", accuracy_score(Y_test, test_preds))
    print("precision:", precision_score(Y_test, test_preds))
    print("recall:", recall_score(Y_test, test_preds))
    print("f1:", f1_score(Y_test, test_preds))

In [8]:
from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
from keras.models import Sequential
from keras.layers import LSTM, Embedding

def create_model(layer1=10, layer2=10, layer3=10, dropout=0.1):
    model = Sequential()
    model.add(Embedding(input_dim=6, output_dim=6, input_length=6))
    model.add(Flatten())
    model.add(Dense(layer1, activation="relu"))
    model.add(Dropout(dropout))
    model.add(Dense(layer2, activation="relu"))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc",f1])
    return model

In [9]:
data = data.sort_values("expensa_mes_pago").reset_index().drop(columns="index")

In [10]:
min_cv_id = data.loc[data.expensa_mes_pago == "2018-06-01", "expensa_mes_pago"].idxmin()
min_test_id = data.loc[data.expensa_mes_pago == "2018-09-01", "expensa_mes_pago"].idxmin()

X_train_cv = pagos_train_cv.drop(columns=["unidad_id","expensa_mes_pago", "target"])
Y_train_cv = pagos_train_cv.target
X_train_cv = transform_values(X_train_cv)
print(X_train_cv.shape, Y_train_cv.shape)

(131847, 6) (131847,)


In [11]:
from keras.wrappers.scikit_learn import KerasClassifier
test = KerasClassifier(build_fn=create_model, verbose=5, layer1=10, layer2=10, layer3=10, dropout=0.1, epochs=3, batch_size = 128)

In [12]:
test.fit(X_train_cv, Y_train_cv)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x26609e9f5c0>

In [16]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    "layer1":[10,20]
}

neural_network = GridSearchCV(
    estimator = KerasClassifier(build_fn=create_model, verbose=5),
    param_grid = param_grid,
    scoring = 'f1',
    n_jobs = 1,
    cv = [(list(range(0,min_cv_id)), list(range(min_cv_id,min_test_id)))],
    return_train_score = True,
    error_score = 0,
    verbose = 5
)


In [18]:
neural_network.fit(X_train_cv.values, Y_train_cv.values)

Fitting 1 folds for each of 2 candidates, totalling 2 fits
[CV] layer1=10 .......................................................
Epoch 1/1
[CV] .............. layer1=10, score=0.7479874988161758, total=   6.2s
[CV] layer1=20 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.5s remaining:    0.0s


Epoch 1/1
[CV] .............. layer1=20, score=0.7412767644726407, total=   6.4s


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   15.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   15.4s finished


Epoch 1/1


GridSearchCV(cv=[([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, ...4, 131835, 131836, 131837, 131838, 131839, 131840, 131841, 131842, 131843, 131844, 131845, 131846])],
       error_score=0,
       estimator=<keras.wrappers.scikit_learn.KerasClassifier object at 0x00000266146137B8>,
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'layer1': [10, 20]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score=True, scoring='f1', verbose=5)

In [20]:
neural_network.best_score_

0.7479874988161758

In [24]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from keras.wrappers.scikit_learn import KerasClassifier

search_spaces = {
    "layer1": Integer(1,100),
    "layer2": Integer(1,100),
    "layer3": Integer(1,100),
    "dropout": Real(0,1),
    "epochs": Integer(5,50),
    "batch_size": Integer(50, 10000)
}

neural_network = BayesSearchCV(
    estimator = KerasClassifier(build_fn=create_model, verbose=5),
    search_spaces = search_spaces,
    n_iter = 30,
    scoring = 'f1',
    n_jobs = 1,
    cv = [(list(range(0,min_cv_id)), list(range(min_cv_id,min_test_id)))],
    return_train_score = True,
    error_score = 0,
    random_state = 42,
    verbose = 5
)

In [25]:
neural_network.fit(X_train_cv.values, Y_train_cv.values)

Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=4131, dropout=0.7277257431773251, epochs=47, layer1=32, layer2=67, layer3=42 
Epoch 1/47
Epoch 2/47
Epoch 3/47
Epoch 4/47
Epoch 5/47
Epoch 6/47
Epoch 7/47
Epoch 8/47
Epoch 9/47
Epoch 10/47
Epoch 11/47
Epoch 12/47
Epoch 13/47
Epoch 14/47
Epoch 15/47
Epoch 16/47
Epoch 17/47
Epoch 18/47
Epoch 19/47
Epoch 20/47
Epoch 21/47
Epoch 22/47
Epoch 23/47
Epoch 24/47
Epoch 25/47
Epoch 26/47
Epoch 27/47
Epoch 28/47
Epoch 29/47
Epoch 30/47
Epoch 31/47
Epoch 32/47
Epoch 33/47
Epoch 34/47
Epoch 35/47
Epoch 36/47
Epoch 37/47
Epoch 38/47
Epoch 39/47
Epoch 40/47
Epoch 41/47
Epoch 42/47
Epoch 43/47
Epoch 44/47
Epoch 45/47
Epoch 46/47
Epoch 47/47
[CV]  batch_size=4131, dropout=0.7277257431773251, epochs=47, layer1=32, layer2=67, layer3=42, score=0.747821034282394, total=  15.7s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8382, dropout=0.8833152773808622, epochs=19, layer1=95, layer2=87, layer3=7 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.8s finished


Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
[CV]  batch_size=8382, dropout=0.8833152773808622, epochs=19, layer1=95, layer2=87, layer3=7, score=0.7461493468512381, total=   9.1s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=4476, dropout=0.9187225216693771, epochs=10, layer1=44, layer2=20, layer3=46 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.2s finished


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  batch_size=4476, dropout=0.9187225216693771, epochs=10, layer1=44, layer2=20, layer3=46, score=0.74293059125964, total=   3.9s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8133, dropout=0.1718715613965607, epochs=32, layer1=80, layer2=53, layer3=10 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.0s finished


Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
[CV]  batch_size=8133, dropout=0.1718715613965607, epochs=32, layer1=80, layer2=53, layer3=10, score=0.7405707443624027, total=  12.3s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8006, dropout=0.43802918644092337, epochs=29, layer1=72, layer2=90, layer3=72 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.4s finished


Epoch 1/29
Epoch 2/29
Epoch 3/29
Epoch 4/29
Epoch 5/29
Epoch 6/29
Epoch 7/29
Epoch 8/29
Epoch 9/29
Epoch 10/29
Epoch 11/29
Epoch 12/29
Epoch 13/29
Epoch 14/29
Epoch 15/29
Epoch 16/29
Epoch 17/29
Epoch 18/29
Epoch 19/29
Epoch 20/29
Epoch 21/29
Epoch 22/29
Epoch 23/29
Epoch 24/29
Epoch 25/29
Epoch 26/29
Epoch 27/29
Epoch 28/29
Epoch 29/29
[CV]  batch_size=8006, dropout=0.43802918644092337, epochs=29, layer1=72, layer2=90, layer3=72, score=0.7374470659407139, total=  12.4s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=7354, dropout=0.9393697376027718, epochs=12, layer1=20, layer2=80, layer3=38 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.5s finished


Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
[CV]  batch_size=7354, dropout=0.9393697376027718, epochs=12, layer1=20, layer2=80, layer3=38, score=0.6146481570129249, total=   4.8s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=6190, dropout=0.7753643788278826, epochs=21, layer1=88, layer2=60, layer3=60 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s finished


Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
[CV]  batch_size=6190, dropout=0.7753643788278826, epochs=21, layer1=88, layer2=60, layer3=60, score=0.7458791208791209, total=   9.0s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=5457, dropout=0.9200092264373164, epochs=27, layer1=84, layer2=33, layer3=6 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.1s finished


Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27
[CV]  batch_size=5457, dropout=0.9200092264373164, epochs=27, layer1=84, layer2=33, layer3=6, score=0.739252711932503, total=  10.0s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=9557, dropout=0.700621444621366, epochs=44, layer1=42, layer2=39, layer3=22 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.1s finished


Epoch 1/44
Epoch 2/44
Epoch 3/44
Epoch 4/44
Epoch 5/44
Epoch 6/44
Epoch 7/44
Epoch 8/44
Epoch 9/44
Epoch 10/44
Epoch 11/44
Epoch 12/44
Epoch 13/44
Epoch 14/44
Epoch 15/44
Epoch 16/44
Epoch 17/44
Epoch 18/44
Epoch 19/44
Epoch 20/44
Epoch 21/44
Epoch 22/44
Epoch 23/44
Epoch 24/44
Epoch 25/44
Epoch 26/44
Epoch 27/44
Epoch 28/44
Epoch 29/44
Epoch 30/44
Epoch 31/44
Epoch 32/44
Epoch 33/44
Epoch 34/44
Epoch 35/44
Epoch 36/44
Epoch 37/44
Epoch 38/44
Epoch 39/44
Epoch 40/44
Epoch 41/44
Epoch 42/44
Epoch 43/44
Epoch 44/44
[CV]  batch_size=9557, dropout=0.700621444621366, epochs=44, layer1=42, layer2=39, layer3=22, score=0.7451018808777429, total=  12.4s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.5s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=86, dropout=0.8178300196637516, epochs=38, layer1=26, layer2=58, layer3=28 
Epoch 1/38
Epoch 2/38
Epoch 3/38
Epoch 4/38
Epoch 5/38
Epoch 6/38
Epoch 7/38
Epoch 8/38
Epoch 9/38
Epoch 10/38
Epoch 11/38
Epoch 12/38
Epoch 13/38
Epoch 14/38
Epoch 15/38
Epoch 16/38
Epoch 17/38
Epoch 18/38
Epoch 19/38
Epoch 20/38
Epoch 21/38
Epoch 22/38
Epoch 23/38
Epoch 24/38
Epoch 25/38
Epoch 26/38
Epoch 27/38
Epoch 28/38
Epoch 29/38
Epoch 30/38
Epoch 31/38
Epoch 32/38
Epoch 33/38
Epoch 34/38
Epoch 35/38
Epoch 36/38
Epoch 37/38
Epoch 38/38
[CV]  batch_size=86, dropout=0.8178300196637516, epochs=38, layer1=26, layer2=58, layer3=28, score=0.7375588264744167, total= 3.4min


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.5min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.5min finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=50, dropout=0.0, epochs=50, layer1=50, layer2=1, layer3=100 
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[CV]  batch_size=50, dropout=0.0, epochs=50, layer1=50, layer2=1, layer3=100, score=0.7405437352245863, total= 4.8min


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.9min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.9min finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=50, dropout=0.0, epochs=50, layer1=100, layer2=100, layer3=100 
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[CV]  batch_size=50, dropout=0.0, epochs=50, layer1=100, layer2=100, layer3=100, score=0.7372898120672602, total= 6.1min


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.2min finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=50, dropout=0.0, epochs=5, layer1=64, layer2=1, layer3=1 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV]  batch_size=50, dropout=0.0, epochs=5, layer1=64, layer2=1, layer3=1, score=0.747839596077289, total=  36.4s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   39.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   39.0s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=50, dropout=0.41551467932019337, epochs=50, layer1=1, layer2=1, layer3=5 
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


  'precision', 'predicted', average, warn_for)


[CV]  batch_size=50, dropout=0.41551467932019337, epochs=50, layer1=1, layer2=1, layer3=5, score=0.0, total= 3.3min


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.3min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.3min finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=10000, dropout=1.0, epochs=50, layer1=59, layer2=100, layer3=1 
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[CV]  batch_size=10000, dropout=1.0, epochs=50, layer1=59, layer2=100, layer3=1, score=0.7442868400315209, total=  10.4s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.5s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=1609, dropout=0.0, epochs=5, layer1=30, layer2=1, layer3=1 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV]  batch_size=1609, dropout=0.0, epochs=5, layer1=30, layer2=1, layer3=1, score=0.7403092990560353, total=   3.4s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.5s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8916, dropout=0.4420849333686497, epochs=15, layer1=88, layer2=53, layer3=80 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[CV]  batch_size=8916, dropout=0.4420849333686497, epochs=15, layer1=88, layer2=53, layer3=80, score=0.7459794613447007, total=   7.6s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.7s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=9943, dropout=0.7516402721552441, epochs=25, layer1=11, layer2=29, layer3=88 
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[CV]  batch_size=9943, dropout=0.7516402721552441, epochs=25, layer1=11, layer2=29, layer3=88, score=0.7496896189475695, total=   6.2s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.2s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=3923, dropout=0.7626977018348454, epochs=30, layer1=26, layer2=58, layer3=75 
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[CV]  batch_size=3923, dropout=0.7626977018348454, epochs=30, layer1=26, layer2=58, layer3=75, score=0.7488124643739312, total=  11.1s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   11.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   11.2s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=10000, dropout=0.790757178956179, epochs=50, layer1=100, layer2=100, layer3=1 
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[CV]  batch_size=10000, dropout=0.790757178956179, epochs=50, layer1=100, layer2=100, layer3=1, score=0.7402170666135617, total=  25.6s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   25.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   25.7s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8653, dropout=0.8575433267950109, epochs=28, layer1=100, layer2=92, layer3=1 
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
[CV]  batch_size=8653, dropout=0.8575433267950109, epochs=28, layer1=100, layer2=92, layer3=1, score=0.7497632127296836, total=  14.6s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   14.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   14.7s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=3377, dropout=0.9086365121826798, epochs=21, layer1=72, layer2=1, layer3=12 
Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.6s finished


[CV]  batch_size=3377, dropout=0.9086365121826798, epochs=21, layer1=72, layer2=1, layer3=12, score=0.0, total=   8.6s
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=1559, dropout=0.0, epochs=5, layer1=100, layer2=26, layer3=1 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV]  batch_size=1559, dropout=0.0, epochs=5, layer1=100, layer2=26, layer3=1, score=0.744282334384858, total=   4.1s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.2s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=8032, dropout=0.9400010453362124, epochs=10, layer1=59, layer2=23, layer3=16 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  batch_size=8032, dropout=0.9400010453362124, epochs=10, layer1=59, layer2=23, layer3=16, score=0.6701463746737774, total=   4.8s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=7602, dropout=0.6691337989630649, epochs=31, layer1=1, layer2=30, layer3=93 
Epoch 1/31
Epoch 2/31
Epoch 3/31
Epoch 4/31
Epoch 5/31
Epoch 6/31
Epoch 7/31
Epoch 8/31
Epoch 9/31
Epoch 10/31
Epoch 11/31
Epoch 12/31
Epoch 13/31
Epoch 14/31
Epoch 15/31
Epoch 16/31
Epoch 17/31
Epoch 18/31
Epoch 19/31
Epoch 20/31
Epoch 21/31
Epoch 22/31
Epoch 23/31
Epoch 24/31
Epoch 25/31
Epoch 26/31
Epoch 27/31
Epoch 28/31
Epoch 29/31
Epoch 30/31
Epoch 31/31
[CV]  batch_size=7602, dropout=0.6691337989630649, epochs=31, layer1=1, layer2=30, layer3=93, score=0.5312701482914249, total=   7.7s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.8s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=5767, dropout=0.8202974234125167, epochs=35, layer1=100, layer2=39, layer3=98 
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
[CV]  batch_size=5767, dropout=0.8202974234125167, epochs=35, layer1=100, layer2=39, layer3=98, score=0.7359008230870847, total=  15.9s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.0s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=10000, dropout=0.7765338192021353, epochs=24, layer1=13, layer2=29, layer3=81 
Epoch 1/24
Epoch 2/24
Epoch 3/24
Epoch 4/24
Epoch 5/24
Epoch 6/24
Epoch 7/24
Epoch 8/24
Epoch 9/24
Epoch 10/24
Epoch 11/24
Epoch 12/24
Epoch 13/24
Epoch 14/24
Epoch 15/24
Epoch 16/24
Epoch 17/24
Epoch 18/24
Epoch 19/24
Epoch 20/24
Epoch 21/24
Epoch 22/24
Epoch 23/24
Epoch 24/24
[CV]  batch_size=10000, dropout=0.7765338192021353, epochs=24, layer1=13, layer2=29, layer3=81, score=0.7440855992932169, total=   6.5s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.6s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=5924, dropout=0.629900496028121, epochs=29, layer1=10, layer2=58, layer3=34 
Epoch 1/29
Epoch 2/29
Epoch 3/29
Epoch 4/29
Epoch 5/29
Epoch 6/29
Epoch 7/29
Epoch 8/29
Epoch 9/29
Epoch 10/29
Epoch 11/29
Epoch 12/29
Epoch 13/29
Epoch 14/29
Epoch 15/29
Epoch 16/29
Epoch 17/29
Epoch 18/29
Epoch 19/29
Epoch 20/29
Epoch 21/29
Epoch 22/29
Epoch 23/29
Epoch 24/29
Epoch 25/29
Epoch 26/29
Epoch 27/29
Epoch 28/29
Epoch 29/29
[CV]  batch_size=5924, dropout=0.629900496028121, epochs=29, layer1=10, layer2=58, layer3=34, score=0.7507658033973822, total=   9.9s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.9s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=9142, dropout=0.5166423966850628, epochs=20, layer1=12, layer2=28, layer3=42 
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV]  batch_size=9142, dropout=0.5166423966850628, epochs=20, layer1=12, layer2=28, layer3=42, score=0.750741289844329, total=   6.1s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.2s finished


Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV] batch_size=5630, dropout=0.6622499588708453, epochs=10, layer1=55, layer2=29, layer3=77 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV]  batch_size=5630, dropout=0.6622499588708453, epochs=10, layer1=55, layer2=29, layer3=77, score=0.7473330129745316, total=   5.6s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.6s finished


Epoch 1/29
Epoch 2/29
Epoch 3/29
Epoch 4/29
Epoch 5/29
Epoch 6/29
Epoch 7/29
Epoch 8/29
Epoch 9/29
Epoch 10/29
Epoch 11/29
Epoch 12/29
Epoch 13/29
Epoch 14/29
Epoch 15/29
Epoch 16/29
Epoch 17/29
Epoch 18/29
Epoch 19/29
Epoch 20/29
Epoch 21/29
Epoch 22/29
Epoch 23/29
Epoch 24/29
Epoch 25/29
Epoch 26/29
Epoch 27/29
Epoch 28/29
Epoch 29/29


BayesSearchCV(cv=[([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, ...4, 131835, 131836, 131837, 131838, 131839, 131840, 131841, 131842, 131843, 131844, 131845, 131846])],
       error_score=0,
       estimator=<keras.wrappers.scikit_learn.KerasClassifier object at 0x000002662FF046A0>,
       fit_params=None, iid=True, n_iter=30, n_jobs=1, n_points=1,
       optimizer_kwargs=None, pre_dispatch='2*n_jobs', random_state=42,
       refit=True, return_train_score=True, scoring='f1',
       search_spaces={'layer1': Integer(low=1, high=100), 'layer2': Integer(low=1, high=100), 'layer3': Integer(low=1, high=100), 'dropout': Real(low=0, high=1, prior='uniform', transform='identity'), 'epochs': Integer(low=5, high=50), 'batch_size': Integer(low=50, hig

In [27]:
neural_network.best_params_

{'batch_size': 5924,
 'dropout': 0.629900496028121,
 'epochs': 29,
 'layer1': 10,
 'layer2': 58,
 'layer3': 34}

In [37]:
get_train_test_meassures(neural_network, X_train_cv, Y_train_cv, X_test, Y_test)

accuracy: 0.8797924867460011
precision: 0.7943632875903792
recall: 0.7519135147214928
f1: 0.7725557166023277
--------------------------------------------------
accuracy: 0.8807534763393339
precision: 0.7660174269605331
recall: 0.6969802961408418
f1: 0.729869971308223


In [51]:
from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Conv1D, MaxPool1D

model = Sequential()
model.add(Embedding(input_dim=6, output_dim=6, input_length=6))
model.add(Conv1D(filters=10, kernel_size=3))
model.add(MaxPool1D(2))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc",f1])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_56 (Embedding)     (None, 6, 6)              36        
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 4, 10)             190       
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 2, 10)             0         
_________________________________________________________________
flatten_40 (Flatten)         (None, 20)                0         
_________________________________________________________________
dense_117 (Dense)            (None, 1)                 21        
Total params: 247
Trainable params: 247
Non-trainable params: 0
_________________________________________________________________


In [52]:
model.fit(X_train, Y_train, batch_size=128, validation_data=(X_cv, Y_cv), epochs=10)

Train on 108621 samples, validate on 23226 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x266787477f0>

In [56]:
def get_train_test_meassures2(model, X_train, Y_train, X_test, Y_test):
    train_preds = model.predict_classes(X_train)
    print("accuracy:", accuracy_score(Y_train, train_preds))
    print("precision:", precision_score(Y_train, train_preds))
    print("recall:", recall_score(Y_train, train_preds))
    print("f1:", f1_score(Y_train, train_preds))
    print("--------------------------------------------------")
    test_preds = model.predict_classes(X_test)
    print("accuracy:", accuracy_score(Y_test, test_preds))
    print("precision:", precision_score(Y_test, test_preds))
    print("recall:", recall_score(Y_test, test_preds))
    print("f1:", f1_score(Y_test, test_preds))

In [57]:
get_train_test_meassures2(model, X_train_cv, Y_train_cv, X_test, Y_test)

accuracy: 0.8804523424878837
precision: 0.8159455027122493
recall: 0.7227219397731717
f1: 0.7665096435872367
--------------------------------------------------
accuracy: 0.8809421149078366
precision: 0.7867089480215084
recall: 0.6652675760755509
f1: 0.7209096651926722
