In [2]:
from sklearn.model_selection import train_test_split
import json
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

In [3]:
# Load the raw session records and the matching purchase labels.
# The encoding is pinned to UTF-8 (the encoding JSON is specified in) so that
# decoding does not depend on the platform's locale default.
with open('records-merged.json', encoding='utf-8') as file:
    sessions = json.load(file)

with open('buys2.json', encoding='utf-8') as file:
    buys = json.load(file)


In [4]:
# Wrap the loaded JSON payloads in DataFrames: `features` feeds the model,
# `labels` holds the targets.
features = pd.DataFrame(sessions)
labels = pd.DataFrame(buys)

# Hold-out configuration.
RANDOM_STATE = 55
TEST_SIZE = 0.25

# shuffle=False keeps the rows in their original order, so the trailing
# TEST_SIZE fraction of the data becomes the test set.
# NOTE(review): per the scikit-learn docs random_state only matters when
# shuffling is enabled - confirm whether an unshuffled split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    shuffle=False,
)

In [5]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted transformation is then applied to both splits.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Number of feature columns; create_model reads this for its input shape.
attrs_count = X_train.shape[1]

In [6]:
from sklearn.model_selection import GridSearchCV

def create_model(units1, units2):
    """Build and compile a small fully connected binary classifier.

    The network has two ReLU hidden layers followed by a single sigmoid
    output unit. The input width is taken from the module-level
    ``attrs_count``, which must be defined before this function is called.

    Args:
        units1: Number of units in the first hidden layer.
        units2: Number of units in the second hidden layer.

    Returns:
        A ``keras.Sequential`` model compiled with binary cross-entropy
        loss, the Adam optimizer and accuracy as the reported metric.
    """
    network = keras.Sequential([
        layers.Dense(units1, activation='relu', input_shape=(attrs_count,)),
        layers.Dense(units2, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    return network

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# scikit-learn style wrapper around create_model so that GridSearchCV can
# build and train the network for each parameter combination.
kerasClf = KerasClassifier(build_fn=create_model, batch_size=25)

# Candidate hidden-layer widths; every (units1, units2) pair is evaluated.
tuned_parameters = [
    {
        'units1': [100, 120],
        'units2': [10, 20, 30],
    },
]

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split.
clf = GridSearchCV(estimator=kerasClf, param_grid=tuned_parameters, cv=5)
clf.fit(X_train, y_train)

print("Best Hyperparameters found are:", clf.best_params_, sep="\n")

Best Hyperparameters found are:
{'units1': 100, 'units2': 20}


In [7]:
# Predict on the held-out split with the tuned estimator and keep the first
# element of each returned row, giving a flat list with one entry per sample.
y_pred = [row[0] for row in clf.predict(X_test)]

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [8]:
# Report balanced accuracy of the test-set predictions (header and value on
# separate lines).
print(
    "Balanced accuracy score: ",
    balanced_accuracy_score(y_test.values.tolist(), y_pred),
    sep="\n",
)

Balanced accuracy score: 
0.5991573185707978


In [9]:
# Confusion matrix of the model's test-set predictions ...
print(confusion_matrix(y_test, y_pred))
# ... followed by the matrix of a trivial baseline that predicts 0 for every
# sample, for comparison.
print(confusion_matrix(y_test, [0] * len(y_test)))

[[11127    55]
 [  345    88]]
[[11182     0]
 [  433     0]]


In [10]:
# Persist the network that was selected and evaluated in the cells above.
#
# GridSearchCV refits the best parameter combination on the whole training
# split by default (refit=True) and exposes it as `best_estimator_`; the
# fitted Keras network is available on the wrapper's `model` attribute.
# Saving that network guarantees the file on disk is the model whose metrics
# were reported. Building a fresh network and calling `fit` with Keras'
# default settings would instead train a different model, with a batch size
# other than the 25 used during the search.
par = clf.best_params_  # kept available for inspection in later cells
model = clf.best_estimator_.model

# NOTE: the network expects inputs standardized with the fitted scaler `sc`;
# the scaler itself is not stored in this file.
model.save('model.h5')

