# Training a Classifier on the *Salammbô* Dataset with Keras
Author: Pierre Nugues

We first need to import some modules

In [17]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

### Reading the dataset
We can read the data from a file with the svmlight format or directly create numpy arrays

In [18]:
# Feature matrix: 30 samples (rows), each with 2 integer features.
# NOTE(review): presumably per-chapter counts from the Salammbô corpus
# (total characters, occurrences of one letter) -- confirm against the dataset source.
X = np.array(
    [[35680, 2217], [42514, 2761], [15162, 990], [35298, 2274],
     [29800, 1865], [40255, 2606], [74532, 4805], [37464, 2396],
     [31030, 1993], [24843, 1627], [36172, 2375], [39552, 2560],
     [72545, 4597], [75352, 4871], [18031, 1119], [36961, 2503],
     [43621, 2992], [15694, 1042], [36231, 2487], [29945, 2014],
     [40588, 2805], [75255, 5062], [37709, 2643], [30899, 2126],
     [25486, 1784], [37497, 2641], [40398, 2766], [74105, 5047],
     [76725, 5312], [18317, 1215]
     ])

# Binary labels aligned with the rows of X: the first 15 samples are class 0,
# the last 15 are class 1.
y = np.array(
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

## Scaling the Data
Scaling and normalizing the inputs usually matter a great deal with neural networks. We use scikit-learn transformers, which expose two main methods: `fit()` and `transform()`.

### Normalizing

In [19]:
from sklearn.preprocessing import Normalizer

# Rescale every sample (row) to unit norm; fit and transform in one call.
normalizer = Normalizer()
X_norm = normalizer.fit_transform(X)
X_norm[:4]

array([[0.99807515, 0.06201605],
       [0.99789783, 0.06480679],
       [0.99787509, 0.06515607],
       [0.99793128, 0.06428964]])

### Standardizing

In [20]:
from sklearn.preprocessing import StandardScaler

# Centre each column on zero and divide by its standard deviation,
# learning the statistics and applying them in a single step.
scaler = StandardScaler(with_mean=True, with_std=True)
X_scaled = scaler.fit_transform(X_norm)
X_scaled[:4]

array([[ 1.68336574, -1.7197772 ],
       [ 0.57376529, -0.56145427],
       [ 0.43143908, -0.41648279],
       [ 0.78308579, -0.77610221]])

## Creating a Model

We set a seed to have reproducible results

In [21]:
import tensorflow as tf

# Keras layer initialisers draw from TensorFlow's random generator, so
# seeding NumPy alone does not make the trained weights repeatable.
# Seed both libraries with the same value.
SEED = 1337
np.random.seed(SEED)
tf.random.set_seed(SEED)

We create a classifier equivalent to a logistic regression 

In [22]:
# One sigmoid unit fed directly by the inputs: the logistic-regression model.
model = keras.Sequential()
model.add(layers.Dense(1, activation='sigmoid'))

Or with one hidden layer

In [23]:
# Ten ReLU hidden units followed by the sigmoid output unit.
model2 = keras.Sequential()
model2.add(layers.Dense(10, activation='relu'))
# Optional regularisation, disabled by default:
# model2.add(layers.Dropout(0.5))
model2.add(layers.Dense(1, activation='sigmoid'))

To try the network with one hidden layer, set `complex` to `True`; set it to `False` to keep the logistic-regression model.

In [24]:
# Switch between the two architectures. NOTE: this name shadows the built-in
# `complex` type for the rest of the notebook; it is kept because a later
# cell reads it.
complex = True
if complex:
    model = model2

## Fitting the Model

We compile and fit the model

In [25]:
# Binary cross-entropy minimised with plain stochastic gradient descent;
# accuracy is tracked as an additional metric.
model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# One sample per gradient step, 30 passes over the data, no progress output.
model.fit(
    x=X_scaled,
    y=y,
    batch_size=1,
    epochs=30,
    verbose=0,
)

<keras.callbacks.History at 0x7fe2304e1a00>

### The weights

In [26]:
# List of NumPy arrays, layer by layer: the kernel matrix followed by the
# bias vector of each Dense layer.
model.get_weights()

[array([[-0.7965565 , -0.44522706, -0.35968745,  0.6257505 , -0.8306477 ,
          0.93574774, -0.05856686, -0.1642317 ,  0.4930141 ,  0.0694484 ],
        [ 0.8604017 , -0.10907561, -0.07557078,  0.65000653,  0.71487164,
         -1.0163488 , -0.4290638 , -0.22531436,  0.00753574, -0.46189457]],
       dtype=float32),
 array([ 0.44895697, -0.21531136,  0.14202812, -0.02591587,  0.5783034 ,
         0.32023472, -0.23404327, -0.11557139,  0.07403742, -0.16078186],
       dtype=float32),
 array([[ 9.4838035e-01],
        [-6.1242515e-01],
        [ 4.1775838e-01],
        [-3.9887774e-01],
        [ 1.2532274e+00],
        [-1.1199154e+00],
        [ 1.5655701e-01],
        [ 3.1844330e-01],
        [-2.7212921e-01],
        [-6.4928769e-05]], dtype=float32),
 array([-0.4107972], dtype=float32)]

## Prediction

We compute the probability of belonging to class 1 for every sample in the training set

In [27]:
# Forward pass over the scaled training inputs, one sample per batch;
# the result holds one sigmoid output per sample.
y_pred_proba = model.predict(X_scaled, batch_size=1)
# Display only the first four rows.
y_pred_proba[:4]



array([[0.0092844 ],
       [0.10853826],
       [0.14614746],
       [0.06986432]], dtype=float32)

We recompute it with matrices

In [28]:
from tensorflow.keras.activations import sigmoid, relu

# Rebuild the model's output by hand from the trained parameters.
weights = model.get_weights()
if complex:
    # ReLU hidden layer, then the sigmoid output unit.
    hidden = relu(X_scaled @ weights[0] + weights[1])
    proba = sigmoid(hidden @ weights[2] + weights[3])
else:
    # Single sigmoid unit applied to the affine map of the inputs.
    proba = sigmoid(X_scaled @ weights[0] + weights[1])
print(proba[:4])

tf.Tensor(
[[0.0092844 ]
 [0.10853825]
 [0.14614747]
 [0.06986433]], shape=(4, 1), dtype=float64)


## Evaluation

In [29]:
from sklearn.metrics import accuracy_score

In [30]:
def predict_class(preds, threshold=0.5):
    """Convert predicted probabilities into hard 0/1 class labels.

    Args:
        preds: Probabilities as a 1-D sequence of length n or a single
            column of shape (n, 1), such as the output of a one-unit
            sigmoid layer.
        threshold: Decision boundary; a probability greater than or equal
            to it maps to class 1. Defaults to 0.5.

    Returns:
        A 1-D integer NumPy array of n labels, each 0 or 1.

    Raises:
        ValueError: If `preds` is neither 1-D nor a single column.
    """
    probs = np.asarray(preds, dtype=float)
    if probs.ndim == 2 and probs.shape[1] == 1:
        # Drop the trailing axis so the result is always 1-D.
        probs = probs[:, 0]
    elif probs.ndim != 1:
        raise ValueError(
            f"expected shape (n,) or (n, 1), got {probs.shape}")
    # Vectorised comparison; it also yields an integer array for empty
    # input, where building from an empty Python list would give float64.
    return (probs >= threshold).astype(int)

In [31]:
# Turn the predicted probabilities into 0/1 labels (cut-off at 0.5).
y_pred = predict_class(y_pred_proba)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1])

In [32]:
# Compare the predicted labels with the true labels of the training samples.
accuracy_score(y, y_pred)

1.0

We computed the accuracy from the training set. This is not a good practice. We should use a dedicated test set instead.