# Training a Classifier on the *Salammbô* Dataset with Keras
Author: Pierre Nugues

We use three classes: French, English, and German

We first need to import some modules

In [25]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

### Reading the dataset
We can read the data from a file with the svmlight format or directly create numpy arrays

In [26]:
# Feature matrix: one row per sample, two integer features per row.
# Rows are grouped by class, 15 rows for each label.
X = np.array([
    # label 0
    [35680, 2217], [42514, 2761], [15162, 990],
    [35298, 2274], [29800, 1865], [40255, 2606],
    [74532, 4805], [37464, 2396], [31030, 1993],
    [24843, 1627], [36172, 2375], [39552, 2560],
    [72545, 4597], [75352, 4871], [18031, 1119],
    # label 1
    [36961, 2503], [43621, 2992], [15694, 1042],
    [36231, 2487], [29945, 2014], [40588, 2805],
    [75255, 5062], [37709, 2643], [30899, 2126],
    [25486, 1784], [37497, 2641], [40398, 2766],
    [74105, 5047], [76725, 5312], [18317, 1215],
])

# Class labels aligned with the rows of X.
y = np.array([0] * 15 + [1] * 15)

We add German data and we adjust `y`

In [27]:
# Samples of the third class (label 2), with the same two features as X.
# NOTE(review): the rows [29800, 1865], [31030, 1993] and [72545, 4597] also
# appear verbatim among the label-0 rows of X; they look like placeholders
# copied from that class -- confirm against the original data.
X_de = np.array(
    [[37599, 1771], [44565, 2116], [16156, 715], [37697, 1804],
     [29800, 1865], [42606, 2146], [78242, 3813], [40341, 1955],
     [31030, 1993], [26676, 1346], [39250, 1902], [41780, 2106],
     [72545, 4597], [79195, 3988], [19020, 928]
     ])

# Every class has the same number of samples as X_de.
n_per_class = len(X_de)

# Stack onto the rows of the first two classes only. Slicing X keeps this cell
# idempotent: re-running it no longer appends X_de a second time, which would
# leave X with more rows than y has labels.
X = np.vstack((X[:2 * n_per_class], X_de))

# One label per row of X: 0 and 1 for the existing rows, 2 for the new ones.
y = np.repeat([0, 1, 2], n_per_class)

## Scaling the Data
Scaling and normalizing are usually very significant with neural networks. We use scikit-learn transformers. They provide two main methods: `fit()` and `transform()`.

### Normalizing

In [28]:
from sklearn.preprocessing import Normalizer

# Row-wise normalization with the transformer's default settings,
# written as the two explicit steps: fit(), then transform().
normalizer = Normalizer()
normalizer.fit(X)
X_norm = normalizer.transform(X)
X_norm[:4]

array([[0.998, 0.062],
       [0.998, 0.065],
       [0.998, 0.065],
       [0.998, 0.064]])

### Standardizing

In [29]:
from sklearn.preprocessing import StandardScaler

# Center each column and scale it by its standard deviation,
# again as an explicit fit() followed by transform().
scaler = StandardScaler(with_mean=True, with_std=True)
scaler.fit(X_norm)
X_scaled = scaler.transform(X_norm)
X_scaled[:4]

array([[-0.031,  0.094],
       [-0.413,  0.442],
       [-0.462,  0.486],
       [-0.341,  0.378]])

In [30]:
# One-hot encode the integer labels: one column per class, a 1 in the column
# of the sample's class. This is the target format used when fitting the model.
Y_cat = keras.utils.to_categorical(y)
Y_cat[:4]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

## Creating a Model

We set a seed to have reproducible results

In [31]:
# Seed Python's `random`, NumPy and TensorFlow in a single call. Seeding NumPy
# alone leaves the other generators Keras relies on unseeded, so the weights
# and the training run would still differ from one execution to the next.
keras.utils.set_random_seed(1337)

We create a classifier equivalent to a logistic regression with `softmax`

In [32]:
# A single dense softmax layer with one output unit per class.
model = keras.Sequential()
model.add(layers.Dense(3, activation='softmax'))

Or with one hidden layer

In [33]:
# Same softmax output layer, preceded by a hidden layer of 10 ReLU units.
model2 = keras.Sequential()
model2.add(layers.Dense(10, activation='relu'))
# Optional regularization, currently disabled:
# model2.add(layers.Dropout(0.5))
model2.add(layers.Dense(3, activation='softmax'))

To try the network with one hidden layer, set `complex` to `True`

In [34]:
# Architecture switch: a true value selects the network with one hidden layer
# (model2); a false value keeps the single softmax layer defined earlier.
# NOTE(review): the name `complex` shadows the Python builtin type. It is kept
# because the cell that recomputes the probabilities tests the same name.
complex = True
# Plain truthiness test, the same form as the check made when the
# probabilities are recomputed with matrices, so both always agree.
if complex:
    model = model2

## Fitting the Model

We compile and fit the model

In [35]:
# Cross-entropy against the one-hot targets, optimized with the default
# 'sgd' optimizer; accuracy is reported alongside the loss.
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# 30 passes over the data with batches of a single sample.
model.fit(x=X_scaled, y=Y_cat, batch_size=1, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fe73ca7fc40>

### The weights

In [36]:
# List the trained parameters as NumPy arrays: for each layer in order,
# the weight matrix followed by the bias vector.
model.get_weights()

[array([[ 0.556,  0.675, -1.261, -0.44 , -0.341, -0.449,  0.075, -0.416,
          0.775, -0.126],
        [ 0.188,  0.056,  1.065, -0.406,  0.383,  0.059,  0.178, -0.213,
         -0.737, -0.013]], dtype=float32),
 array([-0.046,  0.657, -0.352, -0.068,  0.46 , -0.192, -0.116, -0.226,
         0.291,  0.15 ], dtype=float32),
 array([[-0.683,  0.56 ,  0.006],
        [ 0.384, -0.662,  0.439],
        [-0.495,  1.48 , -0.984],
        [-0.452, -0.361,  0.553],
        [ 0.435, -0.042, -0.605],
        [-0.606,  0.208,  0.527],
        [ 0.431, -0.568,  0.236],
        [-0.315, -0.372,  0.367],
        [-0.698, -0.585,  0.992],
        [ 0.344,  0.185, -0.004]], dtype=float32),
 array([ 0.578, -0.563, -0.015], dtype=float32)]

## Prediction
### Probabilities

We compute the probability of each class for every sample in the training set

In [37]:
# Forward pass over the same scaled samples the model was fitted on:
# one row per sample, one probability per class.
Y_pred_proba = model.predict(X_scaled)



In [38]:
# Display floats with 3 decimals and without scientific notation.
# NOTE(review): the outputs of earlier cells already show 3 decimals, which
# suggests this option was still active from a previous run -- consider moving
# it to the top of the notebook so a fresh kernel reproduces those outputs.
np.set_printoptions(precision=3, suppress=True)
Y_pred_proba

array([[0.634, 0.081, 0.284],
       [0.594, 0.301, 0.105],
       [0.551, 0.357, 0.092],
       [0.644, 0.229, 0.128],
       [0.668, 0.086, 0.246],
       [0.617, 0.27 , 0.113],
       [0.64 , 0.234, 0.125],
       [0.673, 0.177, 0.151],
       [0.657, 0.206, 0.137],
       [0.524, 0.391, 0.084],
       [0.501, 0.421, 0.078],
       [0.618, 0.268, 0.114],
       [0.694, 0.125, 0.181],
       [0.626, 0.257, 0.117],
       [0.629, 0.08 , 0.291],
       [0.208, 0.769, 0.022],
       [0.124, 0.865, 0.011],
       [0.391, 0.556, 0.053],
       [0.12 , 0.869, 0.011],
       [0.266, 0.703, 0.031],
       [0.089, 0.904, 0.007],
       [0.265, 0.704, 0.031],
       [0.048, 0.948, 0.003],
       [0.108, 0.882, 0.009],
       [0.051, 0.945, 0.004],
       [0.039, 0.958, 0.003],
       [0.134, 0.854, 0.012],
       [0.167, 0.816, 0.017],
       [0.082, 0.912, 0.007],
       [0.401, 0.544, 0.055],
       [0.008, 0.001, 0.991],
       [0.009, 0.001, 0.99 ],
       [0.003, 0.   , 0.996],
       [0.

We recompute it with matrices

In [39]:
from tensorflow.keras.activations import softmax, relu

# Read the parameters once: each layer contributes its weight matrix
# followed by its bias vector.
params = model.get_weights()
if complex:
    # Hidden layer (ReLU), then the softmax output layer.
    hidden = relu(X_scaled @ params[0] + params[1])
    proba = softmax(hidden @ params[2] + params[3])
else:
    # Single softmax layer applied directly to the inputs.
    proba = softmax(X_scaled @ params[0] + params[1])
print(proba[:4])

tf.Tensor(
[[0.634 0.081 0.284]
 [0.594 0.301 0.105]
 [0.551 0.357 0.092]
 [0.644 0.229 0.128]], shape=(4, 3), dtype=float64)


### Classes

In [40]:
# The predicted class is the column with the highest probability in each row.
y_pred = Y_pred_proba.argmax(axis=-1)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2,
       2])

## Loss
We recompute the loss

For one observation

In [41]:
# Cross-entropy of the first sample: the one-hot target picks out the
# log-probability of the true class.
-np.dot(Y_cat[0], np.log(Y_pred_proba[0]))

0.4549896

For the dataset

In [42]:
# Mean negative log-probability of the true class over all samples.
-np.log(Y_pred_proba[np.arange(len(y)), y]).mean()

0.35218993

## Evaluation

With Keras

In [43]:
# Returns the loss followed by the metrics passed to compile() (here accuracy),
# computed on the same data that was used for fitting.
model.evaluate(X_scaled, Y_cat)



[0.3521899878978729, 0.9333333373069763]

With sklearn

In [44]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 of the predictions against the true labels.
report = classification_report(y, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.83      1.00      0.91        15
           1       1.00      1.00      1.00        15
           2       1.00      0.80      0.89        15

    accuracy                           0.93        45
   macro avg       0.94      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45



We computed the accuracy from the training set. This is not a good practice. We should use a dedicated test set instead.