# Classification with Keras

__Individual assignment__

Author of the assignment: Pierre Nugues

__Student name__:

## Imports
Imports you may use

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

### The dataset
You will use the arrays below:
1. `X` contains, for each sample, a column of ones for the intercept, followed by the total count of letters and the count of the letter _A_;
2. `y` contains the classes, where 0 is for English and 1 for French.

In [2]:
# Feature matrix, one row per sample (30 rows). Columns, in order:
#   0: constant 1.0 (intercept term)
#   1: total count of letters
#   2: count of the letter A
X = np.array([[1.0, 35680.0, 2217.0],
              [1.0, 42514.0, 2761.0],
              [1.0, 15162.0, 990.0],
              [1.0, 35298.0, 2274.0],
              [1.0, 29800.0, 1865.0],
              [1.0, 40255.0, 2606.0],
              [1.0, 74532.0, 4805.0],
              [1.0, 37464.0, 2396.0],
              [1.0, 31030.0, 1993.0],
              [1.0, 24843.0, 1627.0],
              [1.0, 36172.0, 2375.0],
              [1.0, 39552.0, 2560.0],
              [1.0, 72545.0, 4597.0],
              [1.0, 75352.0, 4871.0],
              [1.0, 18031.0, 1119.0],
              [1.0, 36961.0, 2503.0],
              [1.0, 43621.0, 2992.0],
              [1.0, 15694.0, 1042.0],
              [1.0, 36231.0, 2487.0],
              [1.0, 29945.0, 2014.0],
              [1.0, 40588.0, 2805.0],
              [1.0, 75255.0, 5062.0],
              [1.0, 37709.0, 2643.0],
              [1.0, 30899.0, 2126.0],
              [1.0, 25486.0, 1784.0],
              [1.0, 37497.0, 2641.0],
              [1.0, 40398.0, 2766.0],
              [1.0, 74105.0, 5047.0],
              [1.0, 76725.0, 5312.0],
              [1.0, 18317.0, 1215.0]])
# Class labels aligned with the rows of X: 0.0 = English (first 15 rows),
# 1.0 = French (last 15 rows).
y = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])

We remove the intercept column, as the Keras `Dense` layer adds a bias term automatically.

In [3]:
# Drop the leading column of ones (the intercept), keeping the two count
# columns. The guard makes the cell idempotent: re-running it without
# re-creating X would otherwise strip the letter-count column as well and
# leave a 1-column matrix that no longer matches the model's 2-feature input.
if X.shape[1] == 3:
    X = X[:, 1:]

### Keras
Using the dataset of English and French datapoints, we apply logistic regression with Keras. We need the `Sequential`, `Dense`, and `Input` classes.

In [4]:
import os

# Select PyTorch as the Keras backend. This cell has to run before the cell
# that imports keras, because Keras picks its backend when it is imported.
os.environ.update(KERAS_BACKEND='torch')

In [5]:
from keras import Sequential
from keras.layers import Dense, Input

In [6]:
# Write your code here
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend random generators before any weights are
# created, so that a Restart & Run All reproduces the same initialization and
# the same sequence of random draws during training.
set_random_seed(42)

# Logistic regression expressed as a network: two input features feeding a
# single sigmoid unit whose output is read as the probability of class 1.
model = Sequential()
model.add(Input(shape=(2,)))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy is the loss matching a sigmoid output with 0/1 labels;
# accuracy is tracked as a metric only.
model.compile(loss='binary_crossentropy',
              optimizer='adamw',
              metrics=['accuracy'])
model.summary()

We normalize each sample (row) to unit norm with `Normalizer` and then standardize each feature (column) with `StandardScaler`, both from scikit-learn.

In [7]:
from sklearn.preprocessing import StandardScaler, Normalizer

In [8]:
# Stage 1: rescale every row (sample) of X to unit norm.
normalizer = Normalizer()
X_n = normalizer.fit(X).transform(X)

# Stage 2: standardize each column (feature) of the normalized matrix.
scaler = StandardScaler()
X_ns = scaler.fit(X_n).transform(X_n)

We inspect the scaled features and then fit the model.

In [9]:
# Display the normalized-then-standardized feature matrix that is passed to the model.
X_ns

array([[ 1.68336574, -1.7197772 ],
       [ 0.57376529, -0.56145427],
       [ 0.43143908, -0.41648279],
       [ 0.78308579, -0.77610221],
       [ 1.50946833, -1.53481011],
       [ 0.6568289 , -0.64642544],
       [ 0.76463437, -0.75711155],
       [ 0.96998225, -0.96923123],
       [ 0.8609914 , -0.85643468],
       [ 0.35155679, -0.33545672],
       [ 0.28337412, -0.26648958],
       [ 0.66181241, -0.651532  ],
       [ 1.20247198, -1.21146528],
       [ 0.69465297, -0.68520787],
       [ 1.71267534, -1.7510841 ],
       [-0.57120787,  0.58348129],
       [-0.9400133 ,  0.94240556],
       [-0.01885242,  0.03712259],
       [-0.9622089 ,  0.96386122],
       [-0.37684158,  0.39245281],
       [-1.1617264 ,  1.15600177],
       [-0.38018445,  0.3957494 ],
       [-1.58557142,  1.55993407],
       [-1.03135987,  1.03060283],
       [-1.54634576,  1.52278881],
       [-1.73523572,  1.70122386],
       [-0.88802847,  0.89208989],
       [-0.73413296,  0.74260709],
       [-1.21552422,

In [10]:
# Train the model on the scaled features: 300 epochs, mini-batches of 4
# samples, with the per-epoch progress output turned off.
model.fit(x=X_ns, y=y, batch_size=4, epochs=300, verbose=0)

<keras.src.callbacks.history.History at 0x33cab05f0>

In [11]:
# Display the true labels for comparison with the predictions below.
y

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [12]:
# Output of the sigmoid unit for every training sample (one value per row).
model.predict(X_ns)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step


array([[0.01205564],
       [0.18943426],
       [0.2536109 ],
       [0.11858784],
       [0.01909316],
       [0.1580839 ],
       [0.12378004],
       [0.07580756],
       [0.09868404],
       [0.29529217],
       [0.3337819 ],
       [0.15634267],
       [0.04234033],
       [0.14525308],
       [0.01115159],
       [0.82210183],
       [0.9227091 ],
       [0.5245655 ],
       [0.9266734 ],
       [0.7365589 ],
       [0.954679  ],
       [0.73823506],
       [0.9841367 ],
       [0.93784434],
       [0.9825032 ],
       [0.9890943 ],
       [0.9126398 ],
       [0.87551475],
       [0.96026963],
       [0.50765884]], dtype=float32)

In [13]:
# Turn the sigmoid outputs into hard class labels: 1 when the output is
# strictly above 0.5, otherwise 0.
np.greater(model.predict(X_ns), 0.5).astype('int32')

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step


array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]], dtype=int32)

If you do not obtain a completely correct classification, you can rerun the training with more epochs. (Note that this will not have consequences on the approval of your notebook)