In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numba import cuda, jit, float32
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import string

# Data Preparation

In [2]:
df = pd.read_csv('../input/handwritten-az/handwritten_data_785.csv')

In [3]:
df.shape

(372037, 785)

In [4]:
features = df.values[:,1:] 
labels = df.values[:,0]


nr_to_letter = {k:v.upper() for k,v in enumerate(list(string.ascii_lowercase))}

In [5]:
# let's carbage collect the df data to release memory
import gc

del df

gc.collect()

60

In [6]:
features.shape

(372037, 784)

In [7]:
nr_to_letter

{0: 'A',
 1: 'B',
 2: 'C',
 3: 'D',
 4: 'E',
 5: 'F',
 6: 'G',
 7: 'H',
 8: 'I',
 9: 'J',
 10: 'K',
 11: 'L',
 12: 'M',
 13: 'N',
 14: 'O',
 15: 'P',
 16: 'Q',
 17: 'R',
 18: 'S',
 19: 'T',
 20: 'U',
 21: 'V',
 22: 'W',
 23: 'X',
 24: 'Y',
 25: 'Z'}

In [8]:
28*28

784

In [9]:
sample = features[0]
print(f'shape of sample {sample.shape}')

shape of sample (784,)


### Let's get the data only for A and B (we're doing binary classification)

In [10]:
labels.shape[0]/25

14881.48

In [11]:
# index of the first occurance of A
np.where(labels == 0)[0][0]

0

In [12]:
# index of the first occurance of B
np.where(labels == 1)[0][0]

14780

In [13]:
# index of the first occurance of C
np.where(labels == 2)[0][0]

23757

In [14]:
features = features[:23757,]

In [15]:
features.shape

(23757, 784)

In [16]:
labels = labels[:23757,]

In [17]:
labels.shape

(23757,)

In [18]:
labels

array([0, 0, 0, ..., 1, 1, 1])

## Building the model

In [19]:
import numpy as np
from random import sample
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [20]:
len(features) == len(labels)

True

In [21]:
# this is the whole data (training + testing)

X = features
Y = labels

test_item_indices = sample(range(len(X)), 100)

mask = np.ones(len(X), dtype=bool)
mask[test_item_indices] = False

X_train = X[mask]
X_test = X[~mask]

Y_train = Y[mask]
Y_test = Y[~mask]

# X_train = X
# Y_train = Y

print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print()
print(f'Y_train shape: {Y_train.shape}')
print(f'Y_test shape: {Y_test.shape}')

X_train shape: (23657, 784)
X_test shape: (100, 784)

Y_train shape: (23657,)
Y_test shape: (100,)


### Normalizing the data

In [22]:
# FOR SOME REASON, NORMALIZING THE DATA REPLACES MOST OF THE DATA POINTS WITH 'nan' SO IT'S COMMENTED TEMPORARILY.

# print(f"pixel 290 (random)   Max, Min pre normalization: {np.max(X_train[:,290]):0.2f}, {np.min(X[:,290]):0.2f}")
# norm_l = Normalization(axis=-1)
# norm_l.adapt(X_train)    # learns mean and variance
# Xn = norm_l(X_train)
# print(f"pixel 290 (random)    Max, Min post normalization: {np.max(Xn[:,290]):0.2f}, {np.min(Xn[:,290]):0.2f}")

In [23]:
# let's tile the data to make copies of it.
Xt = np.tile(X_train, (40, 1))
Yt = np.tile(Y_train, (40, 1))

print(Xt.shape)
print(Yt.shape)

(946280, 784)
(40, 23657)


In [24]:
tf.random.set_seed(4321)
model = Sequential([
    tf.keras.Input(shape=(784,)),
    Dense(5, activation='sigmoid', name='L1'),
    Dense(3, activation='sigmoid', name='L2'),
    Dense(1, activation='sigmoid', name='L3')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
L1 (Dense)                   (None, 5)                 3925      
_________________________________________________________________
L2 (Dense)                   (None, 3)                 18        
_________________________________________________________________
L3 (Dense)                   (None, 1)                 4         
Total params: 3,947
Trainable params: 3,947
Non-trainable params: 0
_________________________________________________________________


In [25]:
model.get_layer('L2').get_weights()

[array([[ 0.06544602,  0.3397743 , -0.0363667 ],
        [-0.709519  ,  0.5357544 , -0.44102708],
        [-0.07758152,  0.7249933 , -0.66801214],
        [ 0.5746321 , -0.50072545, -0.09467423],
        [ 0.14078254,  0.72150177,  0.80555683]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [26]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
)

In [27]:
model.fit(
    X_train, Y_train,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7896bc45b150>

In [28]:
model.get_layer('L2').get_weights()

[array([[-0.26583132,  0.32866943, -0.31083363],
        [-1.3758298 ,  0.9005257 , -1.0420251 ],
        [-0.13899443,  0.7866308 , -0.728551  ],
        [ 5.0527496 , -4.855959  ,  3.8208673 ],
        [ 0.08080715,  0.6617811 ,  0.74554193]], dtype=float32),
 array([-0.66306895,  0.358897  , -0.5970942 ], dtype=float32)]

In [29]:
model.evaluate(X_test, Y_test)



0.10034599155187607

In [30]:
predictions = model.predict(X_test)

In [31]:
predictions[:10]

array([[0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842],
       [0.02878842]], dtype=float32)

In [32]:
yhat = (predictions >= 0.5).astype(int)

In [33]:

yhat

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
    

In [34]:
# probability of the first element being a 'B' (but we know it's 'A')
model.predict(X_train[0].reshape(1, 1, len(X_train[0])))

array([[[0.02878842]]], dtype=float32)