In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numba import cuda, jit, float32
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import string

# Data Preparation

In [2]:
df = pd.read_csv('../input/handwritten-az/handwritten_data_785.csv')

In [3]:
df.shape

(372037, 785)

In [4]:
features = df.values[:,1:] 
labels = df.values[:,0]


nr_to_letter = {k:v.upper() for k,v in enumerate(list(string.ascii_lowercase))}

In [5]:
features.shape

(372037, 784)

In [6]:
nr_to_letter

{0: 'A',
 1: 'B',
 2: 'C',
 3: 'D',
 4: 'E',
 5: 'F',
 6: 'G',
 7: 'H',
 8: 'I',
 9: 'J',
 10: 'K',
 11: 'L',
 12: 'M',
 13: 'N',
 14: 'O',
 15: 'P',
 16: 'Q',
 17: 'R',
 18: 'S',
 19: 'T',
 20: 'U',
 21: 'V',
 22: 'W',
 23: 'X',
 24: 'Y',
 25: 'Z'}

In [7]:
28*28

784

In [8]:
sample = features[0]
print(f'shape of sample {sample.shape}')

shape of sample (784,)


## Building the model

In [9]:
import numpy as np
from random import sample
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [10]:
len(features) == len(labels)

True

In [11]:
# this is the whole data (training + testing)

X = features
Y = labels

test_item_indices = sample(range(len(X)), 100)

mask = np.ones(len(X), dtype=bool)
mask[test_item_indices] = False

X_train = X[mask]
X_test = X[~mask]

Y_train = Y[mask]
Y_test = Y[~mask]

# X_train = X
# Y_train = Y

print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print()
print(f'Y_train shape: {Y_train.shape}')
print(f'Y_test shape: {Y_test.shape}')

X_train shape: (371937, 784)
X_test shape: (100, 784)

Y_train shape: (371937,)
Y_test shape: (100,)


### Normalizing the data

In [12]:
print(f"pixel 290 (random)   Max, Min pre normalization: {np.max(X_train[:,290]):0.2f}, {np.min(X[:,290]):0.2f}")
norm_l = Normalization(axis=-1)
norm_l.adapt(X)    # learns mean and variance
Xn = norm_l(X_train)
print(f"pixel 290 (random)    Max, Min post normalization: {np.max(Xn[:,290]):0.2f}, {np.min(Xn[:,290]):0.2f}")

pixel 290 (random)   Max, Min pre normalization: 255.00, 0.00
pixel 290 (random)    Max, Min post normalization: 1.21, -1.12


In [13]:
tf.random.set_seed(4321)
model = Sequential([
    tf.keras.Input(shape=(784,)),
    Dense(5, activation='sigmoid', name='L1'),
    Dense(4, activation='sigmoid', name='L2'),
    Dense(3, activation='softmax', name='L3')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
L1 (Dense)                   (None, 5)                 3925      
_________________________________________________________________
L2 (Dense)                   (None, 4)                 24        
_________________________________________________________________
L3 (Dense)                   (None, 3)                 15        
Total params: 3,964
Trainable params: 3,964
Non-trainable params: 0
_________________________________________________________________


In [14]:
model.get_layer('L2').get_weights()

[array([[ 0.06170315,  0.3203423 , -0.03428686, -0.668941  ],
        [ 0.5051141 , -0.41580436, -0.07314456,  0.6835302 ],
        [-0.6298079 ,  0.5417683 , -0.4720885 , -0.08925974],
        [ 0.13273102,  0.68023837,  0.7594863 ,  0.23648345],
        [ 0.15924227, -0.5777004 , -0.09970623, -0.79421216]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32)]

In [15]:
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
)

In [16]:
model.fit(
    Xn, Y_train,
    epochs=1
)



<tensorflow.python.keras.callbacks.History at 0x7c01180bf890>

In [17]:
model.get_layer('L2').get_weights()

[array([[nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan]], dtype=float32),
 array([nan, nan, nan, nan], dtype=float32)]