<a href="https://colab.research.google.com/github/rohitashwachaks/advanced_optimisation/blob/main/notebooks/topic1-NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST

In [3]:
import tensorflow as tf
import numpy as np


In [4]:
mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

ndata_train = x_train.shape[0]
ndata_test = x_test.shape[0]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
x_train = x_train.reshape((ndata_train,28,28,1))
x_test = x_test.reshape((ndata_test,28,28,1))


xshape = x_train.shape[1:4]



In [6]:
NNmodel = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=xshape),
        tf.keras.layers.Dense(64,activation=tf.nn.relu,kernel_regularizer = tf.keras.regularizers.l1(0.0002)),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(64,activation=tf.nn.sigmoid),
        tf.keras.layers.Dense(10,activation=tf.nn.softmax)
        ])

In [7]:
NNmodel.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

NNmodel.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 64)                50240     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


In [8]:
NNmodel.fit(x_train,y_train,epochs=12,validation_split=0.2,batch_size=200)


Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f7ae0071250>

In [None]:
print('This model predicts '+str(NNmodel.evaluate(x_test,y_test)[1]*100) +'% of the test data correctly')

This model predicts 96.74000144004822% of the test data correctly


In [None]:
pred_probs = NNmodel.predict(x_test)
pred_probs.shape

(10000, 10)

In [None]:
np.sum(pred_probs[0,:])

1.0000001

In [None]:
pred_class = np.argmax(pred_probs, axis=1)
np.mean(pred_class==y_test)

0.9674

In [None]:
NNmodel2 = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=10,kernel_size=(5,5),activation=tf.nn.relu,input_shape=xshape),
        tf.keras.layers.MaxPooling2D(pool_size = (2,2),strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128,activation=tf.nn.relu,kernel_regularizer = tf.keras.regularizers.l1(0.0002)),
        tf.keras.layers.Dense(64,activation=tf.nn.softplus,kernel_regularizer = tf.keras.regularizers.l1(0.0005)),
        tf.keras.layers.Dense(10,activation=tf.nn.softmax)
        ])

In [None]:
NNmodel2 = tf.keras.models.Sequential()
NNmodel2.add(tf.keras.layers.Conv2D(filters=10,kernel_size=(5,5),activation=tf.nn.relu,input_shape=xshape))
NNmodel2.add(tf.keras.layers.MaxPooling2D(pool_size = (2,2),strides=2))
NNmodel2.add(tf.keras.layers.Flatten())
NNmodel2.add(tf.keras.layers.Dense(128,activation=tf.nn.relu,kernel_regularizer = tf.keras.regularizers.l1(0.0002)))
NNmodel2.add(tf.keras.layers.Dense(64,activation=tf.nn.softplus,kernel_regularizer = tf.keras.regularizers.l1(0.0005)))
NNmodel2.add(tf.keras.layers.Dense(10,activation=tf.nn.softmax))

In [None]:
NNmodel2.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

NNmodel2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 10)        260       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 10)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1440)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               184448    
_________________________________________________________________
dense_4 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                650       
Total params: 193,614
Trainable params: 193,614
Non-trainable params: 0
________________________________________________

In [None]:
NNmodel2.fit(x_train,y_train,epochs=5,validation_split=0.2,batch_size=500)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff7293a3710>

In [None]:
NNmodel2.predict(x_test)

array([[6.5287873e-05, 7.5331895e-06, 6.3127524e-04, ..., 9.9758267e-01,
        3.0019635e-05, 8.2740368e-04],
       [2.2000216e-04, 8.0777390e-04, 9.9821234e-01, ..., 2.2396985e-07,
        4.5951057e-04, 5.8929968e-09],
       [2.1894729e-04, 9.8462325e-01, 1.9825413e-03, ..., 8.7869214e-03,
        1.6384514e-03, 1.6235378e-04],
       ...,
       [1.8284029e-06, 2.5975501e-06, 1.6124095e-06, ..., 3.9705160e-04,
        1.5841410e-03, 6.1630188e-03],
       [2.3194090e-05, 6.5072172e-06, 4.8997941e-07, ..., 8.5936273e-07,
        2.7071964e-02, 5.8631067e-05],
       [2.8942926e-03, 5.9954090e-07, 7.9331156e-03, ..., 3.1042458e-07,
        1.5429036e-04, 5.3218369e-06]], dtype=float32)

In [None]:
pred_probs2 = NNmodel2.predict(x_test)
pred2 = np.argmax(pred_probs2, axis=1)
print(np.mean(pred2==y_test))

0.9721


## Functional API
we will need this eventually when we do reinforcement learning

In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Input

In [12]:
imp = Input(shape=xshape)
mid = Conv2D(16,(8,8),strides=4,activation='relu')(imp)
mid = Conv2D(32,(4,4),strides=2,activation='relu')(mid)
mid = Flatten()(mid)
mid = Dense(256,activation='relu')(mid)
out0 = Dense(10,activation='softmax')(mid)
model = Model(imp,out0)
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 6, 6, 16)          1040      
                                                                 
 conv2d_1 (Conv2D)           (None, 2, 2, 32)          8224      
                                                                 
 flatten_1 (Flatten)         (None, 128)               0         
                                                                 
 dense_3 (Dense)             (None, 256)               33024     
                                                                 
 dense_4 (Dense)             (None, 10)                2570      
                                                                 
Total params: 44,858
Trainable params: 44,858
Non-trainable p