In [2]:
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers, datasets, utils, losses

(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

x_train = x_train.reshape(60000, 28, 28, 1)/255
# x_train = x_train[:1024,:,:,:]
x_test = x_test.reshape(10000, 28, 28, 1)/255
# x_test = x_test[:501,:,:,:]
y_train = utils.to_categorical(y_train, 10)
y_test = utils.to_categorical(y_test, 10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


CNN Model

In [27]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(lr=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [28]:
 model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test), verbose=1)
		# evaluate model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f475c5c32b0>

In [29]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.043854448944330215
Test accuracy: 0.9871000051498413


Freezing all Layers

In [30]:
# freezing 
for layer in model.layers:
    layer.trainable = False

In [None]:
RL Model

In [31]:
numInput = model.output
h = layers.Dense(20, activation='relu')(numInput)
outputs = layers.Dense(2, activation='linear')(h)

model_rl = models.Model(inputs=model.inputs, outputs=[outputs,numInput])
RMSprop = optimizers.RMSprop(lr=0.01)
model_rl.compile(loss='mse', optimizer=RMSprop)

In [32]:
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers

def tau(trial,s,a):
    numOut= model_rl.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    if (hotEncoding[0] and hotEncoding[9]) == 0 : s=x_train[trial+a,:].reshape(1,28, 28, 1)
    return s

def rho(s):
    numOut= model_rl.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    return ((hotEncoding[0]==1)+2*(hotEncoding[9]==1))

def terminal_state(s):
  numOut= model_rl.predict(s)
  argM = np.argmax(numOut[1])
  hotEncoding= utils.to_categorical(argM, 10)
  return (hotEncoding[0]==1 or hotEncoding[9]==1)    

gamma=0.8
invT = 0.8

In [35]:
for trial in range(1000):
    s = x_train[trial,:].reshape(1,28, 28, 1)
    #for t in range(0,10):
    if terminal_state(s): 
      break
    if trial > 1000 and invT > 0.1: invT -= 0.001
    prediction=model_rl.predict(s, steps=1, verbose=0)[0]
    aidx=np.argmax(prediction)
    if np.random.rand() < invT : aidx=1-aidx
    a=2*aidx-1
    next_s = tau(trial,s,a)
    if terminal_state(next_s): 
        y = rho(next_s)
    else:
        y = gamma*np.max(model_rl.predict(next_s, steps=1, verbose=0)[0])
    prediction[0,aidx]=y
    next_numOut= model_rl.predict(next_s, steps=1, verbose=0)[1]
    next_argM = np.argmax(next_numOut)
    next_hotEncoding= utils.to_categorical(next_argM, 10)
    predictionList = []
    predictionList.append(prediction.reshape(1,2))
    predictionList.append(next_hotEncoding.reshape(1,10))
    model_rl.fit(s, predictionList, epochs=1, verbose=0)
    s = np.copy(next_s)

In [36]:
policy = np.zeros(101)
Q=[]
for i in range(0,101):
  s = x_test[i,:].reshape(1,28, 28, 1)
  Qs=model_rl.predict(s, steps=1)[0]
  Q.append(Qs)
  aidx=np.argmax(Qs)
  policy[i]=2*aidx-1
    # s = np.roll(s,1)
print(np.transpose(Q))
print('policy:',np.transpose(policy))

[[[-0.22287118 -0.09293188  0.3947956  -0.20899694 -0.22633582
    0.39479765 -0.22633016 -0.09460001  0.02792939 -0.09460045
   -0.20899698  0.00626943 -0.09460019 -0.20899698  0.3948032
    0.02768115 -0.09460022 -0.22287118  0.15697539 -0.22633594
   -0.09460729  0.00626355  0.00626942  0.02768286 -0.22633457
   -0.20899698 -0.2228716  -0.22633594 -0.20899698  0.3947934
    0.16318549  0.3945507   0.16318549 -0.22636156 -0.22287117
   -0.09293188 -0.22285768  0.39479998 -0.09293188  0.3948018
    0.39479783 -0.22287111 -0.2263346  -0.09292728  0.16318135
    0.02768286  0.39478922 -0.09293188 -0.22633547 -0.22633594
    0.00626937  0.16318549  0.02768284  0.02768259  0.00626943
   -0.20899698 -0.22633594  0.39479917 -0.09460021  0.02758252
   -0.22287092  0.19598365 -0.09592002  0.162491   -0.22287117
   -0.2263275   0.00626922 -0.2263357   0.16318549 -0.20899698
   -0.22287121 -0.20899698 -0.09293185 -0.09462048  0.3948014
   -0.22287117  0.16318549 -0.09293006 -0.09458826 -0.22287

Freezing first 3 layers

In [43]:
for i, layer in enumerate(model.layers):
   print(i, layer.name)

for layer in model.layers[:4]:
   layer.trainable = False
for layer in model.layers[4:]:
   layer.trainable = True

0 conv2d_4
1 max_pooling2d_4
2 flatten_4
3 dense_10
4 dense_11


In [44]:
numInput = model.output
h = layers.Dense(20, activation='relu')(numInput)
outputs = layers.Dense(2, activation='linear')(h)

model_rl = models.Model(inputs=model.inputs, outputs=[outputs,numInput])
RMSprop = optimizers.RMSprop(lr=0.01)
model_rl.compile(loss='mse', optimizer=RMSprop)

In [45]:
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers

def tau(trial,s,a):
    numOut= model_rl.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    if (hotEncoding[0] and hotEncoding[9]) == 0 : s=x_train[trial+a,:].reshape(1,28, 28, 1)
    return s

def rho(s):
    numOut= model_rl.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    return ((hotEncoding[0]==1)+2*(hotEncoding[9]==1))

def terminal_state(s):
  numOut= model_rl.predict(s)
  argM = np.argmax(numOut[1])
  hotEncoding= utils.to_categorical(argM, 10)
  return (hotEncoding[0]==1 or hotEncoding[9]==1)    

gamma=0.8
invT = 0.8

In [46]:
for trial in range(400):
    s = x_train[trial,:].reshape(1,28, 28, 1)
   
    if terminal_state(s): 
      break
    if trial > 1000 and invT > 0.1: invT -= 0.001
    prediction=model_rl.predict(s, steps=1, verbose=0)[0]
    aidx=np.argmax(prediction)
    if np.random.rand() < invT : aidx=1-aidx
    a=2*aidx-1
    next_s = tau(trial,s,a)
    if terminal_state(next_s): 
        y = rho(next_s)
    else:
        y = gamma*np.max(model_rl.predict(next_s, steps=1, verbose=0)[0])
        trial= trial+a
    prediction[0,aidx]=y
    next_numOut= model_rl.predict(next_s, steps=1, verbose=0)[1]
    next_argM = np.argmax(next_numOut)
    next_hotEncoding= utils.to_categorical(next_argM, 10)
    predictionList = []
    predictionList.append(prediction.reshape(1,2))
    predictionList.append(next_hotEncoding.reshape(1,10))
    model_rl.fit(s, predictionList, epochs=1, verbose=0)
    s = np.copy(next_s)

In [42]:
policy = np.zeros(101)
Q=[]
for i in range(0,101):
  s = x_test[i,:].reshape(1,28, 28, 1)
  Qs=model_rl.predict(s, steps=1)[0]
  Q.append(Qs)
  aidx=np.argmax(Qs)
  policy[i]=2*aidx-1
    # s = np.roll(s,1)
print(np.transpose(Q))
print('policy:',np.transpose(policy))

[[[ 0.11923891  0.12339538  0.140427   -0.02715863  0.09707148
    0.14042516  0.09706937 -0.02398691  0.255263   -0.02398761
   -0.02715859  0.32909346 -0.02398789 -0.02715861  0.14042673
    0.25502348 -0.0239878   0.11923891  0.01547405  0.09707162
   -0.02398371  0.32909125  0.32909343  0.2550246   0.09706973
   -0.02715859  0.1192386   0.09707162 -0.02715859  0.14042811
    0.03407424  0.14040288  0.03407425  0.09699713  0.11923891
    0.12339538  0.11924632  0.14042662  0.12339541  0.14042692
    0.14042604  0.11923902  0.09707001  0.12339769  0.03407716
    0.2550246   0.14042854  0.12339538  0.09707104  0.09707162
    0.3290935   0.03407424  0.25502458  0.25502443  0.32909346
   -0.02715859  0.09707162  0.14042588 -0.0239879   0.2550424
    0.11923966  0.27830437 -0.02134241  0.03365729  0.11923891
    0.0970615   0.32909325  0.09707136  0.03407424 -0.02715859
    0.1192389  -0.02715859  0.1233954  -0.02396581  0.14042695
    0.11923891  0.03407424  0.12339613 -0.02392989  0.11

In [None]:
Part 2


In [6]:
inputs = layers.Input(shape=(28, 28, 1,))
x=layers.Conv2D(32, kernel_size=(3, 3),activation='relu')(inputs)
x=layers.Conv2D(64, (3, 3), activation='relu')(x)
x=layers.MaxPooling2D(pool_size=(2, 2))(x)
x=layers.Dropout(0.25)(x)
x=layers.Flatten()(x)
x=layers.Dense(128, activation='relu')(x)
x=layers.Dropout(0.5)(x)
numOut=layers.Dense(10, activation='softmax')(x)
h = layers.Dense(20, activation='relu')(numOut)
outputs = layers.Dense(2, activation='linear')(h)

model_RL = models.Model(inputs=inputs, outputs=[outputs,numOut])
RMSprop = optimizers.RMSprop(lr=0.01)
model_RL.compile(loss='mse', optimizer=RMSprop)

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers

def tau(trial,s,a):
    numOut= model_RL.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    if (hotEncoding[0] and hotEncoding[9]) == 0 : s=x_train[trial+a,:].reshape(1,28, 28, 1)
    return s

def rho(s):
    numOut= model_rl.predict(s)
    argM = np.argmax(numOut[1])
    hotEncoding= utils.to_categorical(argM, 10)
    return ((hotEncoding[0]==1)+2*(hotEncoding[9]==1))

def terminal_state(s):
  numOut= model_RL.predict(s)
  argM = np.argmax(numOut[1])
  hotEncoding= utils.to_categorical(argM, 10)
  return (hotEncoding[0]==1 or hotEncoding[9]==1)    

gamma=0.8
invT = 0.8

In [17]:
for trial in range(400):
    s = x_train[trial,:].reshape(1,28, 28, 1)
    if terminal_state(s): 
      break
    if trial > 1000 and invT > 0.1: invT -= 0.001
    prediction=model_RL.predict(s, steps=1, verbose=0)[0]
    aidx=np.argmax(prediction)
    if np.random.rand() < invT : aidx=1-aidx
    a=2*aidx-1
    next_s = tau(trial,s,a)
    if terminal_state(next_s): 
        y = rho(next_s)
    else:
        y = gamma*np.max(model_RL.predict(next_s, steps=1, verbose=0)[0])
    prediction[0,aidx]=y
    next_numOut= model_RL.predict(next_s, steps=1, verbose=0)[1]
    next_argM = np.argmax(next_numOut)
    next_hotEncoding= utils.to_categorical(next_argM, 10)
    predictionList = []
    predictionList.append(prediction.reshape(1,2))
    predictionList.append(next_hotEncoding.reshape(1,10))
    model_RL.fit(s, predictionList, epochs=1, verbose=0)
    s = np.copy(next_s)

In [19]:
policy = np.zeros(101)
Q=[]
for i in range(0,101):
  s = x_test[i,:].reshape(1,28, 28, 1)
  Qs=model_RL.predict(s, steps=1)[0]
  Q.append(Qs)
  aidx=np.argmax(Qs)
  policy[i]=2*aidx-1
    # s = np.roll(s,1)
print(np.transpose(Q))
print('policy:',np.transpose(policy))

[[[0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.04073026 0.04073026 0.04073026 0.04073026
   0.04073026 0.04073026 0.0407302