In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers
import tensorflow
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import scipy.ndimage
from keras import backend as K

In [2]:
# Configuration options
num_classes = 10

# Load the data
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

print(X_train.shape)
print(Y_train.shape)

feature_vector_length = X_train.shape[1]*X_train.shape[2]
print(feature_vector_length)

# Flatten each 2D image into a 1D feature vector - MLPs do not understand
# such things as '2D'. 28 x 28 pixels become 784 features.
X_train = X_train.reshape(X_train.shape[0], feature_vector_length)
X_test = X_test.reshape(X_test.shape[0], feature_vector_length)
print(X_train.shape)
print(X_test.shape)

# Scale pixel intensities from [0, 255] to [0, 1].
# The previous `X /= 255 - 0.5` was parsed as `X /= 254.5` because the
# subtraction on the right-hand side is evaluated before the in-place division.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Convert target classes to categorical (one-hot) ones
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)

# Set the input shape
input_shape = (feature_vector_length,)
print(f'Feature shape: {input_shape}')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)
(60000,)
784
(60000, 784)
(10000, 784)
Feature shape: (784,)


In [3]:
# Create the model: a two-hidden-layer MLP digit classifier with dropout.
mnist_model = Sequential()
mnist_model.add(Dense(392, input_shape=input_shape, activation='relu'))
mnist_model.add(Dropout(0.2))
mnist_model.add(Dense(196, activation='relu'))
mnist_model.add(Dropout(0.5))
mnist_model.add(Dense(num_classes, activation='softmax'))

# Configure the model and start training.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics and the final test metrics come from the same
# images; carve a validation split out of the training data if the validation
# curve is ever used for model selection.
mnist_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
mnist_history = mnist_model.fit(X_train, Y_train, epochs=10, batch_size=250, verbose=1, validation_data=(X_test, Y_test))

# Test the model after training
test_results = mnist_model.evaluate(X_test, Y_test, verbose=1)
# Accuracy is reported by Keras as a fraction in [0, 1]; the `%` format spec
# multiplies by 100 so the printed percent sign is actually correct.
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]:.2%}')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test results - Loss: 0.06800847500562668 - Accuracy: 0.9815999865531921%


In [37]:
def tau(s, a):
    """Transition function on a one-hot state vector of length 10.

    Parameters
    ----------
    s : array-like of length 10
        One-hot encoding of the current position.
    a : int
        Shift to apply (the callers in this notebook pass -1 or +1).

    Returns
    -------
    The state shifted by `a` positions via ``np.roll``, or `s` unchanged when
    it is already at one of the two end positions (index 0 or index 9).
    """
    # The previous test `(s[0] and s[9]) == 0` was true for every one-hot state
    # (both ends can never be 1 simultaneously), so terminal states were still
    # shifted. Check each end explicitly instead.
    if s[0] == 0 and s[9] == 0:
        s = np.roll(s, a)
    return s

def rho(s):
    """Reward for a state: 1 if position 0 is set, plus 2 if position 9 is set."""
    at_left_end = s[0] == 1
    at_right_end = s[9] == 1
    return at_left_end + 2 * at_right_end

def terminal_state(s):
    """Tell whether the state sits at either end position (index 0 or index 9)."""
    at_left_end = s[0] == 1
    if at_left_end:
        return at_left_end
    return s[9] == 1

def getNextStateImage(next_s):
    """Return the first test image that `mnist_model` classifies as the target state.

    Parameters
    ----------
    next_s : numpy array
        State vector; only its ``argmax()`` (the target class index) is used.

    Returns
    -------
    The first row of the module-level `X_test` whose predicted class equals
    the target class.

    Raises
    ------
    ValueError
        If no image in `X_test` is predicted as the target class. Previously
        the function fell off the end and returned None, which only surfaced
        later as an AttributeError when the caller reshaped the result.

    Notes
    -----
    Images are classified one at a time and the scan stops at the first hit.
    """
    # The target class does not change during the scan, so compute it once.
    target_class = next_s.argmax()
    for image in X_test:
        pred = mnist_model.predict(image.reshape(1, 784))
        if pred.argmax() == target_class:
            return image
    raise ValueError(f'no image in X_test is classified as class {target_class}')

# Discount factor applied to the best predicted value of the next state
# when building the training target.
gamma = 0.5

# Probability with which the training loop swaps the greedy action for the
# other one; the loop lowers it in steps of 0.001 once trial > 30 while it
# is still above 0.1.
invT = 1

In [4]:
# Create the model: the same layer stack as the MNIST classifier (784 -> 392
# -> 196 -> 10 softmax) followed by a small head that maps the 10 class
# activations to 2 linear outputs, one value per action.
# Note: these are freshly initialised layers; nothing here copies or shares
# weights with `mnist_model`.
combined_model = Sequential()
combined_model.add(Dense(392, input_shape=input_shape, activation='relu'))
combined_model.add(Dropout(0.2))
combined_model.add(Dense(196, activation='relu'))
combined_model.add(Dropout(0.5))
combined_model.add(Dense(num_classes, activation='softmax'))
combined_model.add(Dense(10, activation='relu'))
combined_model.add(Dense(2, activation='linear'))
# `lr` is the deprecated spelling of this argument and is rejected by current
# Keras releases; `learning_rate` is the supported keyword.
RMSprop = optimizers.RMSprop(learning_rate=0.01)
combined_model.compile(loss='mse', optimizer=RMSprop)

In [10]:
# Sanity check: action values of the untrained network for the first training image.
combined_model.predict(X_train[:1])

array([[0.05006948, 0.15662144]], dtype=float32)

In [30]:
# Digit class the classifier assigns to the first training image.
mnist_model.predict(X_train[:1]).argmax()

5

In [32]:
# One-hot state vector derived from the classifier's prediction for the first training image.
to_categorical(mnist_model.predict(X_train[:1]).argmax(), num_classes=10).astype(int)

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

In [None]:
# Q-learning over images: the state is observed only through an MNIST image,
# `mnist_model` decodes it to a one-hot position, and `combined_model` is
# trained to predict one value per action (index 0 -> shift -1, index 1 -> shift +1).
for trial in range(400):
    # Every episode restarts from the same training image.
    s = X_train[0]
    for t in range(10):
        print("for trial:", trial, "-", t)
        # Decode the current image into a one-hot state via the digit classifier.
        state = to_categorical(mnist_model.predict(s.reshape(1, 784)).argmax(), num_classes=10).astype(int)
        if terminal_state(state):
            break
        # After the first 30 episodes, lower the exploration probability by
        # 0.001 per step for as long as it is still above 0.1.
        if trial > 30 and invT > 0.1:
            invT -= 0.001
        prediction = combined_model.predict(s.reshape(1, 784), steps=1, verbose=0)
        aidx = np.argmax(prediction)
        # With probability invT take the other action instead of the greedy one.
        if np.random.rand() < invT:
            aidx = 1 - aidx
        a = 2 * aidx - 1  # action index {0, 1} -> shift {-1, +1}
        next_state = tau(state, a)
        # The image for the next state is needed whether or not it is terminal,
        # so look it up once instead of repeating the call in both branches.
        next_s = getNextStateImage(next_state)
        if terminal_state(next_state):
            # Terminal transition: the target is the reward itself.
            y = rho(next_state)
        else:
            # Non-terminal transition: bootstrap from the best next-state value.
            y = gamma * np.max(combined_model.predict(next_s.reshape(1, 784), steps=1, verbose=0))
        # Only the value of the action actually taken is moved towards the target.
        prediction[0, aidx] = y
        combined_model.fit(s.reshape(1, 784), prediction, epochs=1, verbose=0)
        s = np.copy(next_s)

In [46]:
# Walk through the ten positions in order, record the network's action values
# for an image of each, and derive the greedy action (-1 or +1) per position.
policy = np.zeros(10)
Q = []
s = getNextStateImage(np.eye(10, dtype=int)[0])
for i in range(10):
    Qs = combined_model.predict(s.reshape(1, 784), steps=1)
    Q.append(Qs)
    aidx = np.argmax(Qs)
    policy[i] = 2 * aidx - 1
    # Decode the current image to its one-hot state, step one position to the
    # right (np.roll wraps around at the end), and fetch an image for it.
    state = np.roll(
        to_categorical(mnist_model.predict(s.reshape(1, 784)).argmax(), num_classes=10).astype(int),
        1,
    )
    s = getNextStateImage(state)
# No action is reported for the two end positions.
policy[0] = policy[9] = 0
print(np.transpose(Q))
print('policy:', policy.T)

[[[0.3333138 0.3333138 0.3333138 0.3333138 0.3333138 0.3333138 0.3333138
   0.3333138 0.3333138 0.3333138]]

 [[0.6931435 0.6931435 0.6931435 0.6931435 0.6931435 0.6931435 0.6931435
   0.6931435 0.6931435 0.6931435]]]
policy: [0. 1. 1. 1. 1. 1. 1. 1. 1. 0.]
