In [1]:
import gym
import random
import numpy as np
from PIL import Image
import cv2
import keras
from keras.preprocessing.image import img_to_array
from keras.layers import Dense,Conv2D,Flatten
from keras.models import Sequential
from keras.optimizers import RMSprop
from collections import deque
import matplotlib.pyplot as plt
from keras.applications import imagenet_utils

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
class evaluationforBowling:
    """Epsilon-greedy agent that owns two identically structured conv Q-networks.

    Attributes set by ``__init__``:
        gamma: discount rate (0.95).
        epsilon: probability with which ``pick_action`` picks a random action.
        learning_rate: stored as 0.001.
        input_size: shape handed to the first convolution as ``input_shape``.
        action_size: number of outputs of the final dense layer.
        model: network queried by ``pick_action``.
        model_source: second network with the same architecture.
        memory: bounded ``deque`` holding at most 20 items.
    """

    def __init__(self,input_shape,action_size):
        """Store the hyper-parameters and build both networks.

        Args:
            input_shape: per-sample input shape of the first Conv2D layer; the
                layers use ``data_format="channels_first"``.
            action_size: number of discrete actions (width of the output layer).
        """
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # 1.0 => pick_action always takes the random branch
        # NOTE(review): this value is not read by the network builder, which
        # compiles with lr=0.00025 -- confirm which of the two is intended.
        self.learning_rate = 0.001
        self.input_size  =  input_shape
        self.action_size  =  action_size
        # Build order is kept (model first, model_source second) so Keras'
        # auto-generated layer/model names stay the same as before.
        self.model  = self.architectureforModel()
        self.model_source =  self.sourceforModel()

        # NOTE(review): nothing in this class reads or writes the buffer yet.
        self.memory = deque(maxlen = 20)

    def _build_network(self, show_summary=False):
        """Create and compile the conv network shared by both public builders.

        Three channels-first Conv2D layers (32@8x8/4, 64@4x4/2, 64@3x3/1, all
        ReLU, "valid" padding), Flatten, Dense(512, ReLU) and a linear
        Dense(action_size) head, compiled with MSE loss and RMSprop.

        Args:
            show_summary: when True, print the Keras layer summary.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        # Only the first layer needs input_shape; later layers infer theirs.
        model.add(Conv2D(32,
                         8,
                         strides=(4, 4),
                         padding="valid",
                         activation="relu",
                         input_shape=self.input_size,
                         data_format="channels_first"))
        model.add(Conv2D(64,
                         4,
                         strides=(2, 2),
                         padding="valid",
                         activation="relu",
                         data_format="channels_first"))
        model.add(Conv2D(64,
                         3,
                         strides=(1, 1),
                         padding="valid",
                         activation="relu",
                         data_format="channels_first"))
        model.add(Flatten())

        model.add(Dense(512, activation="relu"))
        model.add(Dense(self.action_size))
        model.compile(loss="mean_squared_error",
                      optimizer=RMSprop(lr=0.00025,
                                        rho=0.95,
                                        epsilon=0.01),
                      metrics=["accuracy"])

        if show_summary:
            # summary() prints the table itself and returns None, so it must
            # not be wrapped in print() (that would emit a stray "None").
            model.summary()

        return model

    def sourceforModel(self):
        """Build the second network and print its layer summary.

        Returns:
            A freshly compiled ``Sequential`` model.
        """
        return self._build_network(show_summary=True)

    def architectureforModel(self):
        """Build the network used for action selection (no summary printed).

        Returns:
            A freshly compiled ``Sequential`` model.
        """
        return self._build_network(show_summary=False)

    def pick_action(self,state):
        """Choose an action epsilon-greedily.

        With probability ``self.epsilon`` a uniformly random action index in
        ``range(self.action_size)`` is returned; otherwise the index of the
        largest value in the first row of ``self.model.predict(state)``.

        Args:
            state: batch passed unchanged to ``self.model.predict``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if random.random() < self.epsilon:
            return random.randrange(self.action_size)
        # int() keeps the return type identical to the random branch.
        return int(np.argmax(self.model.predict(state)[0]))
    
    



In [3]:
def imageprocessing(image):
    """Convert a colour frame into a single-sample, single-channel 84x84 batch.

    The frame is converted to grayscale, resized to 84x84 and given two
    leading singleton axes, so the result has shape ``(1, 1, 84, 84)``.

    Args:
        image: colour frame accepted by ``cv2.cvtColor``.
            Assumes an H x W x 3 array in RGB channel order, as produced by the
            Gym Atari environment this notebook uses -- TODO confirm if the
            function is ever fed frames loaded through OpenCV (BGR).

    Returns:
        Array of shape ``(1, 1, 84, 84)`` with the dtype ``cv2.resize`` yields.
    """
    # The former img_to_array(image) call was dropped: its result was
    # overwritten by the next statement and never used.
    # RGB2GRAY (not BGR2GRAY): with RGB input, BGR2GRAY would apply the red
    # luminance weight to blue and vice versa.
    grayFrame = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    resizedFrame = cv2.resize(grayFrame, (84, 84))
    # Prepend the channel axis, then the batch axis.
    return resizedFrame[np.newaxis, np.newaxis, ...]

In [4]:
def plottingValues(name):
    """Plot the reward log ``<name>.txt`` and save the figure as ``<name>.png``.

    Each usable log line has the form ``episode,reward``. Lines that have fewer
    than two comma-separated fields, or whose first two fields are not numeric
    (e.g. the trailing blank line), are skipped.

    Args:
        name: base name shared by the log file and the output image, e.g.
            ``"Bowling-v0"`` reads ``Bowling-v0.txt`` and writes
            ``Bowling-v0.png``.

    Raises:
        OSError: if the log file cannot be opened.
    """
    logPath = name + ".txt"
    nameforImage = name + ".png"

    iterationAppend = []
    rewardAppend = []
    # Context manager guarantees the handle is released even if parsing fails.
    with open(logPath, "r") as logFile:
        for data in logFile:
            valueSplitting = data.strip().split(",")
            try:
                # Parse both fields before appending either one, so a bad
                # reward can never leave the two lists with different lengths.
                valueiteration = float(valueSplitting[0])
                valuereward = float(valueSplitting[1])
            except (IndexError, ValueError):
                continue
            iterationAppend.append(valueiteration)
            rewardAppend.append(valuereward)

    plt.plot(iterationAppend,rewardAppend)
    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.title("Reward and Episode Diagram")
    plt.savefig(nameforImage, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()

In [5]:
# Roll out episodes in the Bowling environment, logging and plotting the
# episode return every 10th episode.
env  = gym.make('Bowling-v0') #creating Bowling environment
action_size  = env.action_space.n
state_size = (1,84,84) # channels-first input: one 84x84 plane per frame


nn = evaluationforBowling(state_size,action_size)
episodes  =  1000
mbatchsize  =  32  # NOTE(review): not used anywhere in this cell

# NOTE(review): this loop never stores transitions, never fits the network and
# never lowers nn.epsilon, so with epsilon == 1.0 every action is random.
# try/finally makes sure the environment (and its render window) is closed even
# when the cell is interrupted.
try:
    for eps in range(episodes):
        observation  = env.reset()
        observation  =  imageprocessing(observation)

        done = False
        maxReward  = 0  # running sum of rewards for this episode
        while not done:
            env.render()
            action =  nn.pick_action(observation)

            next_state, reward, done,_ =  env.step(action)

            next_state =  imageprocessing(next_state)
            maxReward  += reward

            observation  =  next_state

        if eps%10 == 0:
            print(maxReward)

            # Close (and thereby flush) the log before plottingValues re-reads
            # it, otherwise the newest record may still sit in the write buffer.
            with open("Bowling-v0.txt",'a') as openTxtFile:
                openTxtFile.write(str(eps)+","+str(maxReward))
                openTxtFile.write("\n")
            plottingValues("Bowling-v0")
finally:
    env.close()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 32, 20, 20)        2080      
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 64, 9, 9)          32832     
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 64, 7, 7)          36928     
_________________________________________________________________
flatten_2 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               1606144   
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 3078      
Total params: 1,681,062
Trainable params: 1,681,062
Non-trainable params: 0
____________________________________________

KeyboardInterrupt: 