
Implementation Details
1. Define the Cartpole Environment
2. Define HyperParameters
3. Define Neural Network
   1. A 3-layer neural network whose input is the state of the CartPole environment
   2. Output is the softmax layer of the action size 
   3. Adam Optimizer
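   4. Loss function: reduce_mean(discounted_reward * softmax_cross_entropy_with_logits), i.e. the negative log-probability of each action taken, weighted by the discounted reward of the episode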
   5. Loop through the episodes
       1. Pass the current state through the neural network to get a probability distribution over the actions.
       2. Take action depending upon the probability distribution
       3. Store all the states, rewards and actions until the episode is done; the actions are the ones sampled from the network's output.
       4. When the episode is done, calculate the discounted rewards and feed the stored states, actions and discounted rewards to the neural network to train it.
   6. In this way the policy is updated once after every episode.

## Defining the Cartpole Environment

In [1]:
import gym
import numpy as np

In [2]:
# Create CartPole-v1 and take the underlying environment object (without the
# wrappers gym.make adds) in a single expression.
# NOTE(review): presumably this is what lifts the per-episode step cap -- the
# training log below shows episode rewards above 500; confirm if a cap is wanted.
env = gym.make("CartPole-v1").unwrapped
# Seed the environment's own random number generator.
env.seed(1)

[1]

## Hyperparameters

In [3]:
gamma = 0.95                                  # Discount factor applied to future rewards
episodes = 1000                               # Number of training episodes
actions = env.action_space.n                  # Number of discrete actions
# Read the observation length from the environment, like `actions` above,
# instead of hard-coding it (it is 4 for CartPole).
state_size = env.observation_space.shape[0]   # Number of state variables
learning_rate = 0.01                          # Step size passed to the Adam optimizer

## Neural Network

In [4]:
import tensorflow as tf

In [5]:
# Placeholders and Session.run() below are TF1 graph-mode constructs, so eager
# execution has to be switched off before any of them is created.
tf.compat.v1.disable_eager_execution()

# ---- Graph inputs ----
# Batch of observations, one row per time step.
input_state = tf.compat.v1.placeholder(
    tf.float32, shape=[None, state_size], name="input_state"
)
# One row per time step with `actions` columns; the training cell feeds
# one-hot vectors marking the action that was taken.
action_space = tf.compat.v1.placeholder(
    tf.int32, shape=[None, actions], name="action_space"
)
# One scalar weight per time step (the normalised discounted return).
discounted_reward = tf.compat.v1.placeholder(
    tf.float32, shape=[None], name="discounted_reward"
)

# ---- Policy network: three dense layers, the last one producing logits ----
fc1 = tf.keras.layers.Dense(units=10, activation="relu", name="fc1")(input_state)
fc2 = tf.keras.layers.Dense(units=actions, activation="relu", name="fc2")(fc1)
fc3 = tf.keras.layers.Dense(units=actions, name="fc3")(fc2)
# Softmax over the logits gives the action probabilities used for sampling.
action_output = tf.keras.layers.Softmax(name="action_output")(fc3)

# ---- Loss and optimiser ----
# Cross-entropy between the logits and the taken-action labels, one value per
# time step, then weighted by the discounted reward and averaged.
neg_loss_prob = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(
    labels=action_space, logits=fc3
)
loss = tf.math.reduce_mean(discounted_reward * neg_loss_prob)
training = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


## Building Gameplay Bot for Cartpole

In [6]:
def discounted_rewards(rewards, discount_factor=None):
    """Return the normalised discounted return for every step of an episode.

    The return at step ``i`` is ``rewards[i] + d * rewards[i+1] + d**2 * ...``
    where ``d`` is the discount factor. The resulting vector is then shifted to
    zero mean and, when it has any spread, scaled to unit standard deviation.

    Parameters
    ----------
    rewards : sequence of numbers
        Per-step rewards of one episode, in chronological order.
    discount_factor : float, optional
        Discount applied per step. Defaults to the notebook-level ``gamma``.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``rewards``.
    """
    if discount_factor is None:
        discount_factor = gamma  # notebook-level hyperparameter

    # Force a float array: zeros_like on integer rewards would create an
    # integer buffer and silently truncate the discounted sums.
    rewards = np.asarray(rewards, dtype=np.float64)
    discounted_episode_rewards = np.zeros_like(rewards)
    if rewards.size == 0:
        # Nothing to discount; avoid mean/std of an empty array (NaN + warning).
        return discounted_episode_rewards

    # Accumulate from the last step backwards so each step sees its future.
    cummulative_reward = 0.0
    for i in reversed(range(len(rewards))):
        cummulative_reward = cummulative_reward * discount_factor + rewards[i]
        discounted_episode_rewards[i] = cummulative_reward

    mean = np.mean(discounted_episode_rewards)
    std = np.std(discounted_episode_rewards)
    discounted_episode_rewards = discounted_episode_rewards - mean
    # A zero spread (e.g. a one-step episode) would otherwise divide by zero
    # and feed NaNs into the loss; in that case return the centred values.
    if std > 0:
        discounted_episode_rewards = discounted_episode_rewards / std
    return discounted_episode_rewards

In [7]:
# Running statistics across all episodes.
allRewards = []
total_rewards = 0
maximumRewardRecorded = 0
# Per-episode buffers; emptied after every policy update.
episode_states = []
episode_rewards = []
episode_actions = []
saver = tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for each in range(episodes):
        episode_rewards_sum = 0
        state = env.reset()
        env.render()
        while True:
            # Sample an action from the policy's probability distribution.
            action_probability = sess.run(
                action_output,
                feed_dict={input_state: state.reshape([1, state_size])},
            )
            action = np.random.choice(range(action_probability.shape[1]), p=action_probability.ravel())
            new_state, reward, done, info = env.step(action)
            # Record the transition; the action is stored one-hot to match the
            # `action_space` placeholder.
            episode_states.append(state)
            episode_rewards.append(reward)
            action_ = np.zeros(actions)
            action_[action] = 1
            episode_actions.append(action_)
            if done:
                episode_rewards_sum = np.sum(episode_rewards)
                allRewards.append(episode_rewards_sum)
                total_rewards = np.sum(allRewards)
                maximumRewardRecorded = np.amax(allRewards)
                print("==========================================")
                print("Episode: ", each+1)
                print("Reward: ", episode_rewards_sum)
                print("Max reward so far: ", maximumRewardRecorded)
                # One gradient step per episode on the whole trajectory.
                discounted_episode_rewards = discounted_rewards(episode_rewards)
                loss_, _ = sess.run([loss, training], feed_dict={input_state: np.vstack(np.array(episode_states)),
                                                                 action_space: np.vstack(np.array(episode_actions)),
                                                                 discounted_reward: discounted_episode_rewards
                                                                })
                episode_states, episode_actions, episode_rewards = [], [], []
                break
            state = new_state
        # Checkpoint every 100 episodes and also after the final one; without
        # the second condition the last save happens at each == 900 and the
        # remaining updates would never reach the checkpoint that is restored
        # for testing.
        if each % 100 == 0 or each == episodes - 1:
            saver.save(sess, "./models/model.ckpt")
            print("Model saved")
env.close()

Episode:  1
Reward:  30.0
Max reward so far:  30.0
Model saved
Episode:  2
Reward:  19.0
Max reward so far:  30.0
Episode:  3
Reward:  124.0
Max reward so far:  124.0
Episode:  4
Reward:  31.0
Max reward so far:  124.0
Episode:  5
Reward:  16.0
Max reward so far:  124.0
Episode:  6
Reward:  79.0
Max reward so far:  124.0
Episode:  7
Reward:  14.0
Max reward so far:  124.0
Episode:  8
Reward:  33.0
Max reward so far:  124.0
Episode:  9
Reward:  36.0
Max reward so far:  124.0
Episode:  10
Reward:  36.0
Max reward so far:  124.0
Episode:  11
Reward:  61.0
Max reward so far:  124.0
Episode:  12
Reward:  36.0
Max reward so far:  124.0
Episode:  13
Reward:  33.0
Max reward so far:  124.0
Episode:  14
Reward:  39.0
Max reward so far:  124.0
Episode:  15
Reward:  15.0
Max reward so far:  124.0
Episode:  16
Reward:  45.0
Max reward so far:  124.0
Episode:  17
Reward:  12.0
Max reward so far:  124.0
Episode:  18
Reward:  21.0
Max reward so far:  124.0
Episode:  19
Reward:  14.0
Max reward so far

Episode:  92
Reward:  18.0
Max reward so far:  124.0
Episode:  93
Reward:  38.0
Max reward so far:  124.0
Episode:  94
Reward:  12.0
Max reward so far:  124.0
Episode:  95
Reward:  27.0
Max reward so far:  124.0
Episode:  96
Reward:  43.0
Max reward so far:  124.0
Episode:  97
Reward:  14.0
Max reward so far:  124.0
Episode:  98
Reward:  28.0
Max reward so far:  124.0
Episode:  99
Reward:  11.0
Max reward so far:  124.0
Episode:  100
Reward:  28.0
Max reward so far:  124.0
Episode:  101
Reward:  14.0
Max reward so far:  124.0
Model saved
Episode:  102
Reward:  10.0
Max reward so far:  124.0
Episode:  103
Reward:  13.0
Max reward so far:  124.0
Episode:  104
Reward:  33.0
Max reward so far:  124.0
Episode:  105
Reward:  14.0
Max reward so far:  124.0
Episode:  106
Reward:  49.0
Max reward so far:  124.0
Episode:  107
Reward:  14.0
Max reward so far:  124.0
Episode:  108
Reward:  15.0
Max reward so far:  124.0
Episode:  109
Reward:  10.0
Max reward so far:  124.0
Episode:  110
Reward:  1

Episode:  182
Reward:  32.0
Max reward so far:  132.0
Episode:  183
Reward:  31.0
Max reward so far:  132.0
Episode:  184
Reward:  26.0
Max reward so far:  132.0
Episode:  185
Reward:  33.0
Max reward so far:  132.0
Episode:  186
Reward:  49.0
Max reward so far:  132.0
Episode:  187
Reward:  31.0
Max reward so far:  132.0
Episode:  188
Reward:  108.0
Max reward so far:  132.0
Episode:  189
Reward:  73.0
Max reward so far:  132.0
Episode:  190
Reward:  32.0
Max reward so far:  132.0
Episode:  191
Reward:  20.0
Max reward so far:  132.0
Episode:  192
Reward:  49.0
Max reward so far:  132.0
Episode:  193
Reward:  92.0
Max reward so far:  132.0
Episode:  194
Reward:  87.0
Max reward so far:  132.0
Episode:  195
Reward:  24.0
Max reward so far:  132.0
Episode:  196
Reward:  86.0
Max reward so far:  132.0
Episode:  197
Reward:  17.0
Max reward so far:  132.0
Episode:  198
Reward:  161.0
Max reward so far:  161.0
Episode:  199
Reward:  66.0
Max reward so far:  161.0
Episode:  200
Reward:  77.

Episode:  268
Reward:  631.0
Max reward so far:  631.0
Episode:  269
Reward:  540.0
Max reward so far:  631.0
Episode:  270
Reward:  672.0
Max reward so far:  672.0
Episode:  271
Reward:  525.0
Max reward so far:  672.0
Episode:  272
Reward:  552.0
Max reward so far:  672.0
Episode:  273
Reward:  381.0
Max reward so far:  672.0
Episode:  274
Reward:  507.0
Max reward so far:  672.0
Episode:  275
Reward:  372.0
Max reward so far:  672.0
Episode:  276
Reward:  273.0
Max reward so far:  672.0
Episode:  277
Reward:  264.0
Max reward so far:  672.0
Episode:  278
Reward:  183.0
Max reward so far:  672.0
Episode:  279
Reward:  126.0
Max reward so far:  672.0
Episode:  280
Reward:  222.0
Max reward so far:  672.0
Episode:  281
Reward:  57.0
Max reward so far:  672.0
Episode:  282
Reward:  233.0
Max reward so far:  672.0
Episode:  283
Reward:  303.0
Max reward so far:  672.0
Episode:  284
Reward:  350.0
Max reward so far:  672.0
Episode:  285
Reward:  226.0
Max reward so far:  672.0
Episode:  2

Episode:  352
Reward:  188.0
Max reward so far:  672.0
Episode:  353
Reward:  162.0
Max reward so far:  672.0
Episode:  354
Reward:  107.0
Max reward so far:  672.0
Episode:  355
Reward:  165.0
Max reward so far:  672.0
Episode:  356
Reward:  230.0
Max reward so far:  672.0
Episode:  357
Reward:  190.0
Max reward so far:  672.0
Episode:  358
Reward:  203.0
Max reward so far:  672.0
Episode:  359
Reward:  310.0
Max reward so far:  672.0
Episode:  360
Reward:  215.0
Max reward so far:  672.0
Episode:  361
Reward:  159.0
Max reward so far:  672.0
Episode:  362
Reward:  148.0
Max reward so far:  672.0
Episode:  363
Reward:  154.0
Max reward so far:  672.0
Episode:  364
Reward:  168.0
Max reward so far:  672.0
Episode:  365
Reward:  144.0
Max reward so far:  672.0
Episode:  366
Reward:  140.0
Max reward so far:  672.0
Episode:  367
Reward:  126.0
Max reward so far:  672.0
Episode:  368
Reward:  117.0
Max reward so far:  672.0
Episode:  369
Reward:  140.0
Max reward so far:  672.0
Episode:  

Episode:  437
Reward:  139.0
Max reward so far:  672.0
Episode:  438
Reward:  123.0
Max reward so far:  672.0
Episode:  439
Reward:  152.0
Max reward so far:  672.0
Episode:  440
Reward:  143.0
Max reward so far:  672.0
Episode:  441
Reward:  143.0
Max reward so far:  672.0
Episode:  442
Reward:  143.0
Max reward so far:  672.0
Episode:  443
Reward:  129.0
Max reward so far:  672.0
Episode:  444
Reward:  173.0
Max reward so far:  672.0
Episode:  445
Reward:  121.0
Max reward so far:  672.0
Episode:  446
Reward:  165.0
Max reward so far:  672.0
Episode:  447
Reward:  122.0
Max reward so far:  672.0
Episode:  448
Reward:  116.0
Max reward so far:  672.0
Episode:  449
Reward:  148.0
Max reward so far:  672.0
Episode:  450
Reward:  156.0
Max reward so far:  672.0
Episode:  451
Reward:  122.0
Max reward so far:  672.0
Episode:  452
Reward:  147.0
Max reward so far:  672.0
Episode:  453
Reward:  111.0
Max reward so far:  672.0
Episode:  454
Reward:  116.0
Max reward so far:  672.0
Episode:  

Episode:  521
Reward:  317.0
Max reward so far:  672.0
Episode:  522
Reward:  332.0
Max reward so far:  672.0
Episode:  523
Reward:  381.0
Max reward so far:  672.0
Episode:  524
Reward:  272.0
Max reward so far:  672.0
Episode:  525
Reward:  607.0
Max reward so far:  672.0
Episode:  526
Reward:  253.0
Max reward so far:  672.0
Episode:  527
Reward:  286.0
Max reward so far:  672.0
Episode:  528
Reward:  238.0
Max reward so far:  672.0
Episode:  529
Reward:  285.0
Max reward so far:  672.0
Episode:  530
Reward:  202.0
Max reward so far:  672.0
Episode:  531
Reward:  197.0
Max reward so far:  672.0
Episode:  532
Reward:  241.0
Max reward so far:  672.0
Episode:  533
Reward:  172.0
Max reward so far:  672.0
Episode:  534
Reward:  229.0
Max reward so far:  672.0
Episode:  535
Reward:  224.0
Max reward so far:  672.0
Episode:  536
Reward:  239.0
Max reward so far:  672.0
Episode:  537
Reward:  236.0
Max reward so far:  672.0
Episode:  538
Reward:  177.0
Max reward so far:  672.0
Episode:  

Episode:  607
Reward:  187.0
Max reward so far:  672.0
Episode:  608
Reward:  242.0
Max reward so far:  672.0
Episode:  609
Reward:  170.0
Max reward so far:  672.0
Episode:  610
Reward:  234.0
Max reward so far:  672.0
Episode:  611
Reward:  394.0
Max reward so far:  672.0
Episode:  612
Reward:  224.0
Max reward so far:  672.0
Episode:  613
Reward:  246.0
Max reward so far:  672.0
Episode:  614
Reward:  284.0
Max reward so far:  672.0
Episode:  615
Reward:  357.0
Max reward so far:  672.0
Episode:  616
Reward:  339.0
Max reward so far:  672.0
Episode:  617
Reward:  215.0
Max reward so far:  672.0
Episode:  618
Reward:  229.0
Max reward so far:  672.0
Episode:  619
Reward:  345.0
Max reward so far:  672.0
Episode:  620
Reward:  317.0
Max reward so far:  672.0
Episode:  621
Reward:  309.0
Max reward so far:  672.0
Episode:  622
Reward:  202.0
Max reward so far:  672.0
Episode:  623
Reward:  257.0
Max reward so far:  672.0
Episode:  624
Reward:  344.0
Max reward so far:  672.0
Episode:  

Episode:  691
Reward:  362.0
Max reward so far:  672.0
Episode:  692
Reward:  251.0
Max reward so far:  672.0
Episode:  693
Reward:  410.0
Max reward so far:  672.0
Episode:  694
Reward:  437.0
Max reward so far:  672.0
Episode:  695
Reward:  329.0
Max reward so far:  672.0
Episode:  696
Reward:  287.0
Max reward so far:  672.0
Episode:  697
Reward:  335.0
Max reward so far:  672.0
Episode:  698
Reward:  243.0
Max reward so far:  672.0
Episode:  699
Reward:  250.0
Max reward so far:  672.0
Episode:  700
Reward:  332.0
Max reward so far:  672.0
Episode:  701
Reward:  383.0
Max reward so far:  672.0
Model saved
Episode:  702
Reward:  255.0
Max reward so far:  672.0
Episode:  703
Reward:  340.0
Max reward so far:  672.0
Episode:  704
Reward:  356.0
Max reward so far:  672.0
Episode:  705
Reward:  209.0
Max reward so far:  672.0
Episode:  706
Reward:  225.0
Max reward so far:  672.0
Episode:  707
Reward:  229.0
Max reward so far:  672.0
Episode:  708
Reward:  219.0
Max reward so far:  672.

Episode:  776
Reward:  176.0
Max reward so far:  672.0
Episode:  777
Reward:  152.0
Max reward so far:  672.0
Episode:  778
Reward:  151.0
Max reward so far:  672.0
Episode:  779
Reward:  151.0
Max reward so far:  672.0
Episode:  780
Reward:  146.0
Max reward so far:  672.0
Episode:  781
Reward:  154.0
Max reward so far:  672.0
Episode:  782
Reward:  125.0
Max reward so far:  672.0
Episode:  783
Reward:  137.0
Max reward so far:  672.0
Episode:  784
Reward:  126.0
Max reward so far:  672.0
Episode:  785
Reward:  134.0
Max reward so far:  672.0
Episode:  786
Reward:  149.0
Max reward so far:  672.0
Episode:  787
Reward:  121.0
Max reward so far:  672.0
Episode:  788
Reward:  141.0
Max reward so far:  672.0
Episode:  789
Reward:  126.0
Max reward so far:  672.0
Episode:  790
Reward:  108.0
Max reward so far:  672.0
Episode:  791
Reward:  117.0
Max reward so far:  672.0
Episode:  792
Reward:  114.0
Max reward so far:  672.0
Episode:  793
Reward:  118.0
Max reward so far:  672.0
Episode:  

Episode:  860
Reward:  136.0
Max reward so far:  672.0
Episode:  861
Reward:  169.0
Max reward so far:  672.0
Episode:  862
Reward:  168.0
Max reward so far:  672.0
Episode:  863
Reward:  156.0
Max reward so far:  672.0
Episode:  864
Reward:  160.0
Max reward so far:  672.0
Episode:  865
Reward:  147.0
Max reward so far:  672.0
Episode:  866
Reward:  156.0
Max reward so far:  672.0
Episode:  867
Reward:  157.0
Max reward so far:  672.0
Episode:  868
Reward:  169.0
Max reward so far:  672.0
Episode:  869
Reward:  169.0
Max reward so far:  672.0
Episode:  870
Reward:  174.0
Max reward so far:  672.0
Episode:  871
Reward:  167.0
Max reward so far:  672.0
Episode:  872
Reward:  157.0
Max reward so far:  672.0
Episode:  873
Reward:  203.0
Max reward so far:  672.0
Episode:  874
Reward:  197.0
Max reward so far:  672.0
Episode:  875
Reward:  186.0
Max reward so far:  672.0
Episode:  876
Reward:  188.0
Max reward so far:  672.0
Episode:  877
Reward:  178.0
Max reward so far:  672.0
Episode:  

Reward:  204.0
Max reward so far:  672.0
Episode:  945
Reward:  199.0
Max reward so far:  672.0
Episode:  946
Reward:  170.0
Max reward so far:  672.0
Episode:  947
Reward:  175.0
Max reward so far:  672.0
Episode:  948
Reward:  191.0
Max reward so far:  672.0
Episode:  949
Reward:  171.0
Max reward so far:  672.0
Episode:  950
Reward:  202.0
Max reward so far:  672.0
Episode:  951
Reward:  217.0
Max reward so far:  672.0
Episode:  952
Reward:  172.0
Max reward so far:  672.0
Episode:  953
Reward:  164.0
Max reward so far:  672.0
Episode:  954
Reward:  167.0
Max reward so far:  672.0
Episode:  955
Reward:  166.0
Max reward so far:  672.0
Episode:  956
Reward:  170.0
Max reward so far:  672.0
Episode:  957
Reward:  191.0
Max reward so far:  672.0
Episode:  958
Reward:  202.0
Max reward so far:  672.0
Episode:  959
Reward:  158.0
Max reward so far:  672.0
Episode:  960
Reward:  154.0
Max reward so far:  672.0
Episode:  961
Reward:  138.0
Max reward so far:  672.0
Episode:  962
Reward:  1

## Testing the model

In [8]:
# Number of evaluation episodes; drives both the loop and the reported average
# so the two cannot drift apart.
test_episodes = 10
with tf.compat.v1.Session() as sess:
    # Initial reset retained from the original cell; every episode resets
    # the environment again below.
    env.reset()
    rewards = []
    # Load the weights written by the training cell.
    saver.restore(sess, "./models/model.ckpt")
    for episode in range(test_episodes):
        state = env.reset()
        total_rewards = 0
        print("****************************************************")
        print("EPISODE ", episode)

        while True:
            # Actions are sampled from the policy distribution, as in training.
            action_probability = sess.run(
                action_output,
                feed_dict={input_state: state.reshape([1, state_size])},
            )
            action = np.random.choice(range(action_probability.shape[1]), p=action_probability.ravel())
            new_state, reward, done, info = env.step(action)
            total_rewards += reward
            if done:
                rewards.append(total_rewards)
                print ("Score", total_rewards)
                break
            state = new_state
    env.close()
    # Average over the episodes actually played.
    print ("Score over time: " +  str(sum(rewards)/len(rewards)))

INFO:tensorflow:Restoring parameters from ./models/model.ckpt
****************************************************
EPISODE  0
Score 41.0
****************************************************
EPISODE  1
Score 24.0
****************************************************
EPISODE  2
Score 64.0
****************************************************
EPISODE  3
Score 12.0
****************************************************
EPISODE  4
Score 26.0
****************************************************
EPISODE  5
Score 10.0
****************************************************
EPISODE  6
Score 35.0
****************************************************
EPISODE  7
Score 38.0
****************************************************
EPISODE  8
Score 12.0
****************************************************
EPISODE  9
Score 55.0
Score over time: 31.7
