In [1]:
import gym
import random
import numpy as np

In [2]:
# Create the CartPole-v0 environment and reset it once; leaving the returned
# observation as the cell's last expression makes the notebook display it.
env = gym.make("CartPole-v0")
initial_observation = env.reset()
initial_observation

array([-0.02387893,  0.00539569, -0.03564763, -0.00441803])

In [3]:
# Show the environment's observation and action spaces.
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

Observation space: Box(4,)
Action space: Discrete(2)


In [4]:
# First entry of the observation-space shape (used later as the number of
# rows in the agent's weight matrix).
state_size = env.observation_space.shape[0]
print(state_size)

4


In [10]:
class Agent:
    """Hill-climbing agent with a linear policy and an adaptive noise scale.

    The policy is a weight matrix of shape ``(state_dim[0], action_size)``;
    the action chosen for a state is the argmax of ``state @ weights``.
    After each episode ``update_model`` keeps the best weights seen so far
    and proposes a new candidate by adding scaled random noise to them.

    Attributes:
        state_dim: ``env.observation_space.shape``.
        action_size: ``env.action_space.n``.
        weights: Candidate weights used by ``choose_action``.
        best_weights: Weights that produced ``best_reward`` (initially a copy
            of the starting weights).
        optimal_weights: Alias of ``best_weights``, kept so code that reads
            the original attribute name keeps working.
        best_reward: Highest episode reward reported so far.
        noise: Current scale applied to the random perturbation.
    """

    def __init__(self, env):
        """Initialise random weights sized from the environment's spaces.

        Args:
            env: Object exposing ``observation_space.shape`` and
                ``action_space.n``.
        """
        self.state_dim = env.observation_space.shape
        self.action_size = env.action_space.n
        self.weights = np.random.randn(self.state_dim[0], self.action_size)
        # Must exist before the first update_model call: the non-improving
        # branch perturbs from best_weights without ever assigning it.
        self.best_weights = np.copy(self.weights)
        self.optimal_weights = self.best_weights
        self.best_reward = -1e4
        self.noise = 0.1

    def choose_action(self, state):
        """Return the index of the highest-scoring action for ``state``.

        Args:
            state: Observation vector compatible with ``np.dot(state, weights)``.

        Returns:
            The argmax over the per-action scores.
        """
        action = np.argmax(np.dot(state, self.weights))
        return action

    def update_model(self, reward):
        """Record the episode result and draw the next candidate weights.

        If ``reward`` is at least as good as the best so far, the current
        weights become the new best and the noise scale is halved; otherwise
        the noise scale grows by a factor of 1.5. In both cases the next
        candidate is ``best_weights`` plus ``noise`` times uniform [0, 1)
        samples.

        Args:
            reward: Total reward obtained with the current ``weights``.
        """
        if reward >= self.best_reward:
            self.best_reward = reward
            self.best_weights = np.copy(self.weights)
            # Keep the legacy attribute name pointing at the same snapshot.
            self.optimal_weights = self.best_weights
            self.noise /= 2
        else:
            self.noise *= 1.5

        self.weights = self.best_weights + self.noise * np.random.rand(self.state_dim[0], self.action_size)

In [12]:
# Train a fresh agent: run each episode to completion with the current
# candidate weights, then hand the episode's total reward to the agent.
episodes = 100
agent = Agent(env)

for episode in range(episodes):
    # Start every episode from a reset environment and a zero return.
    state, total_reward, done = env.reset(), 0, False

    while True:
        action = agent.choose_action(state)
        # env.step returns a 4-tuple here: (observation, reward, done, info).
        state, reward, done, info = env.step(action)
        # Call env.render() at this point to watch the episode.
        total_reward += reward
        if done:
            break

    agent.update_model(total_reward)
    print(f"Episode: {episode+1} finished with a reward: {total_reward}")

env.close()

Episode: 1 finished with a reward: 17.0
Episode: 2 finished with a reward: 40.0
Episode: 3 finished with a reward: 15.0
Episode: 4 finished with a reward: 25.0
Episode: 5 finished with a reward: 30.0
Episode: 6 finished with a reward: 22.0
Episode: 7 finished with a reward: 49.0
Episode: 8 finished with a reward: 14.0
Episode: 9 finished with a reward: 40.0
Episode: 10 finished with a reward: 27.0
Episode: 11 finished with a reward: 24.0
Episode: 12 finished with a reward: 14.0
Episode: 13 finished with a reward: 25.0
Episode: 14 finished with a reward: 39.0
Episode: 15 finished with a reward: 27.0
Episode: 16 finished with a reward: 17.0
Episode: 17 finished with a reward: 9.0
Episode: 18 finished with a reward: 69.0
Episode: 19 finished with a reward: 39.0
Episode: 20 finished with a reward: 44.0
Episode: 21 finished with a reward: 77.0
Episode: 22 finished with a reward: 26.0
Episode: 23 finished with a reward: 43.0
Episode: 24 finished with a reward: 9.0
Episode: 25 finished with a