## Documentation

https://gymnasium.farama.org/environments/classic_control/cart_pole/

## Code

### Import dependencies

In [None]:
import os
import gymnasium as gym
import pygame
from pygame import gfxdraw
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import time



### Set Variables 

In [None]:
# Gymnasium environment id used by every cell below.
envName = "CartPole-v1"
# Location (relative to the working directory) where the PPO model is saved
# to and loaded from.
PPO_path = os.path.join("Saved_Models", "PPO_model1K")

### Load the environment

In [None]:
# Create the environment with an on-screen window (render_mode='human').
# For a headless environment use instead: env = gym.make(envName)
env = gym.make(
    envName,
    render_mode="human",
)

### Run the environment with random actions

In [None]:
# Sanity-check the environment: play a few episodes with random actions and
# print the accumulated reward of each one.
episodes = 5
for episode in range(1, episodes + 1):
    # Gymnasium's reset() returns an (observation, info) pair.
    state, info = env.reset()
    done = False
    truncated = False
    score = 0

    # step() reports the end of an episode through two separate flags:
    # `done` (terminated) and `truncated` (cut off, e.g. by a time limit).
    # Either one ends the episode.
    while not (done or truncated):
        # No explicit env.render() call: with render_mode='human' the
        # environment draws itself on every step().
        action = env.action_space.sample()
        n_state, reward, done, truncated, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
# NOTE(review): the cells below reuse `env`; a closed environment may need to
# be re-created (re-run the "Load the environment" cell) first -- confirm.
env.close()

### Train the model

In [None]:
# Build a PPO agent with the built-in MLP policy on `env` and train it for
# the requested number of timesteps (verbose=1 enables training log output).
model = PPO(policy='MlpPolicy', env=env, verbose=1)
model.learn(total_timesteps=1000)

### Save the model

In [None]:
# Persist the trained agent to PPO_path so it can be reloaded later.
model.save(path=PPO_path)

### Load the Model

In [None]:
# Restore the agent saved at PPO_path and attach it to the current `env`.
model = PPO.load(path=PPO_path, env=env)

### Evaluation of the trained model

In [None]:
# Evaluate the policy over 10 rendered episodes. The result is captured and
# printed explicitly because a bare expression is only displayed by the
# notebook when it is the last statement of a cell.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, render=True)
print('Mean reward:{} Std reward:{}'.format(mean_reward, std_reward))

# Test Model
# Play a single episode with the trained policy and print the final `info`.
# Gymnasium's reset() returns (observation, info); only the observation may be
# passed to model.predict().
obs, info = env.reset()
while True:
    action, _states = model.predict(obs)
    # No explicit env.render() call: with render_mode='human' the environment
    # draws itself on every step().
    obs, rewards, done, truncated, info = env.step(action)
    # Stop on either end-of-episode flag; checking only `done` would keep
    # stepping after the episode has been truncated.
    if done or truncated:
        print('info', info)
        break
env.close()