In [18]:
from vizdoom import *

In [19]:
import random
import time
import numpy as np
import matplotlib.pyplot as plt
from gymnasium import Env

In [17]:
game = DoomGame()
game.load_config('/Users/raghavsuri/Desktop/RLDOOM/Github/ViZDoom-master/scenarios/basic.cfg')
game.init()

In [4]:
#Set of possible actions in environment
actions = np.identity(3, dtype=np.uint8)

In [5]:
actions

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=uint8)

In [6]:
actions[0]

array([1, 0, 0], dtype=uint8)

In [7]:
random.choice(actions)

array([1, 0, 0], dtype=uint8)

In [8]:
game.new_episode()

In [9]:
game.is_episode_finished()

False

In [10]:
for i in range(20):
    game.make_action(random.choice(actions))

In [11]:
state = game.get_state()

In [12]:
state.game_variables

array([49.])

In [13]:
game.close()

In [14]:
episodes = 10
for i in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        img = state.screen_buffer
        info = state.game_variables
        reward = game.make_action(random.choice(actions),4)
        print('reward: ', reward)
        time.sleep(0.02)
    print("Result: ", game.get_total_reward())
    time.sleep(2)

ViZDoomIsNotRunningException: Controlled ViZDoom instance is not running or not ready.

In [None]:
game.close()

In [None]:
game.get_state().screen_buffer.shape

In [None]:
game.close()

In [20]:
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import cv2

In [21]:
class vizDoomGym(Env):
    
    def __init__(self, render=False): ##Function called on starting env
        
        ##inherit from env class
        super().__init__()
        
        #Game setup
        self.game = DoomGame()
        self.game.load_config('/Users/raghavsuri/Desktop/RLDOOM/Github/ViZDoom-master/scenarios/basic.cfg')

        
        if render ==False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)

            
            
        self.game.init()
        
        
        ##Creating action and observatio
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8)
        self.action_space = Discrete(3)
        
        
        
        
        pass
    
    
    def step(self, action): ## Taking steps in env
        actions = np.identity(3, dtype=np.uint8)
        reward = self.game.make_action(actions[action], 4)
        
        
        if self.game.get_state():
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            ammo  = self.game.get_state().game_variables[0]
            info = ammo
            #info = {'info': info}
            
        else:
            state = np.zeros(self.observation_space.shape)
            info = None
            

        done = self.game.is_episode_finished()
        if not done:
            
            truncated = self.game.is_episode_finished()
        else: 
            truncated = False
            
        info = {'info': info}            
        return state, reward, done, truncated, info
        pass
    
    
    
    def render(): ## defining how to render env/game
        pass
    

    def reset(self, seed=0): ## starting new game
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        ammo  = self.game.get_state().game_variables[0]
        info = {'ammo': ammo}
            
        return self.grayscale(state), info
            
    
    def grayscale(self, observation): ## grayscale the game frame and resize it 
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state

    
    def close(self):
        self.game.close()
        pass

In [None]:
env = vizDoomGym(True)

In [None]:
env.step(0)

In [None]:
state = env.reset()

In [None]:
info

In [None]:
state[0].shape

In [None]:
env.close()

In [None]:
np.moveaxis(state,0,-1).shape

In [None]:
env.step(2)

In [None]:
state[0].shape

In [None]:
plt.imshow(cv2.cvtColor(state[0], cv2.COLOR_BGR2RGB))

In [None]:
env.close()

In [None]:
from stable_baselines3.common import env_checker

In [None]:
env_checker.check_env(env)

In [None]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

In [None]:
##Callback to save model
import os
# Import callback class from stable baselines3 for saving and callback of rl model
from stable_baselines3. common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def __init__callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
            
    
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
    
        return True
    

In [None]:
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic'

In [None]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [22]:
from stable_baselines3 import PPO

In [None]:
env = vizDoomGym()

In [None]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)

In [None]:
model.learn(total_timesteps=100000, callback=callback)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [23]:
model = PPO.load('/Users/raghavsuri/Desktop/RLDOOM/train/train_basic/best_model_100000.zip')

[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.


In [28]:
env = vizDoomGym(render = True)

In [None]:
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1000)

In [None]:
mean_reward

In [None]:
obs[0]

In [None]:
model.predict(obs[0])

In [None]:
obs = env.reset()
model.predict(obs[0])

In [None]:
env.step()

In [None]:
array = obs[0]

In [29]:
for episode in range(10):
    obs = env.reset()
    obs = obs[0]
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done,truncated ,info = env.step(action)
        time.sleep(0.25)
        total_reward += reward
    print("Total reward for episode {} is {}".format(episode, total_reward))
    time.sleep(2)

Total reward for episode 0 is 71.0
Total reward for episode 1 is 83.0
Total reward for episode 2 is 95.0
Total reward for episode 3 is 91.0
Total reward for episode 4 is 72.0
Total reward for episode 5 is 67.0
Total reward for episode 6 is 21.0
Total reward for episode 7 is 79.0
Total reward for episode 8 is 91.0
Total reward for episode 9 is 87.0


In [30]:
env.close()