In [None]:
! pip install gym gym-retro # Install the necessary packages

In [None]:
!pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 torchaudio===0.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html


### Imports and basic testing of installation

In [1]:
import retro # The main library
import time # For timing learning, if needed
import pygame # For rendering the game

pygame 2.1.0 (SDL 2.0.16, Python 3.6.13)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
retro.data.list_games()
# There are actually many games provided by Gym Retro: this command let's you check them out!

['1942-Nes',
 '1943-Nes',
 '3NinjasKickBack-Genesis',
 '8Eyes-Nes',
 'AaahhRealMonsters-Genesis',
 'AbadoxTheDeadlyInnerWar-Nes',
 'AcceleBrid-Snes',
 'ActRaiser2-Snes',
 'ActionPachio-Snes',
 'AddamsFamily-GameBoy',
 'AddamsFamily-Genesis',
 'AddamsFamily-Nes',
 'AddamsFamily-Sms',
 'AddamsFamily-Snes',
 'AddamsFamilyPugsleysScavengerHunt-Nes',
 'AddamsFamilyPugsleysScavengerHunt-Snes',
 'AdvancedBusterhawkGleylancer-Genesis',
 'Adventure-Atari2600',
 'AdventureIsland-GameBoy',
 'AdventureIsland3-Nes',
 'AdventureIslandII-Nes',
 'AdventuresOfBatmanAndRobin-Genesis',
 'AdventuresOfBayouBilly-Nes',
 'AdventuresOfDinoRiki-Nes',
 'AdventuresOfDrFranken-Snes',
 'AdventuresOfKidKleets-Snes',
 'AdventuresOfMightyMax-Genesis',
 'AdventuresOfMightyMax-Snes',
 'AdventuresOfRockyAndBullwinkleAndFriends-Genesis',
 'AdventuresOfRockyAndBullwinkleAndFriends-Nes',
 'AdventuresOfRockyAndBullwinkleAndFriends-Snes',
 'AdventuresOfStarSaver-GameBoy',
 'AdventuresOfYogiBear-Snes',
 'AeroFighters-Snes',
 

In [8]:

# Command to create your environment
env = retro.make(game = "StreetFighterIISpecialChampionEdition-Genesis")

In [9]:
env.close()

### Your implementation

In [13]:
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
import matplotlib

In [14]:
class StreetFighter(Env):
    def __init__(self):
        
        super().__init__()
        self.observation_space = Box(low=0,high=255,shape=(84,84,1),dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game= retro.make(game = "StreetFighterIISpecialChampionEdition-Genesis",
                             use_restricted_actions = retro.Actions.FILTERED)
    def step(self):
        
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        
        frame_change = obs - self.previous_frame
        self.previous_frame = obs
        
        reward = info['score'] - self.score
        self.score = info['score']
        
        return frame_change , reward, done, info
         
        
    def render(self,*args,**kwargs):
        self.game.render()
        
    def reset(self):
        
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
        
    def preprocess(self,observation):
        #grayscaling
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        #resize
        resize = cv2.resize(gray, (84,84),interpolation=cv2.INTER_CUBIC)
        
        channels = np.reshape(resize, (84,84,1))
        
        
        return channels
        
    def close():
        self.game.close()
    

In [15]:
obs = env.reset()
# Set flag to false
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        time.sleep(0.01)
    
        
env.close()

In [29]:
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import os

In [17]:
LOG_DIR = './logs/'
OPT_DIR = './opt/'

In [18]:
def optimize_ppo(trial):
    return{
        'n_steps':trial.suggest_int('n_steps', 2048, 8192),
        'gamma':trial.suggest_loguniform('gamma',0.8,0.9999),
        'learning_rate':trial.suggest_loguniform('learning_rate',1e-5,1e-4),
        'clip_range':trial.suggest_uniform('clip_range',0.1,0.4),
        'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8,0.99)
    }

In [20]:
def optimize_agent(trial):
    try:
        model_params = optimize_ppo(trial)
        
        env = StreetFighter()
        env = Monitor(env, LOG_DIR)
        env = DummyVecEnv([lambda: env])
        env = VecFrameStack(env, 4, channels_order = 'last')
        
        model = PPO('CnnPolicy',env, tensorboard_log=LOG_DIR, verbose=0, **model_params)
        model.learn(total_timesteps=30000)
        
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)
        env.close()
        
        
        SAVE_PATH = os.path.join(OPT_DIR,'trial_()_best_model'.format(trial.number))
        model.save(SAVE_PATH)
        
        return mean_reward
        
    except Exception as e:
        return -1000

In [21]:
study = optuna.create_study(direction='maximize')
study.optimize(optimize_agent,n_trials=10,n_jobs=1)
env.close()

[32m[I 2023-08-06 23:58:25,926][0m A new study created in memory with name: no-name-f3231fc7-aa8a-4b4e-af4b-100417512ad6[0m
  after removing the cwd from sys.path.
  """
  
  import sys
[32m[I 2023-08-06 23:58:27,178][0m Trial 0 finished with value: -1000.0 and parameters: {'n_steps': 2748, 'gamma': 0.9069567142329975, 'learning_rate': 3.7848543416778434e-05, 'clip_range': 0.36918867303309255, 'gae_lambda': 0.8342523077201767}. Best is trial 0 with value: -1000.0.[0m
[32m[I 2023-08-06 23:58:27,341][0m Trial 1 finished with value: -1000.0 and parameters: {'n_steps': 4727, 'gamma': 0.8213730901454327, 'learning_rate': 3.0333372230463855e-05, 'clip_range': 0.11542959424191075, 'gae_lambda': 0.8250225851660739}. Best is trial 0 with value: -1000.0.[0m
[32m[I 2023-08-06 23:58:27,458][0m Trial 2 finished with value: -1000.0 and parameters: {'n_steps': 3903, 'gamma': 0.8438067487387058, 'learning_rate': 5.6421366083169964e-05, 'clip_range': 0.3842248471676717, 'gae_lambda': 0.80823

In [22]:
env.close()

In [24]:
env = StreetFighter()
env = Monitor(env, LOG_DIR)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, 4, channels_order='last')

In [25]:
model_params = study.best_params
model_params['n_steps'] = 7488
model_params

In [26]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, **model_params)

In [27]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1,**model_params)

In [28]:
model.learn(total_timesteps=100000)

In [None]:
obs = env.reset()
done = False
for game in range(1):
    while not done:
        if done :
            obs = env.reset()
        env.render.()
        action = model.predict(obs)[0]
        obs, reward, done, info = env.step(action)
        time.sleep(0.01)
        