# StreetFighter

%pip install requirements.txt

!python -m retro.import "location/of/rom/"

In [1]:
# Import retro to play Street Fighter using a ROM
import retro
# Import time to slow down game
import time

In [2]:
# Import environment base class for a wrapper 
from gym import Env 
# Import the space shapes for the environment
from gym.spaces import MultiBinary, Box
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2
# Import matplotlib for plotting the image
from matplotlib import pyplot as plt

In [3]:
# Importing the optimzation frame - HPO
import optuna
# PPO algo for RL
from stable_baselines3 import PPO
# Bring in the eval policy method for metric calculation
from stable_baselines3.common.evaluation import evaluate_policy
# Import the sb3 monitor for logging 
from stable_baselines3.common.monitor import Monitor
# Import the vec wrappers to vectorize and frame stack
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
# Import os to deal with filepaths
import os

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Import base callback 
from stable_baselines3.common.callbacks import BaseCallback

In [5]:
# import Env from module
from StreetFighter_env import StreetFighter

In [6]:
# env.close()

In [7]:
# # Create custom environment 
# class StreetFighter(Env): 
#     def __init__(self):
#         super().__init__()
#         # Specify action space and observation space 
#         self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
#         self.action_space = MultiBinary(12)
#         # Startup and instance of the game 
#         self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
#     def reset(self):
#         # Return the first frame 
#         obs = self.game.reset()
#         obs = self.preprocess(obs) 
#         self.previous_frame = obs 
        
#         # Create a attribute to hold the score delta 
#         self.score = 0 
#         return obs
    
#     def preprocess(self, observation): 
#         # Grayscaling 
#         gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
#         # Resize 
#         resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
#         # Add the channels value
#         channels = np.reshape(resize, (84,84,1))
#         return channels 
    
#     def step(self, action): 
#         # Take a step 
#         obs, reward, done, info = self.game.step(action)
#         obs = self.preprocess(obs) 
        
#         # Frame delta 
#         frame_delta = obs - self.previous_frame
#         self.previous_frame = obs 
        
#         # Reshape the reward function
#         reward = info['score'] - self.score 
#         self.score = info['score'] 
        
#         return frame_delta, reward, done, info
    
#     def render(self, *args, **kwargs):
#         self.game.render()
        
#     def close(self):
#         self.game.close()

In [6]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [7]:
env = StreetFighter()

In [8]:
LOG_DIR = './logs/'
OPT_DIR = './opt/'

In [9]:
SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(1))

In [10]:
CHECKPOINT_DIR = './train/'

In [11]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [14]:
#used for initial training
# model = PPO('CnnPolicy', 
#             env, 
#             device='auto',
#             tensorboard_log=LOG_DIR,
#             verbose=1, 
#             gamma = 0.9,
#             clip_range = 0.25,
#             gae_lambda =  0.95,
#             n_steps = 7488,  # set n_steps to 7488 or a factor of 64
#             learning_rate = 5e-10)

In [15]:
model = PPO.load('./train/model_20M.zip',
                 env=env, 
                 device = 'cuda',
                 tensorboard_log=LOG_DIR,
                 verbose=1,
                 n_steps = 8192,  # set n_steps to 7488 or a factor of 64
                 learning_rate = 5e-7)

  th_object = th.load(file_content, map_location=device)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Track progress in TensorBoard in command line (PowerShell)
tensorboard --logdir logs

In [16]:
# Kick off training 
model.learn(total_timesteps=5000000, callback=callback)
# model.learn(total_timestep=100000) 

Logging to ./logs/PPO_16
-----------------------------
| time/              |      |
|    fps             | 115  |
|    iterations      | 1    |
|    time_elapsed    | 70   |
|    total_timesteps | 8192 |
-----------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.32e+04      |
|    ep_rew_mean          | 4.98e+04      |
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 2             |
|    time_elapsed         | 235           |
|    total_timesteps      | 16384         |
| train/                  |               |
|    approx_kl            | 1.5190672e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.25          |
|    entropy_loss         | -8.32         |
|    explained_variance   | -6.16e-05     |
|    learning_rate        | 5e-07         |
|    loss                 | 1.4e+05       |
|    n_updates   

<stable_baselines3.ppo.ppo.PPO at 0x22b1fd470a0>

# Test out the Model

In [12]:
model = PPO.load('./train/model_25M.zip',
                 env=env, 
                 device = 'cuda',)

  th_object = th.load(file_content, map_location=device)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [21]:
obs = env.reset()

In [None]:
# Reset game to starting state
obs = env.reset()
# Set flag to flase
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        action, _ = model.predict(obs, deterministic=True)#[0]
        obs, reward, done, info = env.step(action)
        time.sleep(0.01)
        if reward > 0:
            print(reward)

AttributeError: 'tuple' object has no attribute 'shape'