In [1]:
# to slow down the actions
import time

# for the environment
import retro
from gym import Env # to wrap the environment
from gym.spaces import MultiBinary, Box # action and observation spaces
import numpy as np # to calculate the delta between the frames
import pandas as pd # to aggregate the experiment results
import cv2 # for grayscaling
# the algorithm
from stable_baselines3 import PPO

In [2]:
# Custom environment that wraps the retro game and carries out all the steps:
# frame preprocessing (grayscale + resize), frame deltas and the score-based reward

class StreetFighter(Env):
    """Gym environment wrapping the Street Fighter II retro game.

    Observations are 84x84 single-channel frames. ``reset`` returns the
    preprocessed first frame, while ``step`` returns the difference between
    the current and the previous preprocessed frame. The reward of a step is
    the change of ``info['score']`` since the previous step.
    """

    def __init__(self):
        super().__init__()
        # Specify action space and observation space
        # (frames are resized to 84x84 and converted to a single gray channel)
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # Per-episode state; populated by reset()
        self.previous_frame = None
        self.score = 0
        # Start up an instance of the game
        # additional parameter to filter only valid actions
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)

    def reset(self):
        """Restart the game and return the preprocessed first frame."""
        obs = self.game.reset()
        obs = self.preprocess(obs)
        # Keep track of the previous frame to calculate a delta between the frames
        self.previous_frame = obs

        # The score seen so far, used to turn the running score into a per-step reward
        self.score = 0
        return obs

    def preprocess(self, observation):
        """Convert a raw game frame into an (84, 84, 1) grayscale array."""
        # Grayscaling
        # NOTE(review): this uses the BGR channel weighting; confirm the channel order
        # of the emulator frames. Left unchanged so saved models keep seeing the
        # same inputs they were trained on.
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def step(self, action):
        """Advance the game by one step.

        Returns:
            A ``(frame_delta, reward, done, info)`` tuple where ``frame_delta`` is the
            current preprocessed frame minus the previous one and ``reward`` is the
            increase of ``info['score']`` since the previous step.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.previous_frame is None:
            raise RuntimeError("reset() must be called before step()")

        # Take a step
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        # Frame delta - use this to train our agent
        # NOTE(review): if the frames are uint8 (as the observation space declares),
        # negative differences wrap around instead of going below zero.
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs

        # Reshape the reward function: reward is the score gained during this step
        reward = info['score'] - self.score
        self.score = info['score']

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render the game, forwarding all arguments to the wrapped game.

        Returns whatever the wrapped game's ``render`` returns.
        """
        return self.game.render(*args, **kwargs)

    def close(self):
        """Close the wrapped game."""
        self.game.close()

In [3]:
# test model

def test_model_and_get_info(model):
    env = StreetFighter()
    
    # Reset game to starting state
    obs = env.reset()
    # Set flag to flase
    done = False
    for game in range(1): 
        while not done: 
            if done: 
                obs = env.reset()
            env.render()
            action = model.predict(obs)[0] # replace with model prediction
            obs, reward, done, info = env.step(action)
            time.sleep(0.01)
            if reward > 0:
                print(reward)

    return info

In [4]:
# load the saved PPO model and play one episode with it
model = PPO.load("training/models/PPO_10000_SF")

info = test_model_and_get_info(model)



500
300
300
400
100
100
1000
1000
500
400


In [5]:
# summary of the match: the info returned by test_model_and_get_info for the last step
info

{'continuetimer': 10,
 'enemy_matches_won': 2,
 'enemy_health': 0,
 'health': 0,
 'matches_won': 0,
 'score': 4600}

In [6]:
# track summary

# Build each record as a new dict (info plus an experiment name) so that
# the `info` dict itself is not mutated.
summary = [{**info, 'experiment name': '10000 timesteps'}]

In [13]:
# load and test another model
model = PPO.load("training/models/opt/PPO_1000_SF_4")
info = test_model_and_get_info(model)



500
1000
1000
1000
500
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
1000
10000
400
1000
300
400
1000
1000
300
1500
100
100
100
100
100
100
100
100
100
1000
1000
1000
1000
1000
500
500
300
300
500
1000
500
300


In [14]:
# info returned by the latest test_model_and_get_info call
info

{'continuetimer': 10,
 'enemy_matches_won': 2,
 'enemy_health': 0,
 'health': 0,
 'matches_won': 0,
 'score': 32500}

In [15]:
# Add this experiment to the summary without mutating `info`.
# Any existing row with the same experiment name is replaced, so re-running
# this cell does not append duplicate rows.
experiment_name = '1000 timesteps tuned'
record = {**info, 'experiment name': experiment_name}
summary = [row for row in summary if row.get('experiment name') != experiment_name]
summary.append(record)

In [16]:
# aggregate results: one row per tracked experiment, restricted to the key columns
summary_columns = ['experiment name', 'matches_won', 'score', 'enemy_matches_won']

pd.DataFrame(summary)[summary_columns]

Unnamed: 0,experiment name,matches_won,score,enemy_matches_won
0,10000 timesteps,0,4600,2
1,1000 timesteps,0,5900,2
2,1000 timesteps,0,5900,2
3,1000 timesteps tuned,0,32500,2
