In [1]:
import retro

In [2]:
game = "StreetFighterIISpecialChampionEdition-Genesis"
state = "Champion.Level1.ChunLiVsGuile"
env = retro.make(game=game, state=state)

In [6]:
env.action_space.sample()

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int8)

In [9]:
env.observation_space.sample().shape

(200, 256, 3)

In [16]:
observation = env.reset()
print(observation.shape)

action = env.action_space.sample()
obs, rewards, done, info = env.step(action)
print(info)

(200, 256, 3)
{'enemy_matches_won': 0, 'score': 0, 'matches_won': 0, 'continuetimer': 0, 'enemy_health': 176, 'health': 176}


In [19]:
from gym.spaces import Box, MultiBinary

print(MultiBinary(12))

MultiBinary(12)


In [37]:
import cv2

import gym
import numpy as np
from gym.spaces import Box, MultiBinary

class StreetFighter(gym.Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game="StreetFighterIISpecialChampionEdition-Genesis", use_restricted_actions=retro.Actions.FILTERED)
        
        self.full_hp = 176
        self.player_health = self.full_hp
        self.oppont_health = self.full_hp
        
        self.score = 0
        
    def __preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        return resize

    def step(self, action):

        obs, reward, done, info = self.game.step(action)
        custom_obs = self.__preprocess(obs) # It's just frame, not frame_delta

        # During fighting, either player or opponent has positive health points.
        if info['health'] > 0 or info['enemy_health'] > 0:

            # Player Loses
            if info['health'] < 0 and info['health'] != self.player_health and info['enemy_health'] != 0:
                reward = (-self.full_hp) * info['enemy_health']

            # Player Wins
            elif info['enemy_health'] < 0 and info['enemy_health'] != self.oppont_health and info['health'] != 0:
                reward = self.full_hp * info['health']

            # During Fighting
            else:
                reward = (self.oppont_health - info['enemy_health']) - (self.player_health - info['health'])
        
        self.player_health = info['health']
        self.oppont_health = info['enemy_health']
        
        return custom_obs, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def reset(self):
        obs = self.game.reset()
        custom_obs = self.__preprocess(obs)
        self.previous_frame = obs
    
        self.player_health = self.full_hp
        self.oppont_health = self.full_hp
        return custom_obs

    def close(self):
        self.game.close()


In [38]:
env.close()
env = StreetFighter()
print(env.observation_space.shape)
env.close()

(84, 84)


In [39]:
## Checking Rewards functionality
import time

env = StreetFighter()
obs = env.reset()
done = False

for game in range(5):
    while not done:
        if done:
            obs = env.reset()
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        if reward != 0:
            print(reward, info['health'], info['enemy_health'])
        time.sleep(0.01)



-22 154 176
-32 122 176
29 122 147
7 122 140
-31 91 140
29 91 111
-23 68 111
-24 44 111
-24 20 111
31 20 80
10 20 70
45 20 25
5 20 20
-15 5 20
19 5 1
-176 -1 1
46 176 130
7 176 123
-24 152 123
29 152 94
-24 128 94
7 128 87
39 128 48
-31 97 48
36 97 12
-24 73 12
-24 49 12
8624 49 -1
39 176 137
-24 152 137
-23 129 137
-23 106 137
-26 80 137
-24 56 137
-23 33 137
-21 12 137
-12 0 137
-24112 -1 137
