In [None]:
import random
import gym_fightingice
import gym
from gym import wrappers
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [None]:
# Agent class using Deep Q Learning
class DQNAgent:
    """Deep Q-learning agent backed by a small fully connected Keras network.

    The agent stores transitions in a bounded replay memory, chooses actions
    epsilon-greedily, and fits the network one transition at a time in
    ``replay``.
    """

    def __init__(self, state_size, action_size):
        """Set hyperparameters and build the Q-network.

        Args:
            state_size: Number of values in one flattened observation; used
                as the network input width and by ``make_input``.
            action_size: Number of discrete actions; used as the network
                output width and as the range for random actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay memory: once 2000 transitions are stored, the oldest
        # ones are dropped as new ones are appended.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 0.5  # exploration rate
        self.epsilon_min = 0.0001
        # Multiplied into epsilon once per replay() call while above epsilon_min.
        self.epsilon_decay = 0.999
        self.learning_rate = 0.0001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled ``Sequential`` model with three 100-unit ReLU layers
            and a linear output layer of ``action_size`` units, trained with
            mean squared error.
        """
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(100, input_dim=self.state_size, activation='relu'))
        model.add(Dense(100, activation='relu'))
        model.add(Dense(100, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): newer Keras releases name this argument
        # `learning_rate`; confirm `lr` is accepted by the installed version.
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        Both states are reshaped to ``(1, state_size)`` before storage so
        ``replay`` can pass them straight to the model.

        Args:
            state: Observation before the action.
            action: Index of the action taken.
            reward: Reward returned by the environment.
            next_state: Observation after the action.
            done: Whether the episode ended on this step.
        """
        state = self.make_input(state)
        next_state = self.make_input(next_state)
        # NOTE(review): every terminal step is stored with reward -10,
        # regardless of the reward the environment reported. Confirm this is
        # intended for an environment where an episode can end in a win.
        reward = reward if not done else -10

        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Current observation (reshaped internally).

        Returns:
            A random action index with probability ``epsilon``, otherwise the
            index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        state = self.make_input(state)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        """Fit the network on a random sample of stored transitions.

        Up to ``batch_size`` transitions are drawn without replacement; when
        fewer are stored, all of them are used in random order. Epsilon is
        decayed once per call while it is above ``epsilon_min``.

        Args:
            batch_size: Maximum number of transitions to train on.
        """
        # Use this agent's own memory (not a module-level `agent`), and let
        # random.sample cover both the "enough samples" and "too few samples"
        # cases: sampling len(memory) items is a full shuffle.
        sample_size = min(batch_size, len(self.memory))
        sample = random.sample(self.memory, sample_size)

        for state, action, reward, next_state, done in sample:
            target = reward
            if not done:
                # Bootstrapped target: reward plus discounted best next Q-value.
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            # Only the taken action's Q-value is moved toward the target; the
            # other outputs keep their current predictions.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def make_input(self, input):
        """Reshape an observation to ``(1, state_size)`` for the model."""
        return np.reshape(input, [1, self.state_size])

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

def append_to_file(data_fn, hp1, hp2, time, action_tracker):
    """Append one comma-separated record to ``data_fn``.

    The record is ``hp1 - hp2, hp1, hp2, time`` followed by every entry of
    ``action_tracker``, terminated by a newline. The separator after ``time``
    is always written, even when ``action_tracker`` is empty.

    Args:
        data_fn: Path of the file to append to.
        hp1: First HP value.
        hp2: Second HP value.
        time: Step count recorded for the episode.
        action_tracker: Iterable of per-action values to append.
    """
    summary_fields = ",".join(map(str, (hp1 - hp2, hp1, hp2, time)))
    action_fields = ",".join(map(str, action_tracker))
    record = "{},{}\n".format(summary_fields, action_fields)
    with open(data_fn, 'a') as out:
        out.write(record)

In [None]:
# Number of training episodes to run.
EPISODES = 1000

# Opponent name. NOTE(review): not referenced by the visible training or
# evaluation cells; confirm where it is meant to be passed.
adversary = "Machete"

# Output files: saved network weights and the per-episode statistics log.
agent_fn = "aigothands_v1.h5"
data_fn = "aigothands_v1_data.txt"

# True  -> start a fresh agent.
# False -> reload the weights stored in agent_fn and keep training them.
# WARNING: True CLEARS the existing statistics in data_fn, and the weights in
# agent_fn are overwritten on the next save (if the filenames are unchanged).
new_agent = True

In [None]:
# Training cell: runs EPISODES episodes against the environment, logs one
# statistics record per episode to data_fn, and periodically saves the agent.
env = gym.make('FightingiceDataFrameskip-v0', show_training=False)
state_size = 143
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 32

if not new_agent:
    agent.load(agent_fn)
else:
    # Opening in 'w' mode truncates any existing statistics file.
    with open(data_fn, 'w') as file:
        print("Cleared AI data file")

try:
    for e in range(EPISODES):
        try:
            state = env.reset()
            time = 0
            p1hp = 400
            p2hp = 400
            # One counter per action. Sized from the action space rather than
            # a hard-coded 56 so indexing by action can never go out of range.
            action_tracker = [0] * action_size
            while True:
                time += 1

                # NOTE(review): a state of length 4 is treated as "not a real
                # observation" and answered with action 0 — presumably the
                # environment sometimes hands back a 4-item step tuple here;
                # confirm against gym_fightingice.
                if len(state) != 4:
                    # First, we choose an action
                    # and log this action choice in the Action Tracker.
                    action = agent.act(state)
                    action_tracker[action] += 1

                    # Then, we step the environment forward.
                    next_state, reward, done, _ = env.step(action)
                    agent.remember(state, action, reward, next_state, done)
                    state = next_state

                    # We have to track player HP in real-time because
                    # it becomes inaccessible at round end...
                    p1hp = env.p1.frameData.getCharacter(True).getHp()
                    p2hp = env.p1.frameData.getCharacter(False).getHp()
                else:
                    next_state, reward, done, _ = env.step(0)
                    state = next_state
                if done:
                    print("episode: {}/{}, time: {}, P1_HP: {}, P2_HP: {}, e: {:.2}\n"
                          .format(e, EPISODES, time, p1hp, p2hp, agent.epsilon))
                    append_to_file(data_fn, p1hp, p2hp, time, action_tracker)
                    break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
            if e % 10 == 0:
                agent.save(agent_fn)
        except Exception as exc:
            # Best effort: a failed episode is reported and skipped so training
            # continues. Catching Exception (not a bare except) keeps
            # KeyboardInterrupt/SystemExit able to stop the run.
            print("Handling exception in episode {}: {!r}".format(e, exc))
    # Save once more so episodes after the last multiple of 10 are not lost.
    agent.save(agent_fn)
finally:
    # Always release the environment, even when the run is interrupted.
    env.close()

In [None]:
# Evaluation cell: loads the saved weights and plays episodes indefinitely
# with the game window shown. Interrupt the kernel to stop.
env = gym.make('FightingiceDataFrameskip-v0', show_training=True)
state_size = 143
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 32
agent.load(agent_fn)
# A freshly constructed agent explores with epsilon = 0.5, which would make
# half of the demonstrated actions random. Play the learned policy instead.
agent.epsilon = 0.0

try:
    while True:
        done = False
        state = env.reset()
        while not done:
            # Same guard as the training loop: a length-4 state is not fed to
            # the network (it could not be reshaped to state_size); action 0
            # is sent instead.
            if len(state) != 4:
                action = agent.act(state)
            else:
                action = 0
            next_state, reward, done, _ = env.step(action)
            state = next_state
finally:
    # Release the environment when the endless loop is interrupted.
    env.close()