<a href="https://colab.research.google.com/github/sukritis312/coronavirus_bot/blob/main/tictactoe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gym

In [None]:
!wget https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/playing-tictactoe-with-reinforcement-learning-and-openai-gym/gym-tictactoe.zip

In [None]:
!unzip -o gym-tictactoe.zip

In [None]:
!pip install -e gym-tictactoe

In [None]:
!pip install --upgrade gym==0.19.0

In [None]:

![Policy Table](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/playing-tictactoe-with-reinforcement-learning-and-openai-gym/images/policy.png)


In [None]:
import gym
import random
import gym_tictactoe
# Create the tic-tac-toe environment through the OpenAI Gym factory.
# NOTE(review): the "TicTacToe-v0" id is presumably registered by importing
# gym_tictactoe above — confirm against that package.
env = gym.make("TicTacToe-v0")
# Poke at the tic-tac-toe environment: these two bare expressions are evaluated
# but not displayed, because only the last expression of a cell is echoed.
env.state
env.hash()
# Take one exploratory step (action 0 as "X"); step returns a 4-tuple.
new_state, reward, done, info = env.step(0, "X")
# Flag that tracks whether the game is over (overrides the value from the step above)
done = False
# Good practice to reset the environment before playing a game, to clear any old game
env.reset()
# Keep playing until the game is over
while not done:
    # Make a random action from the list of available actions for X
    new_state, reward, done, info = env.step(random.choice(env.available_actions()), "X")
    # Print the state hash after X's move
    print(env.hash())
    
    # If the game ended on X's action we don't want O to make an action
    if not done:
        # Make a random action from the list of available actions for O
        new_state, reward, done, info = env.step(random.choice(env.available_actions()), "O")
        # Print the state hash after O's move
        print(env.hash())

In [None]:
class Agent:
    """Tabular value-learning player for the tic-tac-toe environment.

    This base definition only stores the configuration and an empty
    state-value table; behaviour is attached by the cells that follow.
    """

    def __init__(self, env, player="X", alpha=.4, gamma=.9):
        """Record the environment, the mark played and the learning settings.

        Args:
            env: environment object the agent acts in (stored as-is).
            player: mark this agent plays; "X" gives player_number 0,
                any other value gives 1.
            alpha: learning-rate coefficient, stored on the instance.
            gamma: discount coefficient, stored on the instance.
        """
        self.env = env
        self.player = player
        # 0 for "X", 1 otherwise.
        self.player_number = int(player != "X")
        self.alpha, self.gamma = alpha, gamma
        # State-value table, starts empty.
        self.V = dict()

In [None]:
class Agent(Agent):

    def select_action(self, epsilon=.1):
        """Choose a move with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random available action is
        returned. Otherwise every entry of
        ``env.available_states(self.player)`` is scored as
        ``gamma * V[entry[0]] + entry[1][player_number]`` and one of the
        top-scoring actions is returned, with ties broken uniformly at random.

        Args:
            epsilon: exploration probability; 0 makes the choice fully greedy.

        Returns:
            An element of ``env.available_actions()``.
        """
        if random.random() < epsilon:
            return random.choice(self.env.available_actions())

        # States missing from the value table are scored with 0 -- the same
        # value add_states() gives to new states -- instead of raising
        # KeyError when add_states() has not been called for this position.
        q_values = [
            self.gamma * self.V.get(state[0], 0) + state[1][self.player_number]
            for state in self.env.available_states(self.player)
        ]
        max_value = max(q_values)
        max_indexs = [i for i, q in enumerate(q_values) if q == max_value]
        # NOTE(review): this indexing assumes available_states() lists its
        # entries in the same order as available_actions() -- confirm against
        # the environment implementation.
        return self.env.available_actions()[random.choice(max_indexs)]

In [None]:
class Agent(Agent):

    def add_states(self):
        """Make sure the current state and its listed successors are in V.

        The current ``env.hash()`` and the first element of every entry
        returned by ``env.available_states`` for "X" and then for "O" are
        given the value 0 unless they already have an entry; existing values
        are left untouched.
        """
        self.V.setdefault(self.env.hash(), 0)
        for mark in ("X", "O"):
            for successor in self.env.available_states(mark):
                self.V.setdefault(successor[0], 0)

In [None]:
class Agent(Agent):

    def update_state_values(self, new_state, old_state, reward):
        """Shift V[old_state] toward ``reward + gamma * V[new_state]``.

        The stored value moves by ``alpha`` times the gap between that target
        and its current value.

        Args:
            new_state: key in V of the state reached after the move.
            old_state: key in V of the state the move was made from.
            reward: numeric reward credited to this agent for the move.

        Raises:
            KeyError: if either state has no entry in V.
        """
        current = self.V[old_state]
        target = reward + self.gamma * self.V[new_state]
        self.V[old_state] = current + self.alpha * (target - current)

In [None]:
# Training the model through self-play
def train(episodes):
    """Train an X agent and an O agent by letting them play each other.

    Both agents act with their default exploration setting of
    ``select_action`` and both update their value tables after every move,
    whoever made it, each using its own component of the reward.

    Args:
        episodes: number of games to play.

    Returns:
        Tuple ``(agent_x, agent_o)`` holding the two trained agents.
    """
    agent_x = Agent(env, "X")
    agent_o = Agent(env, "O")
    # X moves first, then O; both learn from every move.
    learners = (agent_x, agent_o)

    for _episode in range(episodes):
        # start every game from a clean board
        env.reset()
        done = False
        while not done:
            for mover in learners:
                # make sure every reachable state has a table entry
                for learner in learners:
                    learner.add_states()

                # remember where we were, then act according to the policy
                old_state = env.hash()
                action = mover.select_action()
                new_state, reward, done, _info = env.step(action, mover.player)

                # each agent learns from its own component of the reward
                for learner in learners:
                    learner.update_state_values(
                        new_state, old_state, reward[learner.player_number]
                    )

                # a game that ends on X's move leaves no turn for O
                if done:
                    break

    return agent_x, agent_o

In [None]:
%%time

# Train both agents over 110000 self-play games; the %%time magic at the top of
# this cell reports how long the run takes.
agent_x, agent_o = train(110000)

In [None]:
# Evaluate the X agent against a random O player
def test_x(episodes):
    """Play greedy ``agent_x`` against an O player that moves at random.

    Args:
        episodes: number of games to play.

    Returns:
        Tuple ``(win, loss, tie)`` counted from X's side. A final reward that
        matches none of the three known outcomes is not counted.
    """
    wins = losses = ties = 0

    for _game in range(episodes):
        # fresh board for every game
        env.reset()
        while True:
            # X's turn: register reachable states, then take the best action
            agent_x.add_states()
            move = agent_x.select_action(epsilon=0)
            _state, reward, done, _info = env.step(move, agent_x.player)
            if done:
                break

            # O's turn: states are added for X only, O just picks at random
            agent_x.add_states()
            move = random.choice(env.available_actions())
            _state, reward, done, _info = env.step(move, "O")
            if done:
                break

        # classify the final reward, given as (X reward, O reward)
        if reward == (10, -10):
            wins += 1
        elif reward == (-10, 10):
            losses += 1
        elif reward == (0, 0):
            ties += 1

    return wins, losses, ties
# Number of evaluation games handed to test_x.
episodes = 10000

# test_x returns its counters in (win, loss, tie) order.
win, loss, tie = test_x(episodes)

# Raw counts first, then each count as a percentage of the games played.
print("Win:", win, "Tie:", tie, "Loss:", loss)
print("Win Rate:", win/episodes*100, "Tie Rate:", tie/episodes*100, "Loss Rate:", loss/episodes*100)

In [None]:
# Evaluate the O agent against a random X player
def test_o(episodes):
    """Play greedy ``agent_o`` against an X player that moves at random.

    Args:
        episodes: number of games to play.

    Returns:
        Tuple ``(win, loss, tie)`` counted from O's side. A final reward that
        matches none of the three known outcomes is not counted.
    """
    wins = losses = ties = 0

    for _game in range(episodes):
        # fresh board for every game
        env.reset()
        while True:
            # X's turn: states are added for O only, X just picks at random
            agent_o.add_states()
            move = random.choice(env.available_actions())
            _state, reward, done, _info = env.step(move, "X")
            if done:
                break

            # O's turn: register reachable states, then take the best action
            agent_o.add_states()
            move = agent_o.select_action(epsilon=0)
            _state, reward, done, _info = env.step(move, agent_o.player)
            if done:
                break

        # classify the final reward, given as (X reward, O reward)
        if reward == (-10, 10):
            wins += 1
        elif reward == (10, -10):
            losses += 1
        elif reward == (0, 0):
            ties += 1

    return wins, losses, ties

In [None]:
# NOTE(review): test_o is already defined in the preceding cell with the same
# behavior. This later definition is the one that stays bound after Run All;
# one of the two cells should be deleted.
# number of games (episodes)
def test_o(episodes):
    """Play greedy ``agent_o`` against an X player that moves at random.

    Args:
        episodes: number of games to play.

    Returns:
        Tuple ``(win, loss, tie)`` counted from O's side.
    """
    # counters to keep track of results
    win = 0
    tie = 0
    loss = 0
    # loops for a certain number of games (episodes)
    for episode in range(episodes):
        # stops while loop when game is done
        done = False
        # resets environment when game is done
        env.reset()
        while not done:
            
            # adds states for O only because we are acting randomly and not updating state values for X
            agent_o.add_states()
            
            # X always makes a random action from the available actions
            x_action = random.choice(env.available_actions())
            # performs an action
            new_state, reward, done, _ = env.step(x_action, "X")

            # if the game ends on X move, we don't want to make an O move
            if (not done):
                
                # O agents turn
                
                # adds states for O only because we are acting randomly and not updating state values for X
                agent_o.add_states()
                
                # always get the best action
                o_action = agent_o.select_action(epsilon=0)
                new_state, reward, done, _ = env.step(o_action, agent_o.player)
                
        # record results when game is done; reward is compared as (X reward, O reward)
        if (reward == (-10, 10)):
            win+=1
        elif (reward == (10, -10)):
            loss+=1
        elif (reward == (0, 0)):
            tie+=1
    return win, loss, tie

In [None]:
# Number of evaluation games handed to test_o.
episodes = 10000

# test_o returns its counters in (win, loss, tie) order, from O's side.
win, loss, tie = test_o(episodes)

# Raw counts first, then each count as a percentage of the games played.
print("Win:", win, "Tie:", tie, "Loss:", loss)
print("Win Rate:", win/episodes*100, "Tie Rate:", tie/episodes*100, "Loss Rate:", loss/episodes*100)

In [None]:
# Evaluate the two trained agents against each other
def test(episodes):
    """Play greedy ``agent_x`` against greedy ``agent_o``.

    Args:
        episodes: number of games to play.

    Returns:
        Tuple ``(x_win, o_win, tie)``. A final reward that matches none of
        the three known outcomes is not counted.
    """
    x_wins = o_wins = draws = 0

    for _game in range(episodes):
        # fresh board for every game
        env.reset()
        while True:
            # X's turn: both agents register states, X takes its best action
            agent_x.add_states()
            agent_o.add_states()
            move = agent_x.select_action(epsilon=0)
            _state, reward, done, _info = env.step(move, "X")
            if done:
                break

            # O's turn: both agents register states, O takes its best action
            agent_x.add_states()
            agent_o.add_states()
            move = agent_o.select_action(epsilon=0)
            _state, reward, done, _info = env.step(move, "O")
            if done:
                break

        # classify the final reward, given as (X reward, O reward)
        if reward == (-10, 10):
            o_wins += 1
        elif reward == (10, -10):
            x_wins += 1
        elif reward == (0, 0):
            draws += 1

    return x_wins, o_wins, draws

In [None]:
# Number of agent-versus-agent games handed to test.
episodes = 10000

# test returns its counters in (x_win, o_win, tie) order.
x_win, o_win, tie = test(episodes)

# Raw counts first, then each count as a percentage of the games played.
print("X Win:", x_win, "Tie:", tie, "O Win:", o_win)
print("X Win Rate:", x_win/episodes*100, "Tie Rate:", tie/episodes*100, "O Win Rate:", o_win/episodes*100)

In [None]:
# Play against the trained O agent as X
def play_as_x(episodes=1):
    """Let a human play X from the keyboard against greedy ``agent_o``.

    Before each human turn the board is rendered and the available actions
    are printed; the move is then read from standard input. An entry that is
    not an integer, or is not one of the available actions, is rejected and
    asked for again rather than crashing the game or being passed to
    ``env.step``. The result of each game is printed once it ends.

    Args:
        episodes: number of games to play.
    """
    for _episode in range(episodes):
        # stops while loop when game is done
        done = False
        # start every game from a clean board
        env.reset()
        while not done:

            # show the board and the moves that can be made
            env.render()
            legal_actions = env.available_actions()
            print(legal_actions)

            # adds states for O only because the human controls X
            agent_o.add_states()

            # keep asking until the entry parses and names an available action
            while True:
                try:
                    x_action = int(input())
                except ValueError:
                    print("Invalid move, enter one of:", legal_actions)
                    continue
                if x_action in legal_actions:
                    break
                print("Invalid move, enter one of:", legal_actions)

            # performs the human's action
            new_state, reward, done, _ = env.step(x_action, "X")

            # if the game ends on X's move, O does not get a turn
            if not done:
                # adds states for O only because the human controls X
                agent_o.add_states()

                # the agent always takes its best action
                o_action = agent_o.select_action(epsilon=0)
                new_state, reward, done, _ = env.step(o_action, "O")

        env.render()
        # report the result; reward is compared as (X reward, O reward)
        if reward == (-10, 10):
            print("You Lose")
        elif reward == (10, -10):
            print("You Win")
        elif reward == (0, 0):
            print("Tie")

In [None]:
# Start one interactive game (the default episodes=1) with the human playing X;
# moves are read from standard input.
play_as_x()

In [None]:
# Play against the trained X agent as O
def play_as_o(episodes=1):
    """Let a human play O from the keyboard against greedy ``agent_x``.

    The agent moves first. Before each human turn the board is rendered and
    the available actions are printed; the move is then read from standard
    input. An entry that is not an integer, or is not one of the available
    actions, is rejected and asked for again rather than crashing the game or
    being passed to ``env.step``. The result of each game is printed once it
    ends.

    Args:
        episodes: number of games to play.
    """
    for _episode in range(episodes):
        # stops while loop when game is done
        done = False
        # start every game from a clean board
        env.reset()
        while not done:

            # adds states for X only because the human controls O
            agent_x.add_states()

            # the agent always takes its best action
            x_action = agent_x.select_action(epsilon=0)
            new_state, reward, done, _ = env.step(x_action, "X")

            # if the game ends on X's move, O does not get a turn
            if not done:

                # show the board and the moves that can be made
                env.render()
                legal_actions = env.available_actions()
                print(legal_actions)

                # adds states for X only because the human controls O
                agent_x.add_states()

                # keep asking until the entry parses and names an available action
                while True:
                    try:
                        o_action = int(input())
                    except ValueError:
                        print("Invalid move, enter one of:", legal_actions)
                        continue
                    if o_action in legal_actions:
                        break
                    print("Invalid move, enter one of:", legal_actions)

                # performs the human's action
                new_state, reward, done, _ = env.step(o_action, "O")

        env.render()
        # report the result; reward is compared as (X reward, O reward)
        if reward == (-10, 10):
            print("You Win")
        elif reward == (10, -10):
            print("You Lose")
        elif reward == (0, 0):
            print("Tie")

In [None]:
# Start one interactive game (the default episodes=1) with the human playing O;
# moves are read from standard input.
play_as_o()