In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pickle

## Game Logic

In [None]:
board = np.full((3, 3), 0 ,dtype=int)
player = 1

def make_move(board, move, player):
    x,y = move[0], move[1]
    if board[y][x] == 0:
        board[y][x] = player
    print(board)

def check_line(line, player):
    # Returns True if all elements in the line match the player aka X or O
    return np.all(line == player)

def check_draw(board):
    if np.all(board != 0):
        return True
    return False



def check_win(board, player):
    #Check rows
    for row in range(3):
        if check_line(board[row, :], player):
            return True
    #Check Columns
    for col in range(3):
        if check_line(board[:, col], player):
            return True
    #Check diagonal
    if check_line(np.diag(board), player):
        return True
    #Check other diagonal
    if check_line(np.diag(np.fliplr(board)), player):
        return True
    #No win
    return False

# def clear_board():
#     global player
#     global board
#     board = np.full((3, 3), 0 ,dtype=int)
#     player = 1

def available_moves(board):
    open_space = np.argwhere(board == 0)
    moves = []
    count = 0
    for m in open_space:
        arr = np.full((len(open_space)), 0 ,dtype=int)
        arr[count] = 1
        moves.append(arr.tolist())
        count += 1
    return moves

def generate_cords(board, move):
    counter = 0
    x = 0
    y = 0
    for sqaure in board:
        if x >= 3:
            x = 0
            y += 1
        if sqaure == 0:
            if move[counter] == 1:
                return x, y
            counter += 1
        x += 1

def draw_board(board, opponent:str):
    plt.figure(figsize=(5, 5))
    plt.xlim(0, 3)
    plt.ylim(0, 3)

    plt.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labelleft=False)
    
    # Vertical
    plt.plot([1,1], [0,3], color='black', linewidth=2)
    plt.plot([2,2], [0,3], color='black', linewidth=2)
    # Horizontal
    plt.plot([0,3], [1,1], color='black', linewidth=2)
    plt.plot([0,3], [2,2], color='black', linewidth=2)

    #Place things
    for row in range(3):
        for col in range(3):
            symbol = board[row, col]

            if symbol != 0:
                x_pos = col + 0.5
                y_pos = 2.5 - row
                #Color
                color = 'red' if symbol == 1 else 'blue'
                #draw
                plt.text(x_pos, y_pos,'X' if symbol == 1 else 'O', fontsize=40, ha='center', va='center', color=color)
    plt.title("Tic Tac Toe vs " + opponent)
    plt.show()


# make_move(board, (0,0))
# make_move(board, (0,1))
# make_move(board, (0,2))
# available_moves(board)
# print(check_win(board))
# print(check_draw(board))
# #clear_board()
# draw_board(board, "MEEEEEEEEEEE")
# # while True:
# #     input()

[[1 0 0]
 [0 0 0]
 [0 0 0]]
[[1 0 0]
 [1 0 0]
 [0 0 0]]
[[1 0 0]
 [1 0 0]
 [1 0 0]]
[[0 1]
 [0 2]
 [1 1]
 [1 2]
 [2 1]
 [2 2]]
6
[[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1]]


[(np.int64(0), np.int64(1)),
 (np.int64(0), np.int64(2)),
 (np.int64(1), np.int64(1)),
 (np.int64(1), np.int64(2)),
 (np.int64(2), np.int64(1)),
 (np.int64(2), np.int64(2))]

## Policy things

In [7]:
#Load perfect strategy
perfect_strategy = pickle.load(open("perfectPolicy.p", "rb"))
print(len(perfect_strategy))
type(perfect_strategy[(0,0,0,0,0,0,0,0,0)])

4520


numpy.ndarray

## Agent

In [None]:
class QAgent:
    def __init__(self, symbol, epsilon=0.1, gamma=0.95, alpha=0.1):
        self.symbol = symbol
        self.q_table = {}
        self.epsilon = epsilon # Chansen att agenten gör ett slumpmässigt drag 
        self.gamma = gamma # Hur mycket framtida belöningar värderas jämfört med omedelbara.
        self.alpha = alpha # Inlärningshastighet
        self.last_state = None
        self.last_action = None

    def generate_state_string(self, board):
        return str(board.reshape(9)) # gör om numpy array till en string för användan i dict
    
    def make_move_string(self, move):
        return "".join(move)

    def choose_move(self, board):
        state = self.generate_state_string(board)
        moves = available_moves(board)

        #epsilon random exploration thing
        if random.random() < self.epsilon:
            return random.choice(moves)
        
        # utvärdera vilket drag man ska gör utifrån q tabellen

        # lägger alla drag som går att göra tillsamans med q värdet i en tupel och sedan lägger tupeln i en lista
        moves_with_q_value = []
        max_q = 0.0
        for m in available_moves:
            q_val = self.q_table.get((state, self.generate_state_string(m)), 0.0)
            moves_with_q_value.append(m, q_val)
            if q_val > max_q:
                max_q = q_val
        
        #filtrerar ut dom bästa dragen baserat på q värde
        best_moves = []
        for m in moves_with_q_value:
            if m[1] == max_q:
                best_moves.append(m)
        #väljer en slumpmässig move bland de bästa
        action = random.choice(best_moves)

        
        self.last_state = state
        self.last_action = action
        return action
    def generate_next_state_guess(self, board):
        move = self.choose_move(board)
        new_board = board.copy()
        x,y = generate_cords(board)
        new_board[y][x] = self.symbol
        return new_board
        
    def update_q_table(self, reward, game_over=False):
        #uppdatera q tabelen baserat på förgående drag typ

        #Finns inga föregånde drag så tränar inget
        if self.last_state == None or self.last_action is None:
            return
        
        #hämtar bästa q värdet från nästa omgång
        max_future_q = 0.0
        if not game_over:
            #borde kolla två steg frammåt för att få bättre resultat (tror jag) kollar bara ett steg framåt nu
            next_state = self.generate_state_string(self.generate_next_state_guess(board))
            available_moves = available_moves(next_state)
            temp_max_q = 0.0
            for m in available_moves:
                q_val = self.q_table.get((next_state, self.generate_state_string(m)), 0.0)

                if q_val > temp_max_q:
                    temp_max_q = q_val

            max_future_q = temp_max_q

        current_q = self.q_table.get((self.last_state, self.last_action), 0.0)

        # bellman ekvation
        new_q = current_q + self.alpha * (reward + self.gamma * max_future_q - current_q)

        #lägger till en ny state och action
        if (self.last_state, self.last_action) not in self.q_table:
            self.q_table[(self.last_state, self.generate_state_string(self.last_action))] = 0.0
        #updaterar q värdet
        self.q_table[(self.last_state, self.generate_state_string(self.last_action))] = 0.0
        
        


## Traning

In [None]:
def train_agent(episodes=10000):
    # Agent plays as 1
    agent1 = QAgent(symbol=1, epsilon=1.0) # Start with 100% exploration

    print("Training started...")

    for i in range(episodes):

        # Reset Game
        board = np.full((3, 3), 0, dtype=int)
        game_over = False
        winner = None

        # Game Loop
        while not game_over:
            # --- AGENT TURN (Player 1) ---
            move = agent.choose_move(board)
            board[move] = 1 # Apply move

            # Check if Agent won
            if check_win(board, 1):
                agent.update_q_table(reward=10, new_board=board, game_over=True)
                wins += 1
                game_over = True
                break
            
            # Check Draw
            if check_draw(board):
                agent.update_q_table(reward=1, new_board=board, game_over=True)
                draws += 1
                game_over = True
                break
            
            # --- OPPONENT TURN (Player 2 - Random) ---
            # Ideally, the agent updates here based on the board AFTER opponent moves
            # But strictly, the agent needs to update based on the state *it* acts on next.
            # However, if the opponent wins, the agent must be punished.
            
            opp_moves = available_moves(board)
            opp_move = random.choice(opp_moves) # Random opponent
            board[opp_move] = -1 # Opponent is -1

            if check_win(board, -1):
                # Punishment! The agent's last move allowed the opponent to win
                agent.update_q_table(reward=-10, new_board=board, game_over=True)
                losses += 1
                game_over = True
                break
            
            # Check Draw again
            if check_draw(board):
                agent.update_q_table(reward=2, new_board=board, game_over=True)
                draws += 1
                game_over = True
                break
            
            # Update Q-Table for the move just made, based on the board state 
            # NOW (after opponent moved). This allows the agent to look ahead.
            agent.update_q_table(reward=0, new_board=board, game_over=False)

    print(f"Training Done. Wins: {wins}, Losses: {losses}, Draws: {draws}")
    return agent

# --- PLAY VS THE TRAINED AGENT ---

def play_vs_agent(agent):
    print("\n--- Playing vs AI ---")
    board = np.full((3, 3), 0, dtype=int)
    agent.epsilon = 0 # Turn off randomness, play pure strategy
    
    while True:
        # User Turn
        try:
            r = int(input("Row (0-2): "))
            c = int(input("Col (0-2): "))
            if board[r, c] != 0:
                print("Occupied!")
                continue
            board[r, c] = -1 # You are -1
        except:
            print("Invalid input")
            continue

        if check_win(board, -1):
            print(board)
            print("You Win!")
            break
        if check_draw(board):
            print(board)
            print("Draw!")
            break

        # AI Turn
        move = agent.choose_move(board)
        board[move] = 1
        print(f"AI chose: {move}")
        print(board)

        if check_win(board, 1):
            print("AI Wins!")
            break
        if check_draw(board):
            print("Draw!")
            break

# --- EXECUTION ---
trained_agent = train_agent(20000)
# play_vs_agent(trained_agent) # Uncomment to play

## Game Player

In [None]:
def input_processor(move):
    move = move.split(' ')
    
    x = int(move[0])
    y = int(move[1])
    return (x,y)


opponent = "Human"
choice = input("1 = Human. 2 = Random. 3 = Perfect Policy. 4 = Home Grown Policy 5 = Dont play")
run_game = True
if choice == "5":
    run_game = False
while run_game:
    
    #First Player
    player = 1
    if choice == "1":
        opponent = "Human"
        make_move(board, input_processor(input("X players turn (input like this:1 2)")))
        draw_board(board, opponent)
        if check_win(board):
            run_game = False
            print("X player win")
            clear_board()
            continue
        if check_draw(board):
            run_game = False
            print("Draw")
            clear_board()
            continue
    elif choice == "2":
        opponent = "RNGesus"
        valid_play = False
        while not valid_play:
            x = random.randint(0, 2)
            y = random.randint(0, 2)
            if board[y][x] == 0:
                valid_play = True
                make_move(board, (x,y))
                draw_board(board, opponent)
                
        if check_win(board):
            run_game = False
            print("X player win")
            clear_board()
            continue            
        if check_draw(board):                
            run_game = False
            print("Draw")
            clear_board()
            continue

    
    #Second Player
    player = 2
    make_move(board, input_processor(input("O players turn (input like this:1 2)")))
    draw_board(board, opponent)
    if check_win(board):
        run_game = False
        print("O player win")
        clear_board()
    if check_draw(board):
        run_game = False
        print("Draw")
        clear_board()
        



ValueError: invalid literal for int() with base 10: ''