In [1]:
import pickle
from copy import deepcopy

In [2]:
class TicTacToe:
    def __init__(self):
        self.board = [0] * 9
        self.game_over = False
        self.winner = None

    def move(self, position, player):
        if self.board[position] == 0 and not self.game_over:
            self.board[position] = player
            self.check_game_over(player)

    def check_game_over(self, player):
        winning_positions = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  
            [0, 4, 8], [2, 4, 6]            
        ]
        for positions in winning_positions:
            if all(self.board[pos] == player for pos in positions):
                self.game_over = True
                self.winner = player
                return
        if 0 not in self.board:
            self.game_over = True 

    def reset(self):
        self.board = [0] * 9
        self.game_over = False
        self.winner = None


In [3]:
def features(state):
    winning_combinations = [
        [0, 1, 2], [3, 4, 5], [6, 7, 8],  
        [0, 3, 6], [1, 4, 7], [2, 5, 8],  
        [0, 4, 8], [2, 4, 6]             
    ]
    x0 = 1
    x1 = x2 = x3 = x4 = x5 = x6 = 0

    x3 = 1 if state[4] == 1 else 0

    for corner in [0, 2, 6, 8]:
        if state[corner] == 1:
            x4 += 1

    for combo in winning_combinations:
        pieces = [state[i] for i in combo]
        if pieces.count(1) == 2 and pieces.count(0) == 1:
            x1 += 1
        elif pieces.count(-1) == 2 and pieces.count(0) == 1:
            x2 += 1
        elif pieces.count(1) == 1 and pieces.count(0) == 2:
            x5 += 1
        elif pieces.count(1) == 3:
            x6 += 1

    return [x0, x1, x2, x3, x4, x5, x6]

In [4]:
# weights = [0.0] * 7  

In [5]:
def train(game, weights, alpha=0.05, epochs=1000, log_interval=100):
    outcomes = {'win': 0, 'loss': 0, 'draw': 0}

    for epoch in range(1, epochs + 1):
        game.reset()
        history = []
        player = 1 
        while not game.game_over:
            best_move = None
            best_value = float('-inf')
            for move in range(9):
                if game.board[move] == 0: 
                    game_copy = deepcopy(game)
                    game_copy.move(move, player)
                    future_value = sum(f * w for f, w in zip(features(game_copy.board), weights))
                    if future_value > best_value:
                        best_value = future_value
                        best_move = move
            game.move(best_move, player)
            history.append((game.board[:], player))
            player *= -1 

        final_reward = 0
        if game.winner is not None:
            final_reward = 10 if game.winner == 1 else -10
        
        for state, player in reversed(history):
            target_value = final_reward * player 
            current_features = features(state)
            predicted_value = sum(f * w for f, w in zip(current_features, weights))
            error = target_value - predicted_value
            for i in range(len(weights)):
                weights[i] += alpha * error * current_features[i]

        if game.winner == 1:
            outcomes['win'] += 1
        elif game.winner == -1:
            outcomes['loss'] += 1
        else:
            outcomes['draw'] += 1

        if epoch % log_interval == 0:
            print(f"Epochs {epoch - log_interval + 1}-{epoch}: Wins: {outcomes['win']}, Losses: {outcomes['loss']}, Draws: {outcomes['draw']}")
            outcomes = {'win': 0, 'loss': 0, 'draw': 0} 


    print(f"Trained weights after {epochs} epochs: {weights}")
    return weights

In [6]:
def value(state, weights):
    return sum(f*w for f, w in zip(features(state), weights))

In [7]:
def play_with_agent(game, weights):
    while not game.game_over:
        player_move = int(input("Your move (0-8): "))
        game.move(player_move, -1)
        if game.game_over:
            break
    
        best_move = None
        best_value = -float('inf')
        for move in range(9):
            if game.board[move] == 0:
                game_copy = deepcopy(game)
                game_copy.move(move, 1)
                move_value = value(game_copy.board, weights)
                if move_value > best_value:
                    best_value = move_value
                    best_move = move
        print(f"Agent moves at {best_move}")
        game.move(best_move, 1)

In [None]:
game = TicTacToe()
weights = [0] * 7

train(game, weights, alpha=0.004, epochs=600, log_interval=100)
with open('tic_tac_toe_weights_hardddfff.pkl', 'wb') as f:
    pickle.dump(weights, f)

print("Starting a new game against the trained agent. You are 'O' (-1), and the agent is 'X' (1).")

def print_board(state):
    symbols = {0: " ", 1: "X", -1: "O"}
    print("\nBoard:")
    for i in range(3):
        print("|".join(symbols[state[j]] for j in range(i*3, i*3+3)))
        if i < 2:
            print("-----")

game.reset()

while not game.game_over:
    print_board(game.board)
    player_move = int(input("Your move (0-8): "))
    game.move(player_move, -1)
    if game.game_over:
        break

    best_move = None
    best_value = -float('inf')
    for move in range(9):
        if game.board[move] == 0:
            game_copy = deepcopy(game)
            game_copy.move(move, 1)
            move_value = value(game_copy.board, weights)
            if move_value > best_value:
                best_value = move_value
                best_move = move
    print(f"Agent moves at position {best_move}")
    game.move(best_move, 1)

print_board(game.board)
if game.winner is None:
    print("The game is a draw.")
elif game.winner == 1:
    print("The agent wins!")
else:
    print("Congratulations, you win!")

 # [-3.8856471521263196, -0.5298458867275254, -1.5981292622877645, 0.5356547089956046, 1.3178908829676994, 0.706282542372419, 8.849588645016835]

Epochs 1-100: Wins: 78, Losses: 1, Draws: 21
Epochs 101-200: Wins: 0, Losses: 0, Draws: 100
Epochs 201-300: Wins: 0, Losses: 0, Draws: 100
Epochs 301-400: Wins: 0, Losses: 0, Draws: 100
Epochs 401-500: Wins: 0, Losses: 0, Draws: 100
Epochs 501-600: Wins: 0, Losses: 0, Draws: 100
Trained weights after 600 epochs: [-0.5903135097825771, -0.21895397111207904, -0.7959249070777004, 0.3710628864072751, 0.1045365055835832, 0.04267665486128699, 2.132727052492692]
Starting a new game against the trained agent. You are 'O' (-1), and the agent is 'X' (1).

Board:
 | | 
-----
 | | 
-----
 | | 
