引入模組

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import model_from_json

# tf.get_logger().setLevel('ERROR')

定義遊戲管理

In [2]:
class TicTacToe:
    """Minimal tic-tac-toe state: a 3x3 board plus the player to move.

    Board cells hold 0 (empty), 1 (first player) or -1 (second player).
    """

    def __init__(self):
        self.board = np.zeros((3, 3))
        self.current_player = 1

    def make_move(self, row, col):
        """Place the current player's mark at (row, col).

        Returns True and passes the turn to the other player when the cell
        was empty; returns False and leaves the state untouched otherwise.
        """
        if self.board[row, col] != 0:
            return False
        self.board[row, col] = self.current_player
        self.current_player *= -1
        return True

    def check_winner(self):
        """Report the game outcome.

        Returns the winning player's value (1 or -1, as a NumPy float) when
        a row, column or diagonal is filled by one player, 0 when the board
        is full without a winner, and None while the game is still open.
        """
        grid = self.board

        # Scan order: row i then column i for each i, then both diagonals.
        candidate_lines = []
        for k in range(3):
            candidate_lines.append(grid[k, :])
            candidate_lines.append(grid[:, k])
        candidate_lines.append(np.diag(grid))
        candidate_lines.append(np.diag(np.fliplr(grid)))

        for line in candidate_lines:
            total = np.sum(line)
            # A line sums to +3 or -3 only when one player owns all three cells.
            if abs(total) == 3:
                return total // 3

        if np.count_nonzero(grid) == 9:
            return 0
        return None




檢查GPU

In [3]:
# Detect GPUs and let TensorFlow allocate GPU memory on demand instead of
# reserving all of it up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPUs available: {gpus}")
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU; enabling it on only the first device fails on multi-GPU hosts.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the GPUs were already initialized (e.g. the cell is
        # re-run after a model was built); the setting cannot change then.
        print(f"Could not enable GPU memory growth: {err}")
else:
    print("No GPUs found.")

GPUs available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


訓練

In [4]:
def safe_normalize(probs):
    """Turn raw scores into a probability vector that sums to 1.

    Values are first clipped into [1e-10, 1.0] so that no entry is exactly
    zero (or negative) before dividing by the total.
    """
    clipped = np.clip(probs, 1e-10, 1.0)
    total = np.sum(clipped)
    return clipped / total

定義神經網路

In [5]:
# Policy network: 9 board cells in, a softmax over the 9 possible moves out.
model = keras.Sequential()
model.add(keras.layers.Dense(32, activation='relu', input_shape=(9,)))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(9, activation='softmax'))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0008),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [6]:

@tf.function
def predict_move(state):
    """Evaluate the global `model` on `state` with training behaviour disabled.

    The training loop calls this with a float32 tensor holding one flattened
    board and reads the first row of the result as the move probabilities.
    """
    move_probabilities = model(state, training=False)
    return move_probabilities

def train_model(num_games=20000, batch_size=32, epsilon=1, gamma=0.85, epsilon_decay=0.99):
    """Train the global `model` on batches of self-play games.

    Args:
        num_games: Total number of games; only `num_games // batch_size`
            full batches are played.
        batch_size: Number of games played before each `model.fit` call.
        epsilon: Starting exploration rate. A move is drawn uniformly from
            the free cells with probability `epsilon + 0.1`; otherwise it is
            sampled from the network's (renormalized) output.
        gamma: Discount applied per move counted back from the end of the
            game, so later moves get larger sample weights.
        epsilon_decay: Factor multiplied into `epsilon` after every move.

    Every recorded (board, chosen move) pair is used as a classification
    sample, weighted by its discounted reward: base 1 for moves of the
    eventual winner, 0 for moves of the loser, 0.4 for moves in a draw.
    """
    num_batches = num_games // batch_size

    for batch_index in range(num_batches):
        states, actions, rewards = [], [], []

        for _game_index in range(batch_size):
            game = TicTacToe()
            game_states, game_actions = [], []

            while game.check_winner() is None:
                state = game.board.flatten()
                network_input = tf.convert_to_tensor([state], dtype=tf.float32)
                action_probs = predict_move(network_input)[0].numpy()

                # Restrict the distribution to cells that are still empty.
                valid_actions = [cell for cell in range(9) if state[cell] == 0]
                valid_probs = action_probs[valid_actions]
                if np.isnan(valid_probs).any():
                    print("Warning: NaN detected in action probabilities")
                    valid_probs = np.ones_like(valid_probs) / len(valid_probs)
                else:
                    valid_probs = safe_normalize(valid_probs)

                # Epsilon-greedy with a fixed 0.1 floor on exploration.
                explore = np.random.rand() <= (epsilon + 0.1)
                if explore:
                    action = np.random.choice(valid_actions)
                else:
                    action = np.random.choice(valid_actions, p=valid_probs)
                epsilon *= epsilon_decay

                one_hot_action = np.zeros(9)
                one_hot_action[action] = 1
                game_states.append(state)
                game_actions.append(one_hot_action)

                game.make_move(action // 3, action % 3)

            winner = game.check_winner()
            total_moves = len(game_states)

            game_reward = []
            for move_index, past_state in enumerate(game_states):
                # Player 1 moves whenever the marks on the board cancel out.
                mover = 1 if np.sum(past_state) == 0 else -1
                if winner == mover:
                    base_reward = 1
                elif winner == -mover:
                    base_reward = 0
                else:
                    base_reward = 0.4
                moves_until_end = total_moves - move_index - 1
                game_reward.append(base_reward * (gamma ** moves_until_end))

            states.extend(game_states)
            actions.extend(game_actions)
            rewards.extend(game_reward)

        model.fit(
            np.array(states),
            np.array(actions),
            sample_weight=np.array(rewards),
            epochs=1,
            verbose=1,
            batch_size=32,
        )
        # print(f"Games played: {(batch_index + 1) * batch_size}", end='\r')

# Run the self-play training session.
train_model(
    num_games=40000,
    batch_size=32,
    epsilon=1,
    gamma=0.85,
    epsilon_decay=0.996,
)



保存模型

In [17]:
def save_model(n):
    """Persist the global `model` under the base name `n`.

    Writes the architecture as JSON to `<n>.json` and the weights to
    `<n>_weights.h5`.

    Args:
        n: Base path/name (string) without extension.
    """
    model_json = model.to_json()
    # Use a context manager so the file is flushed and closed deterministically.
    with open(n + ".json", 'w') as json_file:
        json_file.write(model_json)
    model.save_weights(n + "_weights.h5")
# save_model("ooxx7")

引入其他模組

In [8]:
from IPython.display import clear_output
import time


導入神經網路

In [9]:
def import_model(n):
    """Replace the global `model` with one restored from disk.

    Reads the architecture from `<n>.json`, loads the weights from
    `<n>_weights.h5`, and compiles the result with the stock 'adam'
    optimizer and categorical cross-entropy loss.

    Args:
        n: Base path/name (string) without extension.
    """
    global model

    architecture_path = n + ".json"
    weights_path = n + "_weights.h5"

    with open(architecture_path, "r") as architecture_file:
        model = model_from_json(architecture_file.read())

    model.load_weights(weights_path)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    
# import_model("ooxx7")

測試

In [10]:

@tf.function
def predict_move2(state):
    """Evaluate the global `model` on `state` with training behaviour disabled.

    Used by `get_best_move` during play.
    NOTE(review): the body matches `predict_move`; presumably a second
    tf.function exists so it traces whichever `model` is current after
    `import_model` -- confirm.
    """
    move_probabilities = model(state, training=False)
    return move_probabilities

def get_best_move(board):
    """Return the index (0-8) of the empty cell the network scores highest.

    Args:
        board: 3x3 array where 0 marks an empty cell.

    Raises:
        ValueError: if the board has no empty cell.
    """
    flat_board = board.flatten()
    network_input = tf.convert_to_tensor([flat_board], dtype=tf.float32)
    scores = predict_move2(network_input)[0].numpy()

    open_cells = [cell for cell in range(9) if flat_board[cell] == 0]
    # Greedy pick; on ties the lowest cell index wins.
    return max(open_cells, key=lambda cell: scores[cell])


In [16]:
def print_board(board):
    """Print the 3x3 board as text.

    Cells equal to 1 are shown as [X], cells equal to -1 as [O], and every
    other cell shows its move index (0-8) so the player knows what to type.
    """
    separator = "---------------"

    def render(value, index):
        if value == 1:
            return "[X]"
        if value == -1:
            return "[O]"
        return f" {index} "

    for row_number in range(3):
        cells = [
            render(value, 3 * row_number + col_number)
            for col_number, value in enumerate(board[row_number])
        ]
        print(" | ".join(cells))
        if row_number != 2:
            print(separator)

    print("\n")
        

def human_move(game):
    """Prompt until the player supplies a usable move.

    Accepted inputs:
        "A"      -- let the network choose (returns `get_best_move`'s result)
        "Q"      -- quit (returns None)
        0 .. 8   -- index of an empty cell (returned as an int)

    Anything else prints an error message and asks again.
    """
    while True:
        try:
            answer = input("Enter your move (0-8): ")
            if answer == "A":
                return get_best_move(game.board)
            if answer == "Q":
                return None

            cell = int(answer)
            # Range is checked first so the board is never indexed out of bounds.
            if cell < 0 or cell > 8 or game.board[cell // 3, cell % 3] != 0:
                print("Invalid move. Try again.")
                continue
            return cell
        except ValueError:
            print("Invalid input. Please enter a number between 0 and 8.")

# Play one interactive game against the trained model.
game = TicTacToe()
clear_output(wait=True)
print_board(game.board)

# F is the player value controlled by the AI: answering "n" makes the AI
# player 1 (X, moves first); any other answer makes the AI player -1 (O).
F = 1 if (input("First mover ? (Y)/n") == "n") else -1

while game.check_winner() is None:
    clear_output(wait=True)
    print_board(game.board)

    if game.current_player == F:
        print( "AI's turn")
        time.sleep(0.5)
        move = get_best_move(game.board)

    else:
        print("Your turn (you are X)" if F == -1 else "Your turn (you are O)")
        time.sleep(0.5)
        move = human_move(game)

    # human_move returns None when the player types "Q" to quit.
    if move is None:
        break

    game.make_move(move // 3, move % 3)

clear_output(wait=True)
print_board(game.board)

winner = game.check_winner()
if winner is None:
    # The loop was left via "Q" before the game finished; previously this
    # case fell through to the draw message.
    print("Game ended before completion.")
elif winner == F:
    print("AI wins!")
elif winner == -F:
    print("You win!")
else:
    print("It's a draw!")

[O] | [X] | [X]
---------------
[X] | [O] | [O]
---------------
[X] | [O] | [X]
It's a draw!
