In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, Flatten, Dense, Softmax
from tensorflow.keras.optimizers import Adam,RMSprop
import numpy as np
from sklearn.model_selection import train_test_split

2023-11-28 21:41:38.935211: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Display the installed TensorFlow version so the run environment is recorded in the cell output.
tf.__version__

'2.4.0'

In [3]:
# Restrict TensorFlow to one GPU. The device at index GPU_INDEX is used when the
# host has that many GPUs; otherwise TensorFlow's default device visibility is
# left untouched instead of failing with an IndexError.
GPU_INDEX = 1
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > GPU_INDEX:
    tf.config.set_visible_devices(physical_devices[GPU_INDEX], 'GPU')
else:
    print(f"Only {len(physical_devices)} GPU(s) detected; GPU index {GPU_INDEX} "
          "is unavailable, keeping TensorFlow's default device visibility.")

2023-11-28 21:41:39.529766: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-11-28 21:41:39.530152: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-11-28 21:41:39.532808: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-28 21:41:39.532887: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 4060 Ti computeCapability: 8.9
coreClock: 2.655GHz coreCount: 34 deviceMemorySize: 15.71GiB deviceMemoryBandwidth: 268.25GiB/s
2023-11-28 21:41:39.532907: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

# Data Pre-Processing

Open **dan_train.csv** file and split the games into a list.
Every row of csv: `DL0000000001,B,B[pd],W[dp],B[pp],W[dc],B[de],...`. 

Columns are:

    1. DL0000000001: Game ID
    2. B: Player's color
    3-... : Moves
    
We keep only the move sequence of each game (dropping the game ID and player colour columns) and store it in the `games` list:

In [4]:
# Read the training CSV into a list of lines. The `with` block guarantees the
# file handle is closed even if reading fails.
with open('./Training Dataset/dan_train.csv') as csv_file:
    df = csv_file.read().splitlines()

# Each line is "<game id>,<player colour>,<move>,<move>,...". Splitting at most
# twice and keeping the last piece leaves only the comma-separated moves.
games = [i.split(',',2)[-1] for i in df]

Create a dictionary to convert the coordinates from characters to numbers

In [5]:
# The 19 letters used for board coordinates inside move strings such as `B[pd]`.
chars = 'abcdefghijklmnopqrs'

# Letter -> 0-based index ('a' -> 0, ..., 's' -> 18).
coordinates = dict(zip(chars, range(len(chars))))

# 0-based index -> letter (the inverse of `coordinates`, despite its name).
chartonumbers = dict(enumerate(chars))

coordinates

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18}

We decided to build a DCNN model in this tutorial. We create data samples from every move in every game: the target is to predict the next move given the state of the board just before that move. This way every game yields one sample per move, so we can collect many more data samples than there are games.

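For simplicity, the first four channels of the feature map represent the data as listed below. The `prepare_input` function in the next cell adds seven more channels (11 in total), which are listed after them:
 1. Positions of black stones: mark them as 1 and the rest of the board as 0
 2. Positions of white stones: mark them as 1 and the rest of the board as 0
 3. Empty areas of the board: mark the empty points as 1 and occupied points as 0
 4. The last move on the board: mark the position of the last move as 1 and the rest as 0
 5. The last move, marked only when it was played by black
 6. The last move, marked only when it was not played by black (i.e. white)
 7. Played stones for which exactly 1 of the four neighbouring points is off the board or holds a black stone
 8. Same as 7, for exactly 2 such neighbouring points
 9. Same as 7, for exactly 3 such neighbouring points
 10. Same as 7, for all 4 neighbouring points
 11. On-board points adjacent to the last move that do not hold a black stone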
 
The target value is an integer class index in the range 0-360 (19\*19 = 361 classes). Later this will be one-hot encoded.

In [6]:
def prepare_input(moves):
    """Encode the position reached after ``moves`` as a ``(19, 19, 11)`` array.

    Args:
        moves: list of move strings such as ``'B[pd]'``. Character 0 is the
            colour (``'B'`` or ``'W'``); characters 2 and 3 are the column and
            row letters, translated to indices through ``coordinates``.
            An empty list yields the empty board.

    Returns:
        ``numpy.ndarray`` of shape ``(19, 19, 11)`` (float64), indexed
        ``[row, column, channel]``, holding 0/1 values:

        * 0 - black stones
        * 1 - white stones
        * 2 - empty points (1 = empty, 0 = a move was played there)
        * 3 - position of the last move
        * 4 - position of the last move if its colour is ``'B'``
        * 5 - position of the last move otherwise
        * 6..9 - for every played stone, the plane ``5 + k`` is set where ``k``
          is the number of its four neighbouring points that are off the board
          or hold a black stone (no plane is set when ``k == 0``)
        * 10 - on-board points adjacent to the last move without a black stone

    Raises:
        KeyError: if a coordinate letter is not in ``coordinates``.
        IndexError: if a move string is shorter than four characters.

    NOTE(review): stones are never removed, so captures are not modelled, and
    planes 6..10 only look at black stones (plane 0) when testing neighbours.
    This looks like it was meant to be a liberty count; it is kept as-is
    because models already trained on these features depend on it - confirm
    before changing.
    """
    neighbour_offsets = ((0, 1), (0, -1), (1, 0), (-1, 0))

    def non_black_neighbours(row, column):
        """Yield on-board neighbours of (row, column) that hold no black stone."""
        for d_row, d_col in neighbour_offsets:
            n_row, n_col = row + d_row, column + d_col
            if 0 <= n_row < 19 and 0 <= n_col < 19 and not x[n_row, n_col, 0]:
                yield n_row, n_col

    x = np.zeros((19,19,11))
    stones = []  # (row, column) of every move, in playing order

    for move in moves:
        color = move[0]
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        stones.append((row, column))
        if color == 'B':
            x[row, column, 0] = 1
            x[row, column, 2] = 1   # temporarily "occupied"; inverted at the end
        elif color == 'W':
            x[row, column, 1] = 1
            x[row, column, 2] = 1

    if moves:
        last_row, last_column = stones[-1]
        x[last_row, last_column, 3] = 1

        # Colour-specific copy of the last-move plane.
        if moves[-1][0] == 'B':
            x[last_row, last_column, 4] = 1
        else:
            x[last_row, last_column, 5] = 1

        # Surroundings of the last move.
        for n_row, n_col in non_black_neighbours(last_row, last_column):
            x[n_row, n_col, 10] = 1

    # Neighbour-count planes: start from 4 and subtract one for every on-board
    # neighbour that holds no black stone.
    for row, column in stones:
        count = 4 - sum(1 for _ in non_black_neighbours(row, column))
        if count >= 1:
            x[row, column, 5 + count] = 1   # 1 -> plane 6, ..., 4 -> plane 9

    # Turn the "occupied" marks in plane 2 into an "empty" mask.
    x[:,:,2] = np.where(x[:,:,2] == 0, 1, 0)
    return x

def prepare_label(move):
    """Convert a move string such as ``'B[pd]'`` to its class index.

    Characters 2 and 3 of ``move`` are the column and row letters; both are
    translated through ``coordinates`` and combined as ``column * 19 + row``,
    giving an integer in ``0..360``.

    NOTE(review): ``prepare_input`` indexes the board as ``[row, column]``,
    whereas this index is column-major. Whatever decodes predictions must use
    the same ``column * 19 + row`` convention - verify against the inference
    code before changing either side.
    """
    column_index = coordinates[move[2]]
    return column_index * 19 + coordinates[move[3]]

In [7]:
# Count the games and the total number of moves; every move yields one training sample.
n_games = len(games)
n_moves = sum(len(game.split(',')) for game in games)
print(f"Total Games: {n_games}, Total Moves: {n_moves}")

Total Games: 100160, Total Moves: 22853380


The code below was run for the baseline model using only the first 500 games of the dataset. To use the complete dataset you need to create a data generator; otherwise your RAM might not be enough to hold all the samples (on the free version of Google Colab, the notebook crashes above 500 games).

In [8]:
# x = []
# y = []
# for game in games[:1]:
#     moves_list = game.split(',')
#     for count, move in enumerate(moves_list):
#         x.append(prepare_input(moves_list[:count]))
#         y.append(prepare_label(moves_list[count]))
# x = np.array(x)
# y = np.array(y)

In [9]:
# x.shape

In [10]:
# y.shape

In [11]:
# y_one_hot = tf.one_hot(y, depth=19*19)

Dataset splitting: 90% Training, 10% validation

In [12]:
# x_train, x_val, y_train, y_val = train_test_split(x, y_one_hot.numpy(), test_size=0.10)

# Training

### Simple DCNN Model:

In [13]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, Add, Input, Flatten, Dense, Softmax

# def residual_block(inputs, filters, kernel_size=3, strides=1):
#     shortcut = inputs

#     outputs = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(inputs)
#     outputs = BatchNormalization()(outputs)
#     outputs = Activation('relu')(outputs)

#     outputs = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(outputs)
#     outputs = BatchNormalization()(outputs)

#     if shortcut.shape[-1] != filters:
#         shortcut = Conv2D(filters, kernel_size=1, strides=strides, padding='same')(shortcut)
    
#     outputs = Add()([outputs, shortcut])
#     outputs = Activation('relu')(outputs)
#     return outputs

# def create_model():
#     inputs = Input(shape=(19, 19, 6))

#     outputs = Conv2D(kernel_size=7, filters=32, padding='same', activation='relu')(inputs)
#     outputs = Conv2D(kernel_size=7, filters=32, padding='same', activation='relu')(outputs)
#     outputs = Conv2D(kernel_size=5, filters=32, padding='same', activation='relu')(outputs)
#     outputs = Conv2D(kernel_size=5, filters=32, padding='same', activation='relu')(outputs)
#     outputs = Conv2D(kernel_size=3, filters=32, padding='same', activation='relu')(outputs)
#     outputs = Conv2D(kernel_size=3, filters=32, padding='same', activation='relu')(outputs)

#     # Adding a residual block
#     outputs = residual_block(outputs, filters=32)

#     outputs = Conv2D(kernel_size=3, filters=1, padding='same')(outputs)
#     outputs = Flatten()(outputs)
#     outputs = Softmax()(outputs)
    
#     model = Model(inputs, outputs)
    
#     opt = tf.keras.optimizers.Adam(learning_rate=0.001)
#     model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
#     return model
from tensorflow.keras.layers import Dropout

def residual_block(inputs, filters, kernel_size=3, strides=1, dropout_rate=0.2):
    """Two-convolution residual block: Conv-BN-ReLU-Conv-BN, add shortcut, ReLU.

    Args:
        inputs: Keras tensor the block is applied to.
        filters: number of output channels of both convolutions.
        kernel_size: kernel size of both convolutions.
        strides: stride of the first convolution (and of the shortcut
            projection). The second convolution always uses stride 1 so that
            the main path and the shortcut end up with the same spatial size.
        dropout_rate: currently unused - no Dropout layer is applied in this
            block. The parameter is kept so existing callers keep working.

    Returns:
        Keras tensor with ``filters`` channels.
    """
    is_unit_stride = strides == 1 or (
        not isinstance(strides, int) and tuple(strides) == (1, 1)
    )

    shortcut = inputs

    # Main path. Only the first convolution may down-sample; striding twice
    # would shrink the main path more than the shortcut and make Add() fail.
    outputs = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    outputs = Activation('relu')(outputs)

    outputs = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(outputs)
    outputs = BatchNormalization()(outputs)

    # Project the shortcut with a 1x1 convolution whenever its shape would not
    # match the main path: different channel count or spatial down-sampling.
    if shortcut.shape[-1] != filters or not is_unit_stride:
        shortcut = Conv2D(filters, kernel_size=1, strides=strides, padding='same')(shortcut)

    outputs = Add()([outputs, shortcut])
    outputs = Activation('relu')(outputs)
    return outputs

def create_model():
    """Build and compile the move-prediction network.

    Architecture: eight 32-filter ReLU convolutions (kernel sizes 7, 7, 7, 5,
    5, 3, 3, 3), three residual blocks with 32 filters, then a single-filter
    3x3 convolution whose 19x19 map is flattened and passed through softmax,
    giving one probability per board point (361 outputs).

    Returns:
        A compiled ``Model`` taking ``(19, 19, 11)`` inputs, trained with
        RMSprop (learning rate 0.001, rho 0.9) on categorical cross-entropy
        and reporting accuracy.
    """
    inputs = Input(shape=(19, 19, 11))

    # Plain convolutional trunk: large kernels first, smaller ones later.
    outputs = inputs
    for kernel_size in (7, 7, 7, 5, 5, 3, 3, 3):
        outputs = Conv2D(kernel_size=kernel_size, filters=32, padding='same', activation='relu')(outputs)

    # Three residual blocks on top of the trunk.
    for _ in range(3):
        outputs = residual_block(outputs, filters=32, dropout_rate=0.3)

    # Policy head: one logit per board point, softmax over all 361 points.
    outputs = Conv2D(kernel_size=3, filters=1, padding='same')(outputs)
    outputs = Flatten()(outputs)
    outputs = Softmax()(outputs)

    model = Model(inputs, outputs)

    opt = RMSprop(learning_rate=0.001, rho=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    return model


In [14]:
# Build the network. The training cell further down uses the global `model`,
# so this must run for the notebook to work from a fresh kernel.
model = create_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 19, 19, 11)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 19, 19, 32)   17280       input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 19, 19, 32)   50208       conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 19, 19, 32)   50208       conv2d_1[0][0]                   
______________________________________________________________________________________________

2023-11-28 21:41:40.767577: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-11-28 21:41:40.767700: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-28 21:41:40.767794: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:05:00.0 name: NVIDIA GeForce RTX 4060 Ti computeCapability: 8.9
coreClock: 2.655GHz coreCount: 34 deviceMemorySize: 15.71GiB deviceMemoryBandwidth: 268.25GiB/s
2023-11-28 21:41:40.767808: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-11-28 21:41:40.767820: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-11-28 21:41:40.767825: I tensorflow/stream_executor/plat

In [15]:
def games_generator():
    """Yield every sample produced by ``process_game`` for each entry of ``games``, in order."""
    for game_record in games:
        for sample in process_game(game_record):
            yield sample

def process_game(game):
    """Yield one ``(input, label)`` pair per move of a comma-separated game string.

    For the move at position ``i`` the input is ``prepare_input`` applied to
    the moves before it (``moves_list[:i]``) and the label is ``prepare_label``
    applied to the move itself.
    """
    moves_list = game.split(',')
    for index in range(len(moves_list)):
        history = moves_list[:index]
        yield prepare_input(history), prepare_label(moves_list[index])


# Input-pipeline settings.
VAL_SAMPLES = 10000      # the first VAL_SAMPLES positions of the stream are held out for validation
BATCH_SIZE = 512
SHUFFLE_BUFFER = 10000   # positions kept in memory for shuffling the training stream
SHUFFLE_SEED = 42
NUM_CLASSES = 19 * 19


def to_one_hot(features, labels):
    """Replace integer labels by one-hot vectors of length NUM_CLASSES."""
    return features, tf.one_hot(labels, depth=NUM_CLASSES)


# Stream (board tensor, move index) samples straight from the Python generator
# so the full dataset never has to fit in memory.
games_dataset = tf.data.Dataset.from_generator(
    games_generator,
    output_types=(tf.float32, tf.int32),
    output_shapes=([19, 19, 11], [])
)

# Split the stream into validation and training parts.
val_dataset = games_dataset.take(VAL_SAMPLES)
train_dataset = games_dataset.skip(VAL_SAMPLES)

# Training: shuffle before batching. Without it every batch consists of
# consecutive positions from the same few games, which are strongly correlated.
train_dataset = (
    train_dataset
    .shuffle(SHUFFLE_BUFFER, seed=SHUFFLE_SEED)
    .batch(BATCH_SIZE)
    .map(to_one_hot)
    .prefetch(tf.data.experimental.AUTOTUNE)   # overlap data generation with training
)

# Validation: same batching and label encoding, no shuffling.
val_dataset = (
    val_dataset
    .batch(BATCH_SIZE)
    .map(to_one_hot)
    .prefetch(tf.data.experimental.AUTOTUNE)
)


from tensorflow.keras.callbacks import ModelCheckpoint
# Checkpoint callback: writes the model to `checkpoint_path` whenever the
# validation loss reaches a new minimum.
checkpoint_path = "testmodel_bestdan.h5"
checkpoint = ModelCheckpoint(checkpoint_path, 
                             monitor='val_loss',  # track the validation loss
                             save_best_only=True,     # only save when the monitored value improves
                             mode='min',              # lower monitored value is better
                             verbose=1)

# Train the model: 20 epochs of 1000 training batches each, validating on
# `val_dataset` and passing the checkpoint callback.
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=val_dataset,
    steps_per_epoch=1000,
    callbacks=[checkpoint]
)

2023-11-28 20:48:23.106153: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-11-28 20:48:23.106369: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2112000000 Hz


Epoch 1/20


2023-11-28 20:48:25.071293: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-11-28 20:48:26.112094: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-11-28 20:48:26.113246: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11



Epoch 00001: val_loss improved from inf to 4.48819, saving model to testmodel_bestdan.h5
Epoch 2/20

Epoch 00002: val_loss improved from 4.48819 to 3.95320, saving model to testmodel_bestdan.h5
Epoch 3/20

Epoch 00003: val_loss improved from 3.95320 to 3.56984, saving model to testmodel_bestdan.h5
Epoch 4/20

Epoch 00004: val_loss improved from 3.56984 to 3.38391, saving model to testmodel_bestdan.h5
Epoch 5/20

Epoch 00005: val_loss improved from 3.38391 to 3.14696, saving model to testmodel_bestdan.h5
Epoch 6/20

Epoch 00006: val_loss improved from 3.14696 to 3.04408, saving model to testmodel_bestdan.h5
Epoch 7/20

Epoch 00007: val_loss improved from 3.04408 to 2.91914, saving model to testmodel_bestdan.h5
Epoch 8/20

Epoch 00008: val_loss improved from 2.91914 to 2.79189, saving model to testmodel_bestdan.h5
Epoch 9/20

Epoch 00009: val_loss did not improve from 2.79189
Epoch 10/20

Epoch 00010: val_loss improved from 2.79189 to 2.76889, saving model to testmodel_bestdan.h5
Epoch 

: 

In [14]:
# history = model.fit(
#     x = x_train, 
#     y = y_train,
#     batch_size = 256,
#     epochs = 25,
#     validation_data=(x_val, y_val),
# )

2023-11-21 10:26:03.647589: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 10559286100 exceeds 10% of free system memory.
2023-11-21 10:26:06.949069: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-11-21 10:26:06.949291: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2112000000 Hz


Epoch 1/25


2023-11-21 10:26:07.171023: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-11-21 10:26:09.054404: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-11-21 10:26:09.055742: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [37]:
# Save the current in-memory `model` to an HDF5 file.
model.save('./model_dan_tutorial5.h5')

## ALL DONE!

For using the model and creating a submission file, follow the notebook **Create Public Upload CSV.ipynb**

# End of Tutorial

You are free to use more modern NN architectures, better pre-processing, and better feature-extraction methods to achieve much better accuracy!