In [28]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, Flatten, Dense, Softmax, BatchNormalization, Dropout, Add
from tensorflow.keras.optimizers import Adam, SGD, RMSprop,Adadelta
from tensorflow.keras import regularizers

import numpy as np
from sklearn.model_selection import train_test_split

In [29]:
# Display the TensorFlow version this notebook is running against.
tf.__version__

'2.4.0'

In [30]:
# Restrict TensorFlow to the first GPU when one is present. On a CPU-only
# machine list_physical_devices('GPU') returns an empty list, and indexing it
# would raise IndexError, so the restriction is skipped in that case.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.set_visible_devices(physical_devices[0], 'GPU')

# Data Pre-Processing

Open **play_style_train.csv** file and split the games into a list.
Every row of csv: `PSL0000000001,1,B[pd],W[dp],B[qp],W[dc],B[nq],W[nc],B[qf],W[kd],B[ce],W[dg],B[dd],W[cc],B[fd],W[ed],B[ee],W[ec],B[ge],W[gc],B[di]`. 

Columns are:

    1. PSL0000000001: Game ID
    2. 1: Game Style
    3-... : Moves, the last move represents the play style (B[di] in this case)
    
We keep only the moves of each game in the `games` list and store the play-style labels separately in `game_styles`:

In [31]:
# Read the training CSV; every row is "<game id>,<style>,<move>,<move>,...".
# The context manager closes the file handle deterministically, and each row
# is split once instead of twice.
with open('./Training Dataset/play_style_train.csv') as csv_file:
    df = csv_file.read().splitlines()

games = []        # comma-separated move string of every game
game_styles = []  # integer play-style label of every game
for line in df:
    # maxsplit=2 keeps all moves together in the third field.
    _game_id, style, move_string = line.split(',', 2)
    games.append(move_string)
    game_styles.append(int(style))

Create a dictionary to convert the coordinates from characters to numbers

In [32]:
# Board coordinates are written with the letters 'a'..'s'; map every letter to
# its zero-based index on the 19x19 board.
chars = 'abcdefghijklmnopqrs'
coordinates = dict(zip(chars, range(len(chars))))
coordinates

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18}

In [33]:
# Scratch value; no other cell visible in this notebook reads `l`.
l = list(range(1, 4))

In [34]:
# Scratch check: range(1, 4) yields 1, 2, 3 (the stop value is excluded).
for i in range(1,4):
  print(i)

1
2
3


We build a DCNN model in this tutorial. Each game yields one data sample: all moves of the game are replayed onto an empty board, the resulting position is encoded as a stack of feature planes, and the play style of the game is the target.

For simplicity, every position is encoded as a 19×19 feature map with 17 binary channels, including:
 1. Occupied points (channel 0) and the stones of each colour (channels 4–5)
 2. The most recent moves (channels 1–3) and the colour of the last move (channels 15–16)
 3. Neighbourhood and liberty-style features around the stones (channels 6–14)
 
The target is to predict the game style (1, 2 or 3) from the state of the game table. Later this will be one-hot encoded.

In [35]:
# (row, column) offsets of the four orthogonal neighbours of a point.
_ORTHOGONAL = ((0, 1), (0, -1), (1, 0), (-1, 0))
# (row, column) offsets of the four diagonal neighbours of a point.
_DIAGONAL = ((-1, 1), (-1, -1), (1, -1), (1, 1))


def _neighbours(row, column, offsets, size=19):
    """Return the on-board points reached from (row, column) via `offsets`."""
    return [
        (row + d_row, column + d_col)
        for d_row, d_col in offsets
        if 0 <= row + d_row < size and 0 <= column + d_col < size
    ]


def prepare_input(moves):
    """Encode a list of moves as a (19, 19, 17) array of 0/1 feature planes.

    Every move is a string such as ``"B[pd]"``: the colour letter, then the
    column letter and the row letter between brackets. Stones are only ever
    added to the board; captures are not simulated.

    Channels:
        0      every point that received a stone
        1-3    the last, second-to-last and third-to-last move
        4, 5   black stones / white stones
        6      empty points orthogonally adjacent to the last move
        7, 8   8-neighbourhood of the second-to-last move that is not held by
               the opposing colour (7 if that move was black, 8 if white)
        9, 10  orthogonal neighbours of the last move that are not held by the
               opposing colour (9 if the last move was black, 10 if white)
        11-14  per stone: one-hot of ``4 - (empty orthogonal neighbours)``,
               i.e. 1..4 blocked or off-board sides; a stone with four empty
               neighbours sets none of these channels
        15, 16 last move was black / white (marked at the last move's point)

    Args:
        moves: sequence of move strings in play order; may be empty.

    Returns:
        numpy array of shape (19, 19, 17).

    Raises:
        KeyError: if a coordinate letter is not in ``coordinates``.
    """
    x = np.zeros((19, 19, 17))
    stones = []  # (row, column) of every move, in play order

    for move in moves:
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        stones.append((row, column))
        x[row, column, 0] = 1
        x[row, column, 4 if move[0] == 'B' else 5] = 1

    # Channels 1-3. moves[-i] is valid for every i <= len(moves); the previous
    # `i >= len(moves)` guard stopped one move early, so short games lost their
    # oldest recent move.
    for i in range(1, min(3, len(stones)) + 1):
        row, column = stones[-i]
        x[row, column, i] = 1

    if not moves:
        return x

    last_row, last_column = stones[-1]
    last_is_black = moves[-1][0] == 'B'
    x[last_row, last_column, 15 if last_is_black else 16] = 1

    last_adjacent = _neighbours(last_row, last_column, _ORTHOGONAL)

    # Channel 6: empty points next to the last move.
    for row, column in last_adjacent:
        if not x[row, column, 0]:
            x[row, column, 6] = 1

    # Channels 7/8: surroundings of the second-to-last move. This used to read
    # moves[-1], which duplicated the last move; it now uses moves[-2] together
    # with that stone's own colour, and is skipped for single-move games.
    if len(stones) >= 2:
        prev_row, prev_column = stones[-2]
        opponent, target = (5, 7) if moves[-2][0] == 'B' else (4, 8)
        for row, column in _neighbours(prev_row, prev_column, _ORTHOGONAL + _DIAGONAL):
            if not x[row, column, opponent]:
                x[row, column, target] = 1

    # Channels 9/10: points around the last move not held by the opponent.
    opponent, target = (5, 9) if last_is_black else (4, 10)
    for row, column in last_adjacent:
        if not x[row, column, opponent]:
            x[row, column, target] = 1

    # Channels 11-14: the count starts at 4 and drops by one per empty on-board
    # neighbour, so it measures blocked/off-board sides rather than liberties.
    # NOTE(review): if true liberties were intended, the counting is inverted —
    # confirm before changing, since it alters the trained features.
    for row, column in stones:
        empty = sum(
            1 for n_row, n_col in _neighbours(row, column, _ORTHOGONAL)
            if not x[n_row, n_col, 0]
        )
        blocked = 4 - empty
        if blocked:
            x[row, column, 10 + blocked] = 1

    return x

In [36]:
# Check how many samples can be obtained
# Every game contributes exactly one sample, so the sample count is simply
# the number of games.
n_games = len(games)
print(f"Total Games: {n_games}")

Total Games: 26615


Since the play-style training set is much smaller than the kyu or dan training sets, the complete dataset fits in memory. For larger datasets, a data generator would still be the better choice.

In [37]:
# Encode every game as one feature tensor and stack the results into a single
# array; the integer style labels are shifted down by one so they start at 0.
x = np.array([prepare_input(game.split(',')) for game in games])
y = np.array(game_styles) - 1

In [38]:
# Sanity check: one (19, 19, 17) feature tensor per game.
x.shape

(26615, 19, 19, 17)

In [39]:
# Sanity check: one label per game.
y.shape

(26615,)

In [40]:
# Number of games per zero-based play-style label, to check the class balance.
np.bincount(y)

array([8184, 9403, 9028])

Target is one-hot encoded and loss is changed to `categorical_crossentropy`

In [41]:
# One-hot encode the zero-based labels; depth=3 is the number of play styles.
y_hot = tf.one_hot(indices=y, depth=3)

2023-11-27 20:19:22.060395: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-11-27 20:19:22.060512: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-27 20:19:22.060600: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 4060 Ti computeCapability: 8.9
coreClock: 2.655GHz coreCount: 34 deviceMemorySize: 15.71GiB deviceMemoryBandwidth: 268.25GiB/s
2023-11-27 20:19:22.060612: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-11-27 20:19:22.060622: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-11-27 20:19:22.060627: I tensorflow/stream_executor/plat

Dataset splitting: 90% Training, 10% validation

In [42]:
# Hold out 10% of the games for validation. The fixed seed makes the split
# reproducible across kernel restarts, and stratifying on the integer labels
# keeps the class proportions the same in both parts.
x_train, x_val, y_train, y_val = train_test_split(
    x, y_hot.numpy(), test_size=0.10, random_state=42, stratify=y
)

# Training

### Simple DCNN Model:

In [43]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, Add, Input, Flatten, Dense,Dropout,Attention
from tensorflow.keras.layers import LSTM, TimeDistributed,Reshape
def residual_block(inputs, filters, kernel_size=3, strides=1):
    """Two-convolution residual block with batch norm, ReLU and dropout.

    Args:
        inputs: 4-D Keras tensor (batch, height, width, channels).
        filters: number of output channels of both convolutions.
        kernel_size: kernel size of both convolutions.
        strides: stride of the block as a whole. It is applied once, in the
            first convolution, and mirrored on the shortcut.

    Returns:
        Keras tensor with `filters` channels.
    """
    shortcut = inputs

    outputs = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    outputs = Activation('relu')(outputs)
    outputs = Dropout(0.2)(outputs)

    # The second convolution keeps stride 1: applying `strides` here as well
    # would downsample the main path twice while the shortcut is downsampled
    # at most once, and Add() would fail on mismatched shapes.
    outputs = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(outputs)
    outputs = BatchNormalization()(outputs)

    # Project the shortcut with a 1x1 convolution whenever the main path
    # changed the channel count or the spatial size.
    stride_values = strides if isinstance(strides, (tuple, list)) else (strides,)
    downsamples = any(stride != 1 for stride in stride_values)
    if shortcut.shape[-1] != filters or downsamples:
        shortcut = Conv2D(filters, kernel_size=1, strides=strides, padding='same')(shortcut)

    outputs = Add()([outputs, shortcut])
    outputs = Activation('relu')(outputs)
    outputs = Dropout(0.2)(outputs)

    return outputs

def create_model():
    """Build and compile the residual CNN play-style classifier.

    Layout: zero-padded 9x9 stride-2 stem convolution, three 32-filter
    residual blocks, three Dense(32)+BatchNorm stages and a 3-way softmax.
    The model is compiled with RMSprop and categorical cross-entropy.

    Returns:
        A compiled Keras Model taking (19, 19, 17) inputs.
    """
    board = Input(shape=(19, 19, 17))

    # Stem: pad generously, then a large strided convolution.
    features = tf.keras.layers.ZeroPadding2D(padding=(10, 10))(board)
    features = Conv2D(filters=32, kernel_size=9, strides=2, activation='relu')(features)
    features = BatchNormalization()(features)
    features = tf.keras.layers.ZeroPadding2D(padding=(1, 1))(features)

    # Residual trunk.
    for _ in range(3):
        features = residual_block(features, filters=32)

    # An LSTM stage over the reshaped feature map was tried here and is
    # currently disabled.

    # Classification head.
    features = Flatten()(features)
    for _ in range(3):
        features = Dense(32, activation='relu')(features)
        features = BatchNormalization()(features)
    probabilities = Dense(3, activation='softmax')(features)

    model = Model(board, probabilities)
    # Adam(learning_rate=0.00005) and Adadelta(learning_rate=1.0, rho=0.9,
    # epsilon=1e-07) were tried as alternatives; RMSprop is the one in use.
    model.compile(
        optimizer=RMSprop(learning_rate=0.001, rho=0.5),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [44]:
import tensorflow as tf
from tensorflow.keras.layers import LayerNormalization, MultiHeadAttention, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop

def transformer_block(inputs, num_heads=16, ff_dim=32, dropout_rate=0.1):
    """Self-attention block followed by a two-layer feed-forward stage.

    Both stages use dropout, a residual connection and layer normalisation.

    Args:
        inputs: Keras tensor; its last dimension is used as the attention
            key size and as the width of the block's output.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward stage.
        dropout_rate: dropout rate applied after each stage.

    Returns:
        Keras tensor with the same last dimension as `inputs`.
    """
    width = inputs.shape[-1]

    # Self-attention: the input serves as both query and value.
    attended = MultiHeadAttention(num_heads=num_heads, key_dim=width)(inputs, inputs)
    attended = Dropout(dropout_rate)(attended)
    attention_out = LayerNormalization(epsilon=1e-6)(inputs + attended)

    # Feed-forward stage, projected back to the input width for the residual.
    hidden = Dense(ff_dim, activation="relu")(attention_out)
    projected = Dense(width)(hidden)
    projected = Dropout(dropout_rate)(projected)
    return LayerNormalization(epsilon=1e-6)(attention_out + projected)

def create_transformer_model(input_shape=(19, 19, 17)):
    """Build and compile a transformer-based play-style classifier.

    Args:
        input_shape: shape of one encoded board. The default has 17 channels
            to match the arrays produced by `prepare_input`; the previous
            hard-coded 15 channels could not accept that data.

    Returns:
        A compiled Keras Model with a 3-way softmax output.
    """
    inputs = Input(shape=input_shape)

    # Three transformer blocks, mirroring the three residual blocks of the CNN.
    outputs = inputs
    for _ in range(3):
        outputs = transformer_block(outputs)

    # Flatten the attended feature map for the dense classification head.
    outputs = Flatten()(outputs)
    outputs = Dense(32, activation='relu')(outputs)
    outputs = Dense(32, activation='relu')(outputs)
    outputs = Dense(32, activation='relu')(outputs)
    outputs = Dense(3, activation='softmax')(outputs)

    model = Model(inputs, outputs)
    opt = RMSprop(learning_rate=0.001, rho=0.5)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and compile the transformer model.
# NOTE(review): `transformer_model` is not referenced by any later cell visible
# in this notebook; the training cell uses the model from create_model().
transformer_model = create_transformer_model()


In [45]:
# Build the residual CNN that is trained below and print its layer summary.
model = create_model()
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 19, 19, 17)] 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 39, 39, 17)   0           input_2[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 16, 16, 32)   44096       zero_padding2d[0][0]             
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 16, 16, 32)   128         conv2d[0][0]                     
____________________________________________________________________________________________

In [46]:
from tensorflow.keras.callbacks import ModelCheckpoint
# Checkpoint callback configured to keep only the model with the lowest
# validation loss seen so far.
checkpoint_path = "model_playstyle6.h5"
checkpoint = ModelCheckpoint(checkpoint_path, 
                             monitor='val_loss',  # track the validation loss
                             save_best_only=True,     # only save the best model
                             mode='min',              # a lower monitored value is better
                             verbose=1)
# NOTE(review): the callback above is built but not passed to fit() (the
# `callbacks` argument below is commented out), so no checkpoint is written
# during this run.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 64,
    epochs = 10,
    validation_data=(x_val, y_val),
    # callbacks=[checkpoint]
)

2023-11-27 20:19:27.831138: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-11-27 20:19:27.831342: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2112000000 Hz


Epoch 1/10


2023-11-27 20:19:28.547875: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-11-27 20:19:28.813954: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2023-11-27 20:19:28.817441: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8


 37/375 [=>............................] - ETA: 1s - loss: 1.3277 - accuracy: 0.3624

2023-11-27 20:19:29.557893: I tensorflow/stream_executor/cuda/cuda_blas.cc:1838] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score
# Load a previously saved model from disk for evaluation.
# NOTE(review): './model_playstyle15.h5' is not written by any cell visible in
# this notebook — confirm where this file comes from.
model1 = load_model('./model_playstyle15.h5')

In [None]:
# Class probabilities predicted by the loaded model for the validation boards.
# NOTE(review): model1 was trained outside this notebook; if its training
# split differed from the current one, x_val may contain games it has seen.
predictions = model1.predict(x_val)

# The labels are one-hot encoded, so both sides are reduced to class indices
# with argmax; integer-encoded labels could be passed to accuracy_score as-is.
true_classes = np.argmax(y_val, axis=1)
predicted_classes = np.argmax(predictions, axis=1)
val_accuracy = accuracy_score(true_classes, predicted_classes)

print(f'Validation accuracy: {val_accuracy}')

In [94]:
# Save the model trained above to an HDF5 file.
model.save('./model_playstyle3.h5')

## ALL DONE!

For using the model and creating a submission file, follow the notebook **Create Public Upload CSV.ipynb**

# End of Tutorial

You are free to use more modern NN architectures, better pre-processing, or other feature-extraction methods to achieve much better accuracy!