In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import keras
from keras.layers import Embedding, Masking, Concatenate, GRU, Dense, Reshape
from model import CNN_with_mask

from get_training_data import get_rally_result, get_padding_data, permute_feature

In [2]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 44
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# Read the input table that every later cell works on.
CSV_PATH = 'test.csv'
df = pd.read_csv(CSV_PATH)

In [4]:
def replaced_by_mode(df, ex_feature):
    """Flatten every feature column except ``ex_feature`` to its mode.

    Each of the columns 'Team', 'No.', 'Space' and 'Action', other than
    ``ex_feature``, has all of its non-missing values overwritten with that
    column's most frequent value. Missing values are left as they are.

    Args:
        df: DataFrame containing the columns 'Team', 'No.', 'Space', 'Action'.
        ex_feature: Name of the column to leave untouched, or 'Original' to
            skip the replacement entirely.

    Returns:
        ``df`` itself when ``ex_feature == 'Original'``; otherwise a modified
        copy (the caller's frame is not mutated).

    Raises:
        ValueError: If ``ex_feature`` is neither 'Original' nor one of the
            four feature columns.
    """
    if ex_feature == 'Original':
        return df

    feature_columns = ['Team', 'No.', 'Space', 'Action']
    if ex_feature not in feature_columns:
        raise ValueError(
            'ex_feature must be \'Original\' or one of {}, got {!r}'.format(feature_columns, ex_feature)
        )

    # Work on a copy so re-running the calling cell never compounds edits
    # on a frame that an earlier cell still refers to.
    result = df.copy()
    for column in feature_columns:
        if column == ex_feature:
            continue
        modes = result[column].mode()
        if modes.empty:
            # Column holds only missing values: there is nothing to replace.
            continue
        # Keep missing entries, overwrite everything else with the first mode.
        result[column] = result[column].where(result[column].isna(), modes[0])

    return result

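Selectable features: Team, No., Space, Action. The one assigned to `feature` below is left untouched, while the other three are replaced by their mode (`'Original'` skips the replacement).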

In [5]:
# Column that replaced_by_mode leaves untouched; the name is reused later
# when the results are written to disk.
feature = 'Action'
# df = permute_feature(df, feature)
df = replaced_by_mode(df, ex_feature=feature)

In [6]:
# Integer-encode Space and Action. Ids start at 1 so that 0 stays free for
# the zero-masked Embedding layers built later in the notebook.
# The group keys are collected once per column instead of being rebuilt
# (groupby included) for every single key.
space_keys = list(df.groupby('Space').groups.keys())
action_keys = list(df.groupby('Action').groups.keys())
space_replace = {key: idx for idx, key in enumerate(space_keys, start=1)}
action_replace = {key: idx for idx, key in enumerate(action_keys, start=1)}

# One-hot encode Team and No.; Space and Action keep a single id column each.
df = pd.get_dummies(df, columns=['Team', 'No.'])
df['Space'] = df['Space'].replace(space_replace)
df['Action'] = df['Action'].replace(action_replace)

In [7]:
# get_rally_result yields two values: the labels and a second value that is
# handed on to get_padding_data in the next cell.
rally_result = get_rally_result(df)
label, ignor = rally_result

In [8]:
rally_set, rally_space_set, rally_action_set = get_padding_data(df, ignor)

# Axes: number of rallies, max rounds in one rally, 3, number of features
for _shaped in (rally_set, rally_space_set, rally_action_set, label):
    print(_shaped.shape)

(1291, 2, 3, 2)
(1291, 2, 3, 1)
(1291, 2, 3, 1)
(1291, 2)


In [9]:
# Drop only the trailing singleton axis. A bare squeeze() would also remove
# the batch or rally axis whenever one of them happens to have length 1.
# Reshaping to the first three axes is also safe to re-run on an array that
# has already been reduced to three dimensions.
rally_space_set = rally_space_set.reshape(rally_space_set.shape[:3])
rally_action_set = rally_action_set.reshape(rally_action_set.shape[:3])

In [10]:
# Convert the three feature arrays and the labels to tensors in one pass.
(
    rally_set_tensor,
    rally_space_set_tensor,
    rally_action_set_tensor,
    rally_result_tensor,
) = (
    tf.convert_to_tensor(source)
    for source in (rally_set, rally_space_set, rally_action_set, label)
)

In [11]:
# Sequence geometry: rally axis length taken from the data, 3 per shot axis.
rally_size = rally_set.shape[1]
shot_size = 3

# Embedding widths.
space_embed_size = action_embed_size = 8
shot_embed_size = 16

# (size of the last axis of rally_set, #Space ids + 1, #Action ids + 1);
# the +1 accounts for id 0, which the encoding above never assigns.
space_vocab = df.groupby('Space').ngroups + 1
action_vocab = df.groupby('Action').ngroups + 1
feature_dim = (rally_set.shape[-1], space_vocab, action_vocab)

In [12]:
def create_model(feature_dim, space_embed_size, action_embed_size, shot_embed_size):
    '''
    Build the Keras functional model named 'Classification'.

    Framework:
    1. Embed space and action, (input, output) = (feature_dim, embed_size)
    2. Concatenate space, action and others into one embedded vector for
       each attack, (input) = ([feature_dim, embed_size, embed_size])
    3. Shot embedding first (CNN_with_mask, kernel_size=3, strides=3)
    4. CNN, filters = shot_embed_size
    5. GRU, followed by a 2-unit softmax Dense layer

    Args:
        feature_dim: indexable with three entries; [0] is the size of the
            last axis of the "others" input, [1] and [2] are the Embedding
            input_dim for space and action respectively.
        space_embed_size: output_dim of the space Embedding.
        action_embed_size: output_dim of the action Embedding.
        shot_embed_size: number of filters for both CNN_with_mask layers.

    Returns:
        keras.Model taking [others, space, action] inputs and producing the
        output of the 2-unit softmax layer.

    Note:
        The input shapes read the module-level names rally_size and
        shot_size, which must be defined before this function is called.
        CNN_with_mask comes from the project-local `model` module; its
        masking behaviour is not visible here -- TODO confirm.
    '''
    # each input has three dimensions: rally, shot, feature
    input_others = keras.Input(shape=(rally_size, shot_size, feature_dim[0]))
    input_space = keras.Input(shape=(rally_size, shot_size))
    input_action = keras.Input(shape=(rally_size, shot_size))

    # embed space & action first, then concatenate them with others
    embed_space_layer = Embedding(input_dim=feature_dim[1], output_dim=space_embed_size, mask_zero=True, name='Space_Embedding')
    embed_action_layer = Embedding(input_dim=feature_dim[2], output_dim=action_embed_size, mask_zero=True, name='Action_Embedding')
    masking_layer = Masking(mask_value=0)   # for input_others (not masked yet)
    concat_layer = Concatenate(name='Input_Concat')

    embed_shot_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=3, name='Shot_Embedding')

    cnn_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=1, name='CNN_Layer')
    gru_layer = GRU(units=16, name='GRU_Layer')
    dense_layer = Dense(units=2, activation='softmax')

    # forward
    inputs = [input_others, input_space, input_action]

    embed_space = embed_space_layer(input_space)
    embed_action = embed_action_layer(input_action)
    masked_others = masking_layer(tf.cast(input_others, tf.float32))
    embed_input = concat_layer([masked_others, embed_space, embed_action])
    # axis 2 is removed from the shot-embedding output before the next layers
    embed_shot = tf.squeeze(embed_shot_layer(embed_input), axis=2)

    cnn_output = cnn_layer(embed_shot)
    gru_output = gru_layer(cnn_output)
    output = dense_layer(gru_output)
    model = keras.Model(inputs=inputs, outputs=output, name='Classification')
    return model

In [13]:
# Instantiate the classifier with the hyper-parameters defined above.
model = create_model(
    feature_dim=feature_dim,
    space_embed_size=space_embed_size,
    action_embed_size=action_embed_size,
    shot_embed_size=shot_embed_size,
)
model.summary()

Model: "Classification"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 2, 3, 2)]    0                                            
__________________________________________________________________________________________________
tf.cast (TFOpLambda)            (None, 2, 3, 2)      0           input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 2, 3)]       0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 2, 3)]       0                                            
_____________________________________________________________________________________

In [14]:
def split_data(others_tensor, space_tensor, action_tensor, label_tensor, train_fraction=0.9):
    """Split the three feature inputs and the labels into train/test parts.

    The split is positional (no shuffling): the first ``train_fraction`` of
    the samples along axis 0 go to the training set, the rest to the test
    set.

    Args:
        others_tensor: "others" features, sliceable along axis 0.
        space_tensor: space ids, sliceable along axis 0.
        action_tensor: action ids, sliceable along axis 0.
        label_tensor: labels; ``label_tensor.shape[0]`` defines the number
            of samples.
        train_fraction: share of samples assigned to the training set
            (defaults to 0.9).

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where each ``*_x`` is the
        list ``[others, space, action]`` and each ``*_y`` is a slice of
        ``label_tensor`` (so it has the same type as ``label_tensor``).
    """
    n_samples = label_tensor.shape[0]
    split_index = int(n_samples * train_fraction)

    features = (others_tensor, space_tensor, action_tensor)
    train_x = [part[:split_index] for part in features]
    test_x = [part[split_index:] for part in features]

    # Slice the labels that were passed in, not a same-named module-level
    # variable, so the function works on whatever data it is given.
    train_y = label_tensor[:split_index]
    test_y = label_tensor[split_index:]

    return train_x, train_y, test_x, test_y

In [15]:
# Positional train/test split of the three inputs and the labels.
train_x, train_y, test_x, test_y = split_data(
    others_tensor=rally_set_tensor,
    space_tensor=rally_space_set_tensor,
    action_tensor=rally_action_set_tensor,
    label_tensor=rally_result_tensor,
)

In [16]:
# Training configuration.
epochs = 30
optimizer = 'adam'
metrics = ['accuracy']
loss = keras.losses.CategoricalCrossentropy()

# Stop when val_loss has not improved by at least 0.002 for 15 epochs and
# roll back to the best weights seen.
callbacks = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.002,
    patience=15,
    restore_best_weights=True,
)

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [18]:
# Train, holding out 10% of the training data for validation.
History = model.fit(
    x=train_x,
    y=train_y,
    epochs=epochs,
    validation_split=0.1,
    callbacks=[callbacks],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [19]:
# model.save_weights('bestModel/my_model_weights.h5')

In [20]:
# Loss and compiled metrics on the held-out test split.
result = model.evaluate(x=test_x, y=test_y)



In [21]:
# Per-class outputs of the softmax layer for every test sample.
y_pred = model.predict(x=test_x)

In [22]:
# Turn on NumPy-style behaviour for TensorFlow tensors through the public
# API instead of reaching into the private `tensorflow.python` package.
tf.experimental.numpy.experimental_enable_numpy_behavior()

In [23]:
# Collapse predictions and targets along axis 1 to class indices.
argmax_y_pred, argmax_test_y = (
    np.argmax(scores, axis=1) for scores in (y_pred, test_y)
)

In [24]:
from sklearn.metrics import accuracy_score, roc_auc_score
from evaluate import calculate_BS, f1, show_eval_result_2class

# Metrics computed on class indices.
acc_score = accuracy_score(y_true=argmax_test_y, y_pred=argmax_y_pred)
f1_score = f1(argmax_test_y, argmax_y_pred)

# Metrics computed on the per-class outputs.
auc_score = roc_auc_score(y_true=test_y, y_score=y_pred)
BS = calculate_BS(test_y, y_pred, 2)

In [25]:
# Report each metric rounded to two decimals.
print(f'accuracy:    {acc_score:.2f}')
print(f'f1:          {f1_score:.2f}')
print(f'auc:         {auc_score:.2f}')
print(f"BS:          {BS['BS'][0]:.2f}")

accuracy:    0.97
f1:          0.98
auc:         0.98
BS:          0.03


In [26]:
def output_result(type):
    """Append the current evaluation metrics to the matching result file.

    Reads the module-level names ``feature``, ``acc_score``, ``f1_score``,
    ``auc_score`` and ``BS``, which must be defined before the call.

    Args:
        type: 'permute' appends to 'permute_feature_result.txt' under the
            heading 'Permuted feature: ...'; 'mode' appends to
            'replaced_by_mode_result.txt' under 'Fixed feature: ...'. Any
            other value writes nothing. (The parameter name shadows the
            builtin ``type``; it is kept so existing calls keep working.)
    """
    # experiment type -> (output file, heading written before the feature name)
    targets = {
        'permute': ('permute_feature_result.txt', 'Permuted feature: '),
        'mode': ('replaced_by_mode_result.txt', 'Fixed feature: '),
    }
    if type not in targets:
        return

    output_path, heading = targets[type]
    report = ''.join([
        heading + feature + '\n',
        '-'*40 + '\n',
        'accuracy:    {:.2f}'.format(acc_score) + '\n',
        'f1:          {:.2f}'.format(f1_score) + '\n',
        'auc:         {:.2f}'.format(auc_score) + '\n',
        'BS:          {:.2f}'.format(BS['BS'][0]) + '\n',
        '\n' + '='*40 + '\n',
    ])

    # The context manager closes (and flushes) the file on every path.
    with open(output_path, 'a') as f:
        f.write(report)

In [27]:
# output_result('permute')
# Record this run under the "replaced by mode" experiment.
output_result(type='mode')