In [387]:
import tensorflow as tf
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import keras
from keras.layers import Embedding, Masking, Concatenate, GRU, Dense, Reshape
from model import CNN_with_mask

In [388]:
def get_padding_data(df):
    '''
    one df represents one game
    split: space, action, player(?), error&score
    label: 發球方是否得分？
    '''
    col = df.columns
    space_col = [c for c in df.columns if 'Space' in c]
    action_col = [c for c in df.columns if 'Action' in c]
    result_col = ['Errors', 'Score', 'Nothing']
    others_col = [c for c in df.columns if c not in space_col and c not in action_col and c not in result_col and c != 'Game' and c != 'Rally']

    team_col = [c for c in df.columns if 'Team' in c]

    rally_set = []
    rally_space_set = []
    rally_action_set = []
    rally_result_set = []
    for _, df_rally in df.groupby(['Game', 'Rally']):   # each rally in one game
        # print(df_rally)
        curr_team = df.iloc[df_rally.index[0]][team_col].tolist()
        shot_set = []
        shot_space_set = []
        shot_action_set = []
        shot_result_set = []
        
        atk_sequence = []
        atk_space_sequence = []
        atk_action_sequence = []
        atk_result = []
        
        for _, shot in df_rally.iterrows():
            if(shot[team_col].tolist() != curr_team):
                shot_set.append(atk_sequence)
                shot_space_set.append(atk_space_sequence)
                shot_action_set.append(atk_action_sequence)
                shot_result_set.append(atk_result)   # 最後一動的結果 -> predict object
                
                curr_team = shot[team_col].tolist()

                atk_sequence = []
                atk_space_sequence = []
                atk_action_sequence = []

            atk_space_sequence.append(shot[space_col])
            atk_action_sequence.append(shot[action_col])
            atk_sequence.append(shot[others_col])
            atk_result = shot[result_col]
        
        # the last shot
        shot_set.append(atk_sequence)
        shot_space_set.append(atk_space_sequence)
        shot_action_set.append(atk_action_sequence)
        shot_result_set.append(atk_result)

        # one rally has been finished
        shot_set = pad_sequences(shot_set, maxlen=3, padding='post')
        shot_space_set = pad_sequences(shot_space_set, maxlen=3, padding='post')
        shot_action_set = pad_sequences(shot_action_set, maxlen=3, padding='post')

        # one rally has been finished
        rally_set.append(shot_set)
        rally_space_set.append(shot_space_set)
        rally_action_set.append(shot_action_set)
        rally_result_set.append(shot_result_set)

    padded_rally_set = pad_sequences(rally_set, dtype=float, padding='post')
    padded_rally_space_set = pad_sequences(rally_space_set, dtype=float, padding='post')
    padded_rally_action_set = pad_sequences(rally_action_set, dtype=float, padding='post')
    padded_rally_result_set = pad_sequences(rally_result_set, dtype=float, padding='post')
    
    return padded_rally_set, padded_rally_space_set, padded_rally_action_set, padded_rally_result_set


In [389]:
df = pd.read_csv('test.csv')

space_replace = {list(df.groupby('Space').groups.keys())[i]: i+1 for i in range(len(df.groupby('Space')))}
action_replace = {list(df.groupby('Action').groups.keys())[i]: i+1 for i in range(len(df.groupby('Action')))}

df = pd.get_dummies(df, columns=['Team', 'No.'])
df = df.replace(space_replace)
df = df.replace(action_replace)

In [390]:
rally_set, rally_space_set, rally_action_set, rally_result_set = get_padding_data(df)

# rally數, 最大回合數in one rally, 3, feature數
print(rally_set.shape)
print(rally_space_set.shape)
print(rally_action_set.shape)
print(rally_result_set.shape)

(1292, 14, 3, 36)
(1292, 14, 3, 1)
(1292, 14, 3, 1)
(1292, 14, 3)


In [391]:
rally_space_set = rally_space_set.squeeze()
rally_action_set = rally_action_set.squeeze()

In [392]:
rally_set_tensor = tf.convert_to_tensor(rally_set)
rally_space_set_tensor = tf.convert_to_tensor(rally_space_set)
rally_action_set_tensor = tf.convert_to_tensor(rally_action_set)
rally_result_set_tensor = tf.convert_to_tensor(rally_result_set)

rally_result_set_tensor = tf.where(tf.math.is_nan(rally_result_set_tensor), 0.0, rally_result_set_tensor)

---

In [393]:
# rally_num = rally_set.shape[0]
rally_size = rally_set.shape[1]
shot_size = 3
feature_dim = (rally_set.shape[-1], len(df.groupby('Space'))+1, len(df.groupby('Action'))+1)
space_embed_size = 8
action_embed_size = 8
shot_embed_size = 16

In [394]:
def create_model(feature_dim, space_embed_size, action_embed_size, shot_embed_size):
    '''
    framework: 
    1. 對 space, action 做 embeding, (input, output) = (feature_dim, embed_size)
    2. concat space, action, others 成一個 embedded vector for each atk, (input) =  ([feature_dim, embed_size, embed_size])
    3. 先做 embedding
    4. CNN, filters = shot_embed_size
    5. GRU
    '''
    # each input: 三個維度, rally shot feature
    input_others = keras.Input(shape=(rally_size, shot_size, feature_dim[0]))
    input_space = keras.Input(shape=(rally_size, shot_size))
    input_action = keras.Input(shape=(rally_size, shot_size))

    # space & action 先做 embedding, 再和 others concat
    embed_space_layer = Embedding(input_dim=feature_dim[1], output_dim=space_embed_size, mask_zero=True, name='Space_Embedding')
    embed_action_layer = Embedding(input_dim=feature_dim[2], output_dim=action_embed_size, mask_zero=True, name='Action_Embedding')
    masking_layer = Masking(mask_value=0)   # for input_others (還沒有經過mask)
    concat_layer = Concatenate(name='Input_Concat')

    embed_shot_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=3, name='Shot_Embedding')

    cnn_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=1, name='CNN_Layer')
    gru_layer = GRU(units=16, return_sequences=True, name='GRU_Layer')
    dense_layer = Dense(units=3, activation='softmax')
    reshape_layer = Reshape((-1, 1, 3))

    # forward
    inputs = [input_others, input_space, input_action]

    embed_space = embed_space_layer(input_space)
    embed_action = embed_action_layer(input_action)
    masked_others = masking_layer(tf.cast(input_others, tf.float32))
    embed_input = concat_layer([masked_others, embed_space, embed_action])
    embed_shot = tf.squeeze(embed_shot_layer(embed_input), axis=2)

    cnn_output = cnn_layer(embed_shot)
    gru_output = gru_layer(cnn_output)
    output = dense_layer(gru_output)
    model = keras.Model(inputs=inputs, outputs=output, name='Classification')
    return model

In [395]:
model = create_model(feature_dim, space_embed_size, action_embed_size, shot_embed_size)
model.summary()

Model: "Classification"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_61 (InputLayer)           [(None, 14, 3, 36)]  0                                            
__________________________________________________________________________________________________
tf.cast_17 (TFOpLambda)         (None, 14, 3, 36)    0           input_61[0][0]                   
__________________________________________________________________________________________________
input_62 (InputLayer)           [(None, 14, 3)]      0                                            
__________________________________________________________________________________________________
input_63 (InputLayer)           [(None, 14, 3)]      0                                            
_____________________________________________________________________________________

In [396]:
# print(rally_set_tensor.shape)
# print(rally_space_set_tensor.shape)
# print(rally_action_set_tensor.shape)
# print(rally_result_set_tensor.shape)

In [397]:
# model.call([rally_set_tensor, rally_space_set_tensor, rally_action_set_tensor])

In [398]:
def split_data(others_tensor, space_tensor, action_tensor, label_tensor):
    l = label_tensor.shape[0]
    split_persentage = int(l*0.7)

    train_space = space_tensor[:split_persentage]
    train_action = action_tensor[:split_persentage]
    train_others = others_tensor[:split_persentage]
    train_label = label_tensor[:split_persentage]

    test_space = space_tensor[split_persentage:]
    test_action = action_tensor[split_persentage:]
    test_others = others_tensor[split_persentage:]
    test_label = label_tensor[split_persentage:]

    train_x = [train_others, train_space, train_action]
    train_y = train_label

    test_x = [test_others, test_space, test_action]
    test_y = test_label

    return train_x, train_y, test_x, test_y

In [399]:
train_x, train_y, test_x, test_y = split_data(rally_set_tensor, rally_space_set_tensor, rally_action_set_tensor, rally_result_set_tensor)

In [400]:
from tensorflow.keras import backend as K

def f1(y_true, y_pred):    
    def recall_m(y_true, y_pred):
        TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        Positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        
        recall = TP / (Positives+K.epsilon())    
        return recall 
    
    
    def precision_m(y_true, y_pred):
        TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        Pred_Positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    
        precision = TP / (Pred_Positives+K.epsilon())
        return precision 
    
    precision, recall = precision_m(y_true, y_pred), recall_m(y_true, y_pred)
    
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [401]:
# regularizer = tf.keras.regularizers.l2(0.01)
optimizer = 'adam'
loss = keras.losses.CategoricalCrossentropy()
metrics = [f1, 'accuracy']
epochs = 50
callbacks = tf.keras.callbacks.EarlyStopping(min_delta=0.002, patience=15, restore_best_weights=True)

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [402]:
model.fit(train_x, train_y, epochs=epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2047d5cc588>

In [403]:
result = model.evaluate(test_x, test_y)



In [404]:
y_pred = model.predict(test_x)

In [405]:
# from tensorflow.python.ops.numpy_ops import np_config
# np_config.enable_numpy_behavior()

In [406]:
test_y.shape

TensorShape([388, 14, 3])

In [407]:
test_y_reshape = test_y.reshape(test_y.shape[0]*test_y.shape[1], 3)
y_pred_reshape = y_pred.reshape(y_pred.shape[0]*y_pred.shape[1], 3)

idx = [i for i in range(len(test_y_reshape)) if(any(test_y_reshape[i] == np.array([1, 1, 1])))]

In [408]:
y = np.argmax(y_pred_reshape[idx], axis=1)
reshape_y = np.argmax(test_y_reshape[idx], axis=1)

In [410]:
len(reshape_y)

1172

In [411]:
from sklearn.metrics import accuracy_score

accuracy_score(reshape_y, y)

0.8805460750853242

In [412]:
f1(reshape_y, y)

<tf.Tensor: shape=(), dtype=float64, numpy=0.9708737362951817>