In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import keras
from keras.layers import Embedding, Masking, Concatenate, GRU, Dense, Reshape
from model import CNN_with_mask
from get_training_data import get_rally_result, get_padding_data, permute_feature

from sklearn.metrics import accuracy_score, roc_auc_score
from evaluate import calculate_BS, f1

# Seed both NumPy's and TensorFlow's global random generators with the same
# fixed value so repeated runs start from the same random state.
np.random.seed(44)
tf.random.set_seed(44)

# Switch on TensorFlow's NumPy-compatibility behaviour for tensors.
# NOTE(review): presumably required by the NumPy-style slicing/indexing applied
# to tensors further down (e.g. in split_data) — confirm before removing.
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

In [2]:
# Raw records used by every cell below; the preview further down shows one row
# per (Game, Rally, Player, Team, Space, Action) event plus outcome columns.
df = pd.read_csv('new_test.csv')

In [3]:
def create_model(rally_size, shot_size, feature_dim, space_embed_size, action_embed_size, shot_embed_size):
    '''
    Build the (uncompiled) Keras classification model named 'Classification'.

    Framework:
    1. Embed the space and action ids; (input, output) = (feature_dim, embed_size).
    2. Concatenate others, space and action into one embedded vector per "atk"
       (presumably "attack" — confirm); input = [feature_dim, embed_size, embed_size].
    3. Shot embedding first ('Shot_Embedding': CNN_with_mask, kernel_size=3, strides=3).
    4. CNN with filters = shot_embed_size ('CNN_Layer').
    5. GRU (16 units), followed by a 2-unit softmax Dense layer.

    Parameters:
        rally_size: length of the rally axis of every input.
        shot_size: length of the shot axis of every input.
        feature_dim: indexable with three entries —
            [0] width of the last axis of the "others" input,
            [1] input_dim (vocabulary size) of the space embedding,
            [2] input_dim (vocabulary size) of the action embedding.
        space_embed_size: output_dim of the space embedding.
        action_embed_size: output_dim of the action embedding.
        shot_embed_size: number of filters of both CNN_with_mask layers.

    Returns:
        keras.Model taking [input_others, input_space, input_action] and
        producing the softmax output of the final Dense layer.
    '''
    # Inputs are indexed by rally / shot / feature; space and action carry no
    # trailing feature axis in their declared shape.
    input_others = keras.Input(shape=(rally_size, shot_size, feature_dim[0]))
    input_space = keras.Input(shape=(rally_size, shot_size))
    input_action = keras.Input(shape=(rally_size, shot_size))

    # Space & action are embedded first, then concatenated with others.
    # mask_zero=True treats id 0 as padding in both embeddings.
    embed_space_layer = Embedding(input_dim=feature_dim[1], output_dim=space_embed_size, mask_zero=True, name='Space_Embedding')
    embed_action_layer = Embedding(input_dim=feature_dim[2], output_dim=action_embed_size, mask_zero=True, name='Action_Embedding')
    masking_layer = Masking(mask_value=0)
    concat_layer = Concatenate(name='Input_Concat')

    # CNN_with_mask is a project layer imported from `model`; its masking
    # semantics are not visible here.
    embed_shot_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=3, name='Shot_Embedding')
    cnn_layer = CNN_with_mask(kernel_size=3, filters=shot_embed_size, strides=1, name='CNN_Layer')
    gru_layer = GRU(units=16, name='GRU_Layer')

    dense_layer = Dense(units=2, activation='softmax')

    # forward
    inputs = [input_others, input_space, input_action]
    embed_space = embed_space_layer(input_space)
    embed_action = embed_action_layer(input_action)
    masked_others = masking_layer(tf.cast(input_others, tf.float32))
    embed_input = concat_layer([masked_others, embed_space, embed_action])
    # Drop axis 2 of the shot-embedding output.
    # NOTE(review): presumably the shot axis, reduced to length 1 by
    # kernel_size=3 / strides=3 when shot_size == 3 — confirm against CNN_with_mask.
    embed_shot = tf.squeeze(embed_shot_layer(embed_input), axis=2)
    cnn_output = cnn_layer(embed_shot)
    gru_output = gru_layer(cnn_output)
    output = dense_layer(gru_output)
    model = keras.Model(inputs=inputs, outputs=output, name='Classification')

    return model

In [4]:
def get_data(df):
    """Encode the categorical columns and build the padded model inputs.

    Steps:
    1. Map every distinct 'Space' and 'Action' value to an integer code
       starting at 1, in the key order reported by ``groupby``.
    2. One-hot encode 'Player' with ``pd.get_dummies``.
    3. Obtain labels and the rallies to ignore from ``get_rally_result``.
    4. Build the padded arrays with ``get_padding_data`` and squeeze the
       space / action arrays.

    Parameters:
        df: DataFrame with at least 'Player', 'Space' and 'Action' columns
            (plus whatever the project helpers require). Not modified.

    Returns:
        (rally_set, rally_space_set, rally_action_set, label)
    """
    # Evaluate each groupby once instead of once per comprehension iteration.
    space_keys = list(df.groupby('Space').groups.keys())
    action_keys = list(df.groupby('Action').groups.keys())
    space_replace = {key: code for code, key in enumerate(space_keys, start=1)}
    action_replace = {key: code for code, key in enumerate(action_keys, start=1)}

    # get_dummies returns a new frame, so the column assignments below never
    # touch the caller's DataFrame.
    df = pd.get_dummies(df, columns=['Player'])
    # Apply each mapping to its own column only. A frame-wide replace would
    # also rewrite equal values in unrelated columns (e.g. Game / Rally) and
    # would let the second mapping re-encode the codes written by the first.
    df['Space'] = df['Space'].replace(space_replace)
    df['Action'] = df['Action'].replace(action_replace)

    # get label
    label, ignor = get_rally_result(df)
    # get training data
    rally_set, rally_space_set, rally_action_set = get_padding_data(df, ignor)
    # NOTE(review): squeeze() drops every length-1 axis, which would include
    # the rally/batch axis if only one rally is present — confirm acceptable.
    rally_space_set = rally_space_set.squeeze()
    rally_action_set = rally_action_set.squeeze()

    return rally_set, rally_space_set, rally_action_set, label

def data_transfer_to_tensor(rally_set, rally_space_set, rally_action_set, label):
    """Convert the four arrays to tensors with ``tf.convert_to_tensor``.

    Returns:
        A 4-tuple of tensors in the same order as the arguments:
        (rally_set, rally_space_set, rally_action_set, label).
    """
    arrays = (rally_set, rally_space_set, rally_action_set, label)
    return tuple(tf.convert_to_tensor(array) for array in arrays)

def split_data(others_tensor, space_tensor, action_tensor, label_tensor):
    """Split the data into a leading 90% training part and a trailing test part.

    The cut position is ``int(label_tensor.shape[0] * 0.9)``; no shuffling is
    performed, so the original order along the first axis is kept.

    Returns:
        (train_x, train_y, test_x, test_y) where each ``*_x`` is the list
        [others, space, action] and each ``*_y`` is the matching label slice.
    """
    cut = int(label_tensor.shape[0] * 0.9)
    features = (others_tensor, space_tensor, action_tensor)

    train_x = [feature[:cut] for feature in features]
    test_x = [feature[cut:] for feature in features]

    return train_x, label_tensor[:cut], test_x, label_tensor[cut:]

In [5]:
def training(model, train_x, train_y):
    """Compile ``model`` and fit it on the training data.

    Uses the 'adam' optimizer, categorical cross-entropy loss and the
    'accuracy' metric. Fits for up to 30 epochs, holding out 10% of the
    training data for validation, with early stopping on ``val_loss``
    (min_delta 0.002, patience 15, best weights restored).

    Returns:
        The same ``model`` object after fitting.
    """
    loss_fn = keras.losses.CategoricalCrossentropy()
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.002,
        patience=15,
        restore_best_weights=True,
    )

    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    model.fit(
        train_x,
        train_y,
        epochs=30,
        validation_split=0.1,
        callbacks=[early_stopping],
    )

    return model

In [6]:
def output_result(type, feature_list, score):
    """Append one formatted score report to the result file selected by ``type``.

    Parameters:
        type: 'permute' -> appends to 'permute_feature_result.txt' under the
              heading 'Permuted feature: '; 'mode' -> appends to
              'replaced_by_mode_result.txt' under the heading 'Fixed feature: '.
        feature_list: object describing the experiment; written via ``str()``.
        score: indexable with four numbers, in order accuracy, f1, auc, BS.

    Raises:
        ValueError: if ``type`` is neither 'permute' nor 'mode'. (Previously
            this case fell through to ``f.close()`` on an unbound name.)
    """
    # report kind -> (output file, heading prefix)
    targets = {
        'permute': ('permute_feature_result.txt', 'Permuted feature: '),
        'mode': ('replaced_by_mode_result.txt', 'Fixed feature: '),
    }
    if type not in targets:
        raise ValueError(
            "unknown result type {!r}; expected 'permute' or 'mode'".format(type))
    output_path, heading = targets[type]

    # Build the whole report before opening the file so a formatting error
    # cannot leave a half-written entry behind.
    report = ''.join([
        heading + str(feature_list) + '\n',
        '-'*40 + '\n',
        'accuracy:    {:.2f}'.format(score[0]) + '\n',
        'f1:          {:.2f}'.format(score[1]) + '\n',
        'auc:         {:.2f}'.format(score[2]) + '\n',
        'BS:          {:.2f}'.format(score[3]) + '\n',
        '\n' + '='*40 + '\n',
    ])

    # The context manager closes the handle even if the write fails.
    with open(output_path, 'a') as f:
        f.write(report)

In [7]:
def replaced_by_mode(df, ex_feature):
    """Fix every categorical feature except ``ex_feature`` to its mode.

    For each of 'Player', 'Space' and 'Action' other than ``ex_feature``,
    every non-missing value in that column is replaced by the column's first
    mode. Missing values are left as they are.

    Parameters:
        df: input DataFrame. It is NOT modified; a modified copy is returned.
        ex_feature: the feature to keep untouched, or 'Original' to request
            no change at all.

    Returns:
        ``df`` itself when ``ex_feature == 'Original'``, otherwise a new
        DataFrame with the other feature columns fixed to their mode.

    Raises:
        ValueError: if ``ex_feature`` is not 'Original', 'Player', 'Space'
            or 'Action'.
    """
    if ex_feature == 'Original':
        return df

    features = ['Player', 'Space', 'Action']
    if ex_feature not in features:
        raise ValueError(
            "ex_feature must be 'Original' or one of {}, got {!r}".format(features, ex_feature))

    # Copy so the caller's frame survives; mutating it in place made notebook
    # cells non-idempotent.
    fixed = df.copy()
    for column in features:
        if column == ex_feature:
            continue
        # Mode of this column only (the frame-wide mode was wasted work).
        modes = fixed[column].mode()
        if modes.empty:
            # Column holds no non-missing value: nothing to replace.
            continue
        fixed.loc[fixed[column].notna(), column] = modes.iloc[0]

    return fixed

In [8]:
experiment = ['Original']

############# permutation #############################
# Permute a copy instead of rebinding the notebook-wide `df`: overwriting it
# made this cell non-idempotent (a re-run would permute already-permuted data)
# and silently changed the "raw" frame that later cells start from.
df_experiment = df.copy()
for f in experiment:
    df_experiment = permute_feature(df_experiment, f)
    #df_experiment = replaced_by_mode(df_experiment, f)
############# train for result ########################
rally_set, rally_space_set, rally_action_set, label = get_data(df_experiment)
rally_set_tensor, rally_space_set_tensor, rally_action_set_tensor, rally_result_tensor = data_transfer_to_tensor(rally_set, rally_space_set, rally_action_set, label)
train_x, train_y, test_x, test_y = split_data(rally_set_tensor, rally_space_set_tensor, rally_action_set_tensor, rally_result_tensor)
############# model setting ###########################
rally_size = rally_set.shape[1]
shot_size = 3
# +1 on both vocabulary sizes: get_data encodes categories starting at 1, and
# the embeddings in create_model reserve id 0 (mask_zero=True).
feature_dim = (rally_set.shape[-1], len(df_experiment.groupby('Space'))+1, len(df_experiment.groupby('Action'))+1)
space_embed_size = 8
action_embed_size = 8
shot_embed_size = 16
############# create model ############################
model = create_model(rally_size, shot_size, feature_dim, space_embed_size, action_embed_size, shot_embed_size)
model = training(model, train_x, train_y)
############# evaluate ################################
y_pred = model.predict(test_x)
# Collapse the two-column predictions / labels to class indices.
argmax_y_pred = np.argmax(y_pred, axis=1)
argmax_test_y = np.argmax(test_y, axis=1)

acc_score = accuracy_score(argmax_test_y, argmax_y_pred)
f1_score = f1(argmax_test_y, argmax_y_pred)
auc_score = roc_auc_score(test_y, y_pred)
BS = calculate_BS(test_y, y_pred, 2)
score = [acc_score, f1_score, auc_score, BS['BS'][0]]
# Appends to permute_feature_result.txt; every run of this cell adds one entry.
output_result('permute', experiment, score)
# output_result('mode', experiment, score)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


---
Experiment

In [9]:
def get_team_data(df, team):
    """Return the positions of the non-ignored rallies in which ``team`` appears.

    The frame is encoded the same way ``get_data`` encodes it (integer codes
    from 1 for 'Space' / 'Action', one-hot 'Player') before
    ``get_rally_result`` is asked which (Game, Rally) pairs to ignore.
    Rallies are then visited in ``groupby(['Game', 'Rally'])`` order; ignored
    rallies are skipped without consuming a position.

    Parameters:
        df: DataFrame with 'Game', 'Rally', 'Team', 'Player', 'Space' and
            'Action' columns. Not modified.
        team: team code looked up in the 'Team' column of each rally.

    Returns:
        list[int]: running indices (over non-ignored rallies) of the rallies
        that contain at least one row of ``team``.
        NOTE(review): presumably these line up with the first axis of the
        arrays returned by get_data — confirm against get_padding_data.
    """
    # Evaluate each groupby once instead of once per comprehension iteration.
    space_keys = list(df.groupby('Space').groups.keys())
    action_keys = list(df.groupby('Action').groups.keys())
    space_replace = {key: code for code, key in enumerate(space_keys, start=1)}
    action_replace = {key: code for code, key in enumerate(action_keys, start=1)}

    df = pd.get_dummies(df, columns=['Player'])
    # Encode each column with its own mapping only. A frame-wide replace could
    # rewrite equal values in Game / Rally and break the ignore lookup below.
    df['Space'] = df['Space'].replace(space_replace)
    df['Action'] = df['Action'].replace(action_replace)

    # Only the ignore list is needed here; the labels are discarded.
    _, ignor = get_rally_result(df)

    rally_idx = []
    idx = 0
    # The group key is exactly the (Game, Rally) pair shared by the group's rows.
    for (game, rally), df_rally in df.groupby(['Game', 'Rally']):   # each rally in one game
        if (game, rally) in ignor:
            continue
        if team in df_rally['Team'].tolist():
            rally_idx.append(idx)
        idx += 1
    return rally_idx

In [10]:
# Experiments to run: each inner list names the features handed to
# permute_feature for one experiment. ['Original'] is the baseline entry; the
# rest are all 1-, 2- and 3-feature combinations of Player / Space / Action /
# Sequence.
feature_list = [['Original'], ['Player'], ['Space'], ['Action'], ['Sequence'], 
                ['Player', 'Space'], ['Player', 'Action'], ['Player', 'Sequence'], ['Space', 'Action'], ['Space', 'Sequence'], ['Action', 'Sequence'], 
                ['Player', 'Space', 'Sequence'], ['Space', 'Action', 'Sequence'], ['Player', 'Action', 'Sequence'], ['Player', 'Space', 'Action']]

In [11]:
df

Unnamed: 0,Game,Rally,Player,Team,Space,Action,Errors,Nothing,Score
0,21,25.0,USA_15,USA,Y,JS_0,,1.0,
1,21,25.0,BRA_18,BRA,7,R_0,,1.0,
2,21,25.0,BRA_1,BRA,2,G_0,,1.0,
3,21,25.0,BRA_23,BRA,2,A_0,,,1.0
4,21,25.0,USA_8,USA,8,D_0,1.0,,
...,...,...,...,...,...,...,...,...,...
7142,15,30.0,POL_20,POL,L,JS_0,,1.0,
7143,15,30.0,ITA_15,ITA,8,R_0,,1.0,
7144,15,30.0,ITA_6,ITA,2,G_0,,1.0,
7145,15,30.0,ITA_5,ITA,1,At_0,,,1.0


In [13]:
for experiment in feature_list:
    print(experiment)
    ############# permutation #############################
    # Start every experiment from a fresh copy of the raw data and CHAIN the
    # permutations. Restarting from `df` on each feature (as before) meant that
    # only the last feature of a multi-feature experiment was actually permuted.
    # The copy also keeps `df` intact should permute_feature modify its input.
    df_permutation = df.copy()
    for f in experiment:
        df_permutation = permute_feature(df_permutation, f)
    ############# train for result ########################
    # Positions (among non-ignored rallies) of the rallies involving team 'JAP'.
    rally_idx = get_team_data(df_permutation, 'JAP')

    rally_set, rally_space_set, rally_action_set, label = get_data(df_permutation)

    # Keep only that team's rallies.
    team_rally_set = rally_set[rally_idx]
    team_rally_space_set = rally_space_set[rally_idx]
    team_rally_action_set = rally_action_set[rally_idx]
    team_label = label[rally_idx]

    team_rally_set_tensor, team_rally_space_set_tensor, team_rally_action_set_tensor, team_rally_result_tensor = data_transfer_to_tensor(team_rally_set, team_rally_space_set, team_rally_action_set, team_label)

    test_x = [team_rally_set_tensor, team_rally_space_set_tensor, team_rally_action_set_tensor]
    test_y = team_rally_result_tensor

    ############# evaluate ################################
    # `model` is the network trained in the earlier training cell; it is only
    # evaluated here, never re-fitted.
    y_pred = model.predict(test_x)
    argmax_y_pred = np.argmax(y_pred, axis=1)
    argmax_test_y = np.argmax(test_y, axis=1)

    acc_score = accuracy_score(argmax_test_y, argmax_y_pred)
    f1_score = f1(argmax_test_y, argmax_y_pred)
    auc_score = roc_auc_score(test_y, y_pred)
    BS = calculate_BS(test_y, y_pred, 2)
    score = [acc_score, f1_score, auc_score, BS['BS'][0]]
    # One entry per experiment is appended to permute_feature_result.txt.
    output_result('permute', experiment, score)

['Original']
['Player']
['Space']
['Action']
['Sequence']
['Player', 'Space']
['Player', 'Action']
['Player', 'Sequence']
['Space', 'Action']
['Space', 'Sequence']
['Action', 'Sequence']
['Player', 'Space', 'Sequence']
['Space', 'Action', 'Sequence']
['Player', 'Action', 'Sequence']
['Player', 'Space', 'Action']


---
每個隊伍佔的 rally 數量

In [47]:
# Rally counter per team code, every counter starting at zero.
team_dict = dict.fromkeys(
    ['BRA', 'FRA', 'IRI', 'ITA', 'JAP', 'NED', 'POL', 'UAS', 'USA'],
    0,
)

In [48]:
# Zero the counters first: they are created in a different cell, so without
# this reset every re-run of this cell would add to the previous tallies.
for t in team_dict:
    team_dict[t] = 0

# Count, for every team code, the (Game, Rally) groups in which it appears.
for _, rally_df in df.groupby(['Game', 'Rally']):
    # Build the membership set once per rally instead of once per team.
    teams_in_rally = set(rally_df['Team'])
    for t in team_dict:
        if t in teams_in_rally:
            team_dict[t] += 1

In [49]:
team_dict

{'BRA': 169,
 'FRA': 426,
 'IRI': 178,
 'ITA': 390,
 'JAP': 116,
 'NED': 167,
 'POL': 425,
 'UAS': 39,
 'USA': 417}