In [171]:
import tensorflow as tf
import numpy as np
import pandas as pd
from keras import layers
import keras
from model import CNN_with_mask

In [172]:
def training(input_dim, seq_len, embed_space_size, embed_action_size, embed_dim=4, num_classes=3):
    """Build the (uncompiled) sequence classifier.

    Despite its name, this function does not train anything: it only wires the
    Keras layers together and returns the resulting model.

    Args:
        input_dim: Indexable holding three ints. ``input_dim[0]`` is the size
            of the last axis of the space input (and of the third axis of the
            "others" input), ``input_dim[1]`` the size of the last axis of the
            action input, and ``input_dim[2]`` the number of "others" features.
        seq_len: Length of every input sequence (second axis of each input).
        embed_space_size: ``input_dim`` (vocabulary size) of the space embedding.
        embed_action_size: ``input_dim`` (vocabulary size) of the action embedding.
        embed_dim: Output size of both embeddings. Defaults to 4, the value that
            used to be hard-coded.
        num_classes: Number of units of the final softmax layer. Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        A ``keras.Model`` named ``'classifier'`` whose inputs are
        ``[Space_input, Action_input, Other_input]`` and whose output is the
        softmax produced by the final dense layer.
    """
    # --- Inputs -----------------------------------------------------------
    input_space = keras.Input(shape=(seq_len, input_dim[0]), name='Space_input')
    input_action = keras.Input(shape=(seq_len, input_dim[1]), name='Action_input')
    input_others = keras.Input(shape=(seq_len, input_dim[0], input_dim[2]), name='Other_input')

    # --- Layers -----------------------------------------------------------
    # Index 0 is reserved for padding in both embeddings (mask_zero=True).
    space_embedding = layers.Embedding(input_dim=embed_space_size, output_dim=embed_dim, mask_zero=True, name='Space_embedding')
    action_embedding = layers.Embedding(input_dim=embed_action_size, output_dim=embed_dim, mask_zero=True, name='Action_embedding')

    masking_layer = layers.Masking(mask_value=0)

    input_concat_embedding = layers.Concatenate(name='Input_merging')

    # The filter count equals the size of the concatenated feature axis:
    # two embeddings of width `embed_dim` plus the raw "others" features.
    layer_cnn = CNN_with_mask(filters=2 * embed_dim + input_dim[2], kernel_size=3, name='cnn')
    layer_dense = layers.Dense(units=num_classes, activation='softmax')

    # --- Forward pass -----------------------------------------------------
    inputs = [input_space, input_action, input_others]
    embedded_space = space_embedding(input_space)
    embedded_action = action_embedding(input_action)

    # The "others" features bypass the embeddings; cast them to float32 so
    # they can be concatenated with the embedding outputs.
    unmasked_others = tf.cast(input_others, tf.float32)
    masked_others = masking_layer(unmasked_others)

    embedded_input = input_concat_embedding([embedded_space, embedded_action, masked_others])

    cnn = layer_cnn(embedded_input)
    output_prob = layer_dense(cnn)

    model = keras.Model(inputs=inputs, outputs=output_prob, name='classifier')

    return model

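### Prepare data
Toy example of the input layout the model expects: 2 sequences of 4 time steps each, where `0` is the value masked by the embedding/masking layers. The first two tensors have shape `(2, 4, 1)` and the third has shape `(2, 4, 1, 2)`.
* input_array1 = tf.convert_to_tensor([[[1], [2], [1], [0]], [[1], [0], [0], [2]]])
* input_array2 = tf.convert_to_tensor([[[1], [2], [2], [0]], [[1], [0], [0], [2]]])
* input_array3 = tf.convert_to_tensor([[[[1, 1]], [[2, 2]], [[1, 0]], [[0, 0]]], [[[1, 1]], [[0, 0]], [[0, 0]], [[2, 2]]]])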

In [173]:
def split_data(space_tensor, action_tensor, others_tensor, label_tensor, train_fraction=0.7):
    """Split aligned inputs and labels into a leading train part and a trailing test part.

    The split is positional (no shuffling): the first
    ``int(n * train_fraction)`` entries along axis 0 go to the training set and
    the remaining entries go to the test set, where ``n`` is
    ``label_tensor.shape[0]``.

    Args:
        space_tensor: Sliceable container of space inputs.
        action_tensor: Sliceable container of action inputs.
        others_tensor: Sliceable container of the remaining inputs.
        label_tensor: Sliceable container of labels; must expose ``.shape``.
        train_fraction: Fraction of entries assigned to the training set.
            Defaults to 0.7, the value that used to be hard-coded.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where ``train_x`` / ``test_x``
        are lists ``[space, action, others]`` and ``train_y`` / ``test_y`` are
        the matching label slices.

    Raises:
        ValueError: If ``train_fraction`` is not within ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be within [0, 1], got {train_fraction!r}")

    n_samples = label_tensor.shape[0]
    split_index = int(n_samples * train_fraction)

    features = (space_tensor, action_tensor, others_tensor)

    train_x = [feature[:split_index] for feature in features]
    train_y = label_tensor[:split_index]

    test_x = [feature[split_index:] for feature in features]
    test_y = label_tensor[split_index:]

    return train_x, train_y, test_x, test_y

In [174]:
def prepare_data(df):
    """Turn a flat event table into train/test tensors.

    Reads the columns 'Team', 'No', 'Space', 'Action', 'Erros' and 'Score'
    from ``df``. 'Space' and 'Action' are converted to float, 'Team' and 'No'
    are one-hot encoded ('No' is cast to str first), and the label gets a
    third column 'Nothing' that is 1.0 where both 'Erros' and 'Score' are 0.

    Args:
        df: DataFrame containing the columns listed above.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` as produced by ``split_data``.
    """
    features = df[['Team', 'No', 'Space', 'Action']].copy()
    targets = df[['Erros', 'Score']].copy().astype('float')

    # Numeric inputs, kept as single-column frames.
    space_frame = features[['Space']].astype('float')
    action_frame = features[['Action']].astype('float')

    # Categorical inputs: cast 'No' to str so it is one-hot encoded too.
    categorical = features[['Team', 'No']].copy().astype({'No': 'str'})
    categorical_onehot = pd.get_dummies(categorical).astype('float')

    # Third label column: set where neither 'Erros' nor 'Score' is non-zero.
    is_nothing = (targets['Erros'] == 0) & (targets['Score'] == 0)
    targets.loc[:, 'Nothing'] = 0.0
    targets.loc[is_nothing, 'Nothing'] = 1.0

    space_tensor = tf.convert_to_tensor(space_frame)
    action_tensor = tf.convert_to_tensor(action_frame)
    others_tensor = tf.convert_to_tensor(categorical_onehot)
    label_tensor = tf.convert_to_tensor(targets)

    train_x, train_y, test_x, test_y = split_data(
        space_tensor, action_tensor, others_tensor, label_tensor
    )
    return train_x, train_y, test_x, test_y

In [175]:
# Load the dataset from 'df.csv' (path is relative to the notebook's working directory).
df = pd.read_csv('df.csv')

In [176]:
# One-hot encode 'Team' and 'No' ('No' is cast to str so it is encoded as a
# category rather than kept as a number) and replace the raw 'No' column with
# the indicator columns. The raw 'Team' column is kept on purpose.
#
# The guard makes this cell safe to re-run: once 'No' has been replaced by its
# 'No_*' indicator columns the encoding is skipped instead of raising KeyError.
already_encoded = 'No' not in df.columns and any(str(col).startswith('No_') for col in df.columns)
if not already_encoded:
    df = df.astype({'No': 'str'})
    df_dummy = pd.get_dummies(df[['Team', 'No']])
    df = pd.concat([df.drop(columns=['No']), df_dummy], axis=1)
df.head()

Unnamed: 0,Session,Team,Space,Action,Erros,Attempts,Score,Team_FRA,Team_IRI,Team_ITA,...,No_28,No_3,No_38,No_4,No_49,No_5,No_6,No_7,No_8,No_9
0,1,IRI,11,6,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,IRI,11,6,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,IRI,8,8,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
3,1,IRI,2,1,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,1,IRI,3,2,1.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [177]:
# Show the frame (Jupyter truncates the rendering of large frames).
df

Unnamed: 0,Session,Team,Space,Action,Erros,Attempts,Score,Team_FRA,Team_IRI,Team_ITA,...,No_28,No_3,No_38,No_4,No_49,No_5,No_6,No_7,No_8,No_9
0,1,IRI,11,6,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,IRI,11,6,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,IRI,8,8,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
3,1,IRI,2,1,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,1,IRI,3,2,1.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5517,8,USA,2,5,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5518,8,USA,2,1,0.0,0.0,1.0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5519,8,USA,12,6,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5520,8,USA,8,3,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [178]:
# Collect the one-hot indicator columns generated for 'Team' and 'No'.
# Matching on the 'Team_' / 'No_' prefixes (rather than substring containment)
# leaves the raw 'Team' column out without a separate remove() call and cannot
# pick up unrelated columns whose names merely contain "Team" or "No".
df_c = df.columns
others_col_name = [col for col in df_c if col.startswith(('Team_', 'No_'))]
len(others_col_name)

36

In [179]:
# Build one variable-length sequence per (Session, Team) group, then pad every
# sequence at the end ("post") to the length of the longest one.
total_space = []
total_action = []
total_others = []
total_label = []

for _, group in df.groupby(['Session', 'Team']):
    # Named `features` rather than `input` so the builtin `input` is not
    # shadowed for the rest of the notebook.
    features = group[['Space', 'Action'] + others_col_name].copy()
    label = group[['Erros', 'Score']].copy().astype('float')

    space = features[['Space']].astype('float')
    action = features[['Action']].astype('float')
    others = features[others_col_name].copy()

    # Third label column: 1.0 where both 'Erros' and 'Score' are 0.
    label.loc[:, 'Nothing'] = 0.0
    label.loc[(label['Erros'] == 0) & (label['Score'] == 0), 'Nothing'] = 1.0

    total_space.append(space)
    total_action.append(action)
    total_others.append(others)
    total_label.append(label)

# NOTE(review): pad_sequences is called with its default dtype and padding
# value here; per the Keras docs that means int32 output padded with 0 —
# confirm this is acceptable for the float label columns.
padded_space = keras.preprocessing.sequence.pad_sequences(total_space, padding="post")
padded_action = keras.preprocessing.sequence.pad_sequences(total_action, padding="post")
padded_others = keras.preprocessing.sequence.pad_sequences(total_others, padding="post")
padded_label = keras.preprocessing.sequence.pad_sequences(total_label, padding="post")

space_tensor = tf.convert_to_tensor(padded_space)
action_tensor = tf.convert_to_tensor(padded_action)
others_tensor = tf.convert_to_tensor(padded_others)
label_tensor = tf.convert_to_tensor(padded_label)

In [180]:
# Insert a singleton axis after the sequence axis:
# (n_sequences, seq_len, n_features) -> (n_sequences, seq_len, 1, n_features).
# The sizes are read from the tensors themselves instead of being hard-coded,
# so the cell keeps working when the data changes, and re-running it leaves
# the shapes unchanged.
others_tensor = tf.reshape(
    others_tensor,
    (others_tensor.shape[0], others_tensor.shape[1], 1, others_tensor.shape[-1]),
)
label_tensor = tf.reshape(
    label_tensor,
    (label_tensor.shape[0], label_tensor.shape[1], 1, label_tensor.shape[-1]),
)

In [181]:
# Sanity check: print the shape of every tensor that feeds the model,
# in the order space, action, others, label.
for tensor in (space_tensor, action_tensor, others_tensor, label_tensor):
    print(tensor.shape)

(15, 486, 1)
(15, 486, 1)
(15, 486, 1, 36)
(15, 486, 1, 3)


In [182]:
# Vocabulary sizes of the two embeddings.
# NOTE(review): these should be larger than the largest 'Space' / 'Action' id
# present in the data — verify against the dataset.
embed_space_size = 15
embed_action_size = 9

# Feature sizes of (space, action, others), read from the prepared tensors
# instead of being hard-coded.
input_dim = (space_tensor.shape[-1], action_tensor.shape[-1], others_tensor.shape[-1])
# Length of the longest sequence; every other sequence is padded to this length.
seq_len = space_tensor.shape[1]

model = training(input_dim, seq_len, embed_space_size, embed_action_size)
model.summary()

Model: "classifier"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Other_input (InputLayer)        [(None, 486, 1, 36)] 0                                            
__________________________________________________________________________________________________
Space_input (InputLayer)        [(None, 486, 1)]     0                                            
__________________________________________________________________________________________________
Action_input (InputLayer)       [(None, 486, 1)]     0                                            
__________________________________________________________________________________________________
tf.cast_2 (TFOpLambda)          (None, 486, 1, 36)   0           Other_input[0][0]                
_________________________________________________________________________________________

In [183]:
# Training configuration.
epochs = 20
optimizer = 'adam'
metrics = ['AUC', 'accuracy']
loss = keras.losses.CategoricalCrossentropy()

# EarlyStopping uses its default monitor here; it only takes effect when it is
# passed to fit() and the monitored metric is available during training.
callbacks = tf.keras.callbacks.EarlyStopping(
    min_delta=0.002,
    patience=15,
    restore_best_weights=True,
)

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [184]:
# Positional train/test split of the padded sequences (no shuffling is done by split_data).
train_x, train_y, test_x, test_y = split_data(space_tensor, action_tensor, others_tensor, label_tensor)

In [185]:
# Hand the EarlyStopping callback to fit() and hold out the last 20% of the
# training sequences as validation data, so the callback's default `val_loss`
# monitor has a metric to watch. `pred` holds the History object returned by fit().
pred = model.fit(train_x, train_y, epochs=epochs, validation_split=0.2, callbacks=[callbacks])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [188]:
# Evaluate the trained model on the held-out test split.
result = model.evaluate(test_x, test_y)

