## Requirement

In [3]:
import numpy as np
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, LSTM, Flatten, Dropout, Activation
from tensorflow.keras import layers


## Data loading

In [None]:
file_match_1 = open('/content/match_1.json')
data = json.load(file_match_1)
df_match_1=pd.DataFrame(data)
df_match_1

In [5]:
file_match_2 = open('/content/match_2.json')
data_2 = json.load(file_match_2)
df_match_2=pd.DataFrame(data_2)
df_match_2

Unnamed: 0,label,norm
0,no action,"[104.42423889328715, 87.63989343949717, 128.52..."
1,run,"[108.03112639951678, 92.57566083451651, 81.303..."
2,pass,"[17.837163554142162, 39.435455203974186, 51.64..."
3,rest,"[21.91843920469922, 18.11914155706377, 21.7137..."
4,walk,"[20.86063916182152, 17.8097748446304, 14.35537..."
...,...,...
608,walk,"[23.337305769963503, 20.210987911153104, 25.06..."
609,walk,"[22.470322813933603, 22.552427730975246, 23.84..."
610,walk,"[43.833612705797144, 46.18045998580312, 37.492..."
611,walk,"[30.927599255773355, 31.26358258808756, 28.286..."


In [6]:
df_match=pd.concat([df_match_1, df_match_2])
#df_match=df_match_2

In [7]:
df_match =df_match.drop(df_match[df_match['label'] == 'no action'].index)

In [8]:
actions=list(df_match['label'])

In [9]:
len(actions)

1183

In [None]:
actions

In [11]:
df_match['label'].value_counts()

run        551
walk       434
dribble     78
rest        35
pass        32
tackle      31
shot        18
cross        4
Name: label, dtype: int64

## playstyle training dataset

In [12]:
import random

def create_attacking_sequences(actions, attacking_actions, attacking_threshold=0.3, min_sequence_length=5):
    sequences = []
    current_sequence = []
    attacking_actions_count = 0

    for action in actions:
        current_sequence.append(action)

        if action in attacking_actions:
            attacking_actions_count += 1

        if  attacking_actions_count / len(current_sequence) >= attacking_threshold and  len(current_sequence) >= min_sequence_length:
        #if  len(current_sequence) == min_sequence_length:
            #if len(current_sequence) >= min_sequence_length:
                sequences.append(current_sequence.copy())
                current_sequence = []
                attacking_actions_count = 0

    # If there are remaining actions, add them to the last sequence
    if current_sequence and len(current_sequence) >= min_sequence_length:
        sequences.append(current_sequence)

    return sequences



#attacking_actions = ["run", "walk"]
attacking_actions = ["shot", "sprint","pass"]
attacking_sequences = create_attacking_sequences(actions, attacking_actions, attacking_threshold=0.5, min_sequence_length=5)
print(attacking_sequences)


[['walk', 'walk', 'walk', 'rest', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'tackle', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'run', 'run', 'run', 'run', 'run', 'walk', 'run', 'run', 'tackle', 'dribble', 'run', 'dribble', 'pass', 'pass', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'walk', 'run', 'walk', 'walk', 'run', 'run', 'rest', 'walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'run', 'run', 'run', 'run', 'dribble', 'run', 'pass', 'run', 'run', 'run', 'run', 'walk', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'tackle', 'tackle', 'run', 'run', 'run', 'walk', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'wal

In [13]:
len(attacking_sequences)

1

## one hot encoding

In [14]:
X_list = []
set_action=['walk','run','dribble','rest','pass','tackle','shot','cross']
n_x=len(set_action)
for element in attacking_sequences:
    #note_l = [note.pitch for note in midi_data.instruments[0].notes]
    # convert to one-hot-encoding
    T_x = len(element)

    X_ohe = np.zeros((T_x, n_x))
    for t in range(T_x):

        X_ohe[t, set_action.index(element[t])] = 1
    # add to the list
    X_list.append(X_ohe)

In [15]:
X_list[0].shape

(1183, 8)

In [16]:
X_train_list = []
y_train_list = []
sequence_length=4
for example in X_list:
      #print(example.shape)
      for i in range(example.shape[0] - sequence_length):
        #print(example[idx:idx+sequence_length].shape)
        X_train_list.append(example[i:i+sequence_length])
        y_train_list.append(example[i+sequence_length])

In [17]:
X_train = np.asarray(X_train_list)
y_train = np.asarray(y_train_list)

In [18]:
print("X_train.shape:", X_train.shape)
print("y_train.shape:", y_train.shape)

X_train.shape: (1179, 4, 8)
y_train.shape: (1179, 8)


## Model structure

Model structure

In [19]:
model  = Sequential()
model.add(LSTM(256, input_shape=(sequence_length, n_x), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(256))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_x, activation='softmax'))

In [20]:
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=60, batch_size=64)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.src.callbacks.History at 0x79889c376ce0>

## Generating a new sequence from sampling the language model





 To generate a new sequence from the language model, we simply give it as input a random sequence of duration sequence_length and ask the trained network to predict the output (using model.predict).

The output of the network is a vector of probability of dimension  nx  which represents the probability of each action to be the next action  given as input.

From this vector, we select the action which has the maximum probability.

We then concatenate this new action (its one-hot-encoding representation) at the end of the input sequence. We finally remove the first element of the input sequence to keep its duration constant (sequence_length).

In [21]:
#action_test=['run','dribble','dribble','pass','dribble','run','run','pass','walk','shot']#seed

set_action=['walk','run','dribble','rest','pass','tackle','shot','cross']
action_test=['run','shot','shot','cross']
#action_test=['shot']#seed
pattern=np.zeros((4, 8))
for t in range(4):

        pattern[t, set_action.index(action_test[t])] = 1
pattern#

array([[0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [22]:
previous_action=pattern[-1]
previous_action

array([0., 0., 0., 0., 0., 0., 0., 1.])

In [23]:
action_index = np.argmax(previous_action)

corresponding_action = set_action[action_index]
corresponding_action

'cross'

In [24]:
#start = np.random.randint(0, len(X_train_list)-1) # seed
#pattern = X_train_list[start]

In [25]:
#pattern

In [26]:
action_list = []
# --- prediction_l: is the list of output-vectors (float numbers \in [0,1]) of the network over time
prediction_l = []
dict_counter={'walk':0,'run':0,'dribble':0,'rest':0,'pass':0,'tackle':0,'shot':0,'cross':0}
set_action=['walk','run','dribble','rest','pass','tackle','shot','cross']

for note_index in range(200):
        penality=-0.4#normal game
        penalty_array=[-0.6,-0.6,0,-0.4,0,0,+0.2,0]#aray bonus question
        column_sums = np.sum(pattern, axis=0)
        print("number",np.multiply(penalty_array, column_sums))
        #prediction=penality*column_sums
        #print('first',prediction)
        prediction = model.predict(np.expand_dims(pattern, 0))
        #prediction[0]+= np.multiply(penalty_array, column_sums)
        prediction[0]+=penality*column_sums
        #print('nombre',penality*dict_counter[set_action[label]] )
        #prediction[0][label]+=penality*dict_counter[set_action[label]]
        label = np.argmax(prediction)#retourne l'indice de la valeur maximale

        #dict_counter[set_action[label]]+=1
        #print(dict_counter)

        print(label)
        prediction_l.append(prediction)
        print(prediction)
        pattern = np.append(pattern[1:],
                            np.expand_dims(np.eye(n_x)[label], 0),
                            axis=0)
        action_list.append(set_action[label])
print(action_list)

number [-0.  -0.6  0.  -0.   0.   0.   0.4  0. ]
0
[[ 0.77880085 -0.22088069  0.01528998  0.01399742  0.00236459  0.00146188
  -0.7910979  -0.39993617]]
number [-0.6 -0.   0.  -0.   0.   0.   0.4  0. ]
0
[[ 4.8565432e-01  1.0553551e-01  1.9730509e-03  6.0746726e-03
   1.7440523e-04  2.9869148e-04 -7.9971141e-01 -3.9999917e-01]]
number [-1.2 -0.   0.  -0.   0.   0.   0.2  0. ]
1
[[-1.61963589e-02  1.98238775e-01  4.14175401e-03  1.13693625e-02
   3.33204836e-04  1.73322146e-03 -3.99621695e-01 -3.99998307e-01]]
number [-1.2 -0.6  0.  -0.   0.   0.   0.   0. ]
1
[[-7.4176759e-01  5.0732923e-01  1.1073414e-02  1.3851171e-03
   4.4812466e-04  2.1316698e-02  2.1390557e-04 -3.9999887e-01]]
number [-1.2 -1.2  0.  -0.   0.   0.   0.   0. ]
5
[[-6.7534620e-01 -6.7721233e-02  4.0690817e-02  1.1822551e-02
   4.5141713e-03  8.4293790e-02  1.6810093e-03  6.4998421e-05]]
number [-0.6 -1.2  0.  -0.   0.   0.   0.   0. ]
2
[[-2.1380308e-01 -1.6264306e-01  6.1599735e-02  1.6354175e-02
   7.0941914e-03 -

In [27]:
df_match['label'].value_counts()

run        551
walk       434
dribble     78
rest        35
pass        32
tackle      31
shot        18
cross        4
Name: label, dtype: int64