In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer
from keras.utils import to_categorical 
from sklearn.metrics import precision_recall_fscore_support
import random

Using TensorFlow backend.


# Preprocessing

In [27]:
df = pd.read_csv('Data.csv', encoding = "ISO-8859-1")
#df = df.drop(columns=df.columns[:9].append(df.columns[11:]))
#df = df.groupby(['Winner', 'Loser']).size().reset_index(name='counts')

In [28]:
df

Unnamed: 0,ATP,Location,Tournament,Date,Series,Court,Surface,Round,Best of,Winner,...,UBW,UBL,LBW,LBL,SJW,SJL,MaxW,MaxL,AvgW,AvgL
0,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Dosedel S.,...,,,,,,,,,,
1,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Enqvist T.,...,,,,,,,,,,
2,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Escude N.,...,,,,,,,,,,
3,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Federer R.,...,,,,,,,,,,
4,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Fromberg R.,...,,,,,,,,,,
5,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Gambill J.M.,...,,,,,,,,,,
6,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Grosjean S.,...,,,,,,,,,,
7,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Henman T.,...,,,,,,,,,,
8,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Hewitt L.,...,,,,,,,,,,
9,1,Adelaide,Australian Hardcourt Championships,3/01/2000,International,Outdoor,Hard,1st Round,3,Lisnard J.,...,,,,,,,,,,


In [17]:
def win_rate(w, l, df, c):
    if df[(df.Winner == l) & (df.Loser == w)].empty:
        return 1
    else:
        return c/(c+ df[(df.Winner == l) & (df.Loser == w)]['counts'])
df['win_rate'] = df.apply(lambda row: win_rate(row.Winner, row.Loser, df, row.counts), axis=1)

In [18]:
def delist(x):
    if type(x) == np.ndarray:
        return x[0]
    else:
        return x
df['win_rate'] = df.win_rate.apply(lambda x: delist(x))

In [19]:
df['check_string'] = df.apply(lambda row: ''.join(sorted([row['Winner'], row['Loser']])), axis=1)
df = df.drop_duplicates('check_string', keep='last')
df['last_win'] = df.apply(lambda x: 0, axis=1)
df['meetings'] = df.apply(lambda x: np.round(x.counts/x.win_rate), axis=1)

In [20]:
def swapper(row):
    if random.choice([True,False]):
        x = row['Winner']
        row['Winner'] = row['Loser']
        row['Loser'] = x
        row['last_win'] = 1
        row['win_rate'] = 1 - row['win_rate']
    return row
df = df.apply(swapper, axis=1)
df = df.rename(columns={'Winner': 'player_0', 'Loser':'player_1'})
df = df.drop(columns=['check_string', 'counts'])

In [26]:
df.head()

Unnamed: 0,player_0,player_1,win_rate,last_win,meetings
0,Hajek J.,Ulihrach B.,1.0,0,1.0
1,Dupuis A.,Abel M.,0.0,1,1.0
2,Heuberger I.,Abel M.,0.0,1,1.0
5,Acasuso J.,Arazi H.,1.0,0,1.0
6,Bachinger M.,Acasuso J.,0.0,1,1.0


In [11]:
df.to_csv('tennis_h2h.csv', index=False)

In [128]:
players, counts = np.unique(np.concatenate((df.player_0.values,df.player_1.values)), return_counts=True)

In [154]:
def win_rate_cat(win_rate):
    if win_rate > 0.65:
        return 0
    elif win_rate < 0.34:
        return 1
    else:
        return 2
df['win_rate_cat'] = df.win_rate.apply(lambda x: win_rate_cat(x))

# Modeling

In [157]:
def build_model(): 
    player_1 = layers.Input(shape=(1,))
    player_2 = layers.Input(shape=(1,))
    emb_layer = layers.Embedding(num_players, 5)
    player_1_embedding = emb_layer(player_1)
    player_2_embedding = emb_layer(player_2)
    player_1_flat = layers.Flatten()(player_1_embedding)
    player_2_flat = layers.Flatten()(player_2_embedding)
    cl_concat = layers.Concatenate()([player_1_flat, player_2_flat])
    cl_dense = layers.Dense(100, activation='relu')(cl_concat)
    cl_dense1 = layers.Dense(20, activation='relu')(cl_dense)
    pred = layers.Dense(3, activation="softmax")(cl_dense1)
    model = Model([player_1, player_2], pred)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    return model

In [163]:
freq = np.empty((5,3))
samp = np.empty((5,3))
for idx, i in enumerate(range(5)):
    #freq_players = np.squeeze(players[np.argwhere(counts>i)])
    #freq_df = df[(df.player_0.isin(freq_players)) & (df.player_1.isin(freq_players))]
    freq_df = df[df.meetings>i]
    for j in range(3):
        sample_df = df.sample(len(freq_df))

        ids = pd.factorize(np.concatenate((freq_df.player_0, freq_df.player_1)))[0]
        freq_df['p1'] =  ids[:len(freq_df)]
        freq_df['p2'] = ids[len(freq_df):]
        num_players = np.max(np.concatenate((freq_df.p1.values, freq_df.p2.values))) + 1

        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(freq_df['p1'], freq_df['p2'], to_categorical(freq_df['win_rate_cat']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
                  train_labels,
                 validation_data=([test_player_1, test_player_2], test_labels),
                  epochs=5,
                  batch_size=32
                 )
        preds = model.predict([test_player_1, test_player_2])
        freq[idx, j] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

        ids = pd.factorize(np.concatenate((sample_df.player_0, sample_df.player_1)))[0]
        sample_df['p1'] =  ids[:len(sample_df)]
        sample_df['p2'] = ids[len(sample_df):]
        num_players = np.max(np.concatenate((sample_df.p1.values, sample_df.p2.values))) + 1

        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(sample_df['p1'], sample_df['p2'], to_categorical(sample_df['win_rate_cat']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
                  train_labels,
                 validation_data=([test_player_1, test_player_2], test_labels),
                  epochs=5,
                  batch_size=32
                 )
        preds = model.predict([test_player_1, test_player_2])
        samp[idx, j] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_71 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_72 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_36 (Embedding)        (None, 1, 5)         6935        input_71[0][0]                   
                                                                 input_72[0][0]                   
__________________________________________________________________________________________________
flatten_71 (Flatten)            (None, 5)            0           embedding_36[0][0]               
__________

Train on 23012 samples, validate on 2557 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_77 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_78 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_39 (Embedding)        (None, 1, 5)         6935        input_77[0][0]                   
                                                                 input_78[0][0]                   
__________________________________________________________________________________________________
flatten_77

Train on 23012 samples, validate on 2557 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_83 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_84 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_42 (Embedding)        (None, 1, 5)         2655        input_83[0][0]                   
                                                                 input_84[0][0]                   
__________________________________________________________________________________________________
flatten_83 (Flatten)            (None, 5)            0           embedding_42[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_87 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_88 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_44 (Embedding)        (None, 1, 5)         2655        input_87[0][0]                   
                                                                 input_88[0][0]                   
__________________________________________________________________________________________________
flatten_87 (Flatten)            (None, 5)            0           embedding_44[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_91 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_92 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_46 (Embedding)        (None, 1, 5)         2655        input_91[0][0]                   
                                                                 input_92[0][0]                   
__________________________________________________________________________________________________
flatten_91 (Flatten)            (None, 5)            0           embedding_46[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_95 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_96 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_48 (Embedding)        (None, 1, 5)         1790        input_95[0][0]                   
                                                                 input_96[0][0]                   
__________________________________________________________________________________________________
flatten_95 (Flatten)            (None, 5)            0           embedding_48[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_99 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_100 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_50 (Embedding)        (None, 1, 5)         1790        input_99[0][0]                   
                                                                 input_100[0][0]                  
__________________________________________________________________________________________________
flatten_99 (Flatten)            (None, 5)            0           embedding_50[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_103 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_104 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_52 (Embedding)        (None, 1, 5)         1790        input_103[0][0]                  
                                                                 input_104[0][0]                  
__________________________________________________________________________________________________
flatten_103 (Flatten)           (None, 5)            0           embedding_52[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_107 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_108 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_54 (Embedding)        (None, 1, 5)         1345        input_107[0][0]                  
                                                                 input_108[0][0]                  
__________________________________________________________________________________________________
flatten_107 (Flatten)           (None, 5)            0           embedding_54[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_111 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_112 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_56 (Embedding)        (None, 1, 5)         1345        input_111[0][0]                  
                                                                 input_112[0][0]                  
__________________________________________________________________________________________________
flatten_111 (Flatten)           (None, 5)            0           embedding_56[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_115 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_116 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_58 (Embedding)        (None, 1, 5)         1345        input_115[0][0]                  
                                                                 input_116[0][0]                  
__________________________________________________________________________________________________
flatten_115 (Flatten)           (None, 5)            0           embedding_58[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_119 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_120 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_60 (Embedding)        (None, 1, 5)         1035        input_119[0][0]                  
                                                                 input_120[0][0]                  
__________________________________________________________________________________________________
flatten_119 (Flatten)           (None, 5)            0           embedding_60[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_123 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_124 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_62 (Embedding)        (None, 1, 5)         1035        input_123[0][0]                  
                                                                 input_124[0][0]                  
__________________________________________________________________________________________________
flatten_123 (Flatten)           (None, 5)            0           embedding_62[0][0]               
__________

  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_127 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_128 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_64 (Embedding)        (None, 1, 5)         1035        input_127[0][0]                  
                                                                 input_128[0][0]                  
__________________________________________________________________________________________________
flatten_127 (Flatten)           (None, 5)            0           embedding_64[0][0]               
__________

  'precision', 'predicted', average, warn_for)


In [164]:
print(np.average(freq, axis=1))
print(np.average(samp, axis=1))

[0.43490076 0.47739973 0.48421382 0.5288988  0.54019707]
[0.4444399  0.4135832  0.41998272 0.38504393 0.37760467]


# Last match

In [165]:
def build_model(): 
    player_1 = layers.Input(shape=(1,))
    player_2 = layers.Input(shape=(1,))
    emb_layer = layers.Embedding(num_players, 5)
    player_1_embedding = emb_layer(player_1)
    player_2_embedding = emb_layer(player_2)
    player_1_flat = layers.Flatten()(player_1_embedding)
    player_2_flat = layers.Flatten()(player_2_embedding)
    cl_concat = layers.Concatenate()([player_1_flat, player_2_flat])
    cl_dense = layers.Dense(100, activation='relu')(cl_concat)
    cl_dense1 = layers.Dense(20, activation='relu')(cl_dense)
    pred = layers.Dense(2, activation="softmax")(cl_dense1)
    model = Model([player_1, player_2], pred)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    return model

In [166]:
freq = np.empty((5,3))
samp = np.empty((5,3))
for idx, i in enumerate(range(5)):
    freq_players = np.squeeze(players[np.argwhere(counts>i)])
    #freq_df = df[(df.player_0.isin(freq_players)) & (df.player_1.isin(freq_players))]
    freq_df = df[df.meetings>i]
    for j in range(3):
        sample_df = df.sample(len(freq_df))

        ids = pd.factorize(np.concatenate((freq_df.player_0, freq_df.player_1)))[0]
        freq_df['p1'] =  ids[:len(freq_df)]
        freq_df['p2'] = ids[len(freq_df):]
        num_players = np.max(np.concatenate((freq_df.p1.values, freq_df.p2.values))) + 1

        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(freq_df['p1'], freq_df['p2'], to_categorical(freq_df['last_win']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
                  train_labels,
                 validation_data=([test_player_1, test_player_2], test_labels),
                  epochs=5,
                  batch_size=32
                 )
        preds = model.predict([test_player_1, test_player_2])
        freq[idx,j] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

        ids = pd.factorize(np.concatenate((sample_df.player_0, sample_df.player_1)))[0]
        sample_df['p1'] =  ids[:len(sample_df)]
        sample_df['p2'] = ids[len(sample_df):]
        num_players = np.max(np.concatenate((sample_df.p1.values, sample_df.p2.values))) + 1

        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(sample_df['p1'], sample_df['p2'], to_categorical(sample_df['last_win']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
                  train_labels,
                 validation_data=([test_player_1, test_player_2], test_labels),
                  epochs=5,
                  batch_size=32
                 )
        preds = model.predict([test_player_1, test_player_2])
        samp[idx,j] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_131 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_132 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_66 (Embedding)        (None, 1, 5)         6935        input_131[0][0]                  
                                                                 input_132[0][0]                  
__________________________________________________________________________________________________
flatten_131 (Flatten)           (None, 5)            0           embedding_66[0][0]               
__________

Train on 23012 samples, validate on 2557 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_137 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_138 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_69 (Embedding)        (None, 1, 5)         6935        input_137[0][0]                  
                                                                 input_138[0][0]                  
__________________________________________________________________________________________________
flatten_13

Train on 23012 samples, validate on 2557 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_143 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_144 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_72 (Embedding)        (None, 1, 5)         2655        input_143[0][0]                  
                                                                 input_144[0][0]                  
__________________________________________________________________________________________________
flatten_143 (Flatten)           (None, 5)            0           embedding_72[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_147 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_148 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_74 (Embedding)        (None, 1, 5)         2655        input_147[0][0]                  
                                                                 input_148[0][0]                  
__________________________________________________________________________________________________
flatten_147 (Flatten)           (None, 5)            0           embedding_74[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_151 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_152 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_76 (Embedding)        (None, 1, 5)         2655        input_151[0][0]                  
                                                                 input_152[0][0]                  
__________________________________________________________________________________________________
flatten_151 (Flatten)           (None, 5)            0           embedding_76[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_155 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_156 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_78 (Embedding)        (None, 1, 5)         1790        input_155[0][0]                  
                                                                 input_156[0][0]                  
__________________________________________________________________________________________________
flatten_155 (Flatten)           (None, 5)            0           embedding_78[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_159 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_160 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_80 (Embedding)        (None, 1, 5)         1790        input_159[0][0]                  
                                                                 input_160[0][0]                  
__________________________________________________________________________________________________
flatten_159 (Flatten)           (None, 5)            0           embedding_80[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_163 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_164 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_82 (Embedding)        (None, 1, 5)         1790        input_163[0][0]                  
                                                                 input_164[0][0]                  
__________________________________________________________________________________________________
flatten_163 (Flatten)           (None, 5)            0           embedding_82[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_167 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_168 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_84 (Embedding)        (None, 1, 5)         1345        input_167[0][0]                  
                                                                 input_168[0][0]                  
__________________________________________________________________________________________________
flatten_167 (Flatten)           (None, 5)            0           embedding_84[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_171 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_172 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_86 (Embedding)        (None, 1, 5)         1345        input_171[0][0]                  
                                                                 input_172[0][0]                  
__________________________________________________________________________________________________
flatten_171 (Flatten)           (None, 5)            0           embedding_86[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_175 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_176 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_88 (Embedding)        (None, 1, 5)         1345        input_175[0][0]                  
                                                                 input_176[0][0]                  
__________________________________________________________________________________________________
flatten_175 (Flatten)           (None, 5)            0           embedding_88[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_179 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_180 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_90 (Embedding)        (None, 1, 5)         1035        input_179[0][0]                  
                                                                 input_180[0][0]                  
__________________________________________________________________________________________________
flatten_179 (Flatten)           (None, 5)            0           embedding_90[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_183 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_184 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_92 (Embedding)        (None, 1, 5)         1035        input_183[0][0]                  
                                                                 input_184[0][0]                  
__________________________________________________________________________________________________
flatten_183 (Flatten)           (None, 5)            0           embedding_92[0][0]               
__________

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_187 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_188 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_94 (Embedding)        (None, 1, 5)         1035        input_187[0][0]                  
                                                                 input_188[0][0]                  
__________________________________________________________________________________________________
flatten_187 (Flatten)           (None, 5)            0           embedding_94[0][0]               
__________

In [167]:
print(np.average(freq, axis=1))
print(np.average(samp, axis=1))

[0.67593479 0.73349855 0.79459114 0.82184588 0.84305607]
[0.67492168 0.6421452  0.62369951 0.64063955 0.57209557]


# Visualize