In [29]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer
from keras.utils import to_categorical 
from sklearn.metrics import precision_recall_fscore_support
import random

In [30]:
full_df = pd.read_csv('csvs/Judd Trump.csv').drop(columns=['Unnamed: 4','Unnamed: 5','Unnamed: 6', 'Percentage'])
full_df['player_1'] = full_df.apply(lambda x: 'Judd Trump', axis=1)
full_df['player_2'] = full_df.Player
full_df['win_rate'] = full_df.apply(lambda x: x.Wins/(x.Wins+x.Losses), axis=1)
full_df['meetings'] = full_df.apply(lambda x: x.Wins+x.Losses, axis=1)
full_df = full_df.drop(columns=['Player','Wins', 'Losses'])

In [31]:
for filename in os.listdir('csvs/'):
    if filename.endswith('.csv') and filename != 'Judd Trump.csv':
        df = pd.read_csv('csvs/' + filename).drop(columns=['Unnamed: 4','Unnamed: 5','Unnamed: 6', 'Percentage'])
        df['player_1'] = df.apply(lambda x: filename[:-4], axis=1)
        df['player_2'] = df.Player
        df['win_rate'] = df.apply(lambda x: x.Wins/(x.Wins+x.Losses), axis=1)
        df['meetings'] = df.apply(lambda x: x.Wins+x.Losses, axis=1)
        df = df.drop(columns=['Player','Wins', 'Losses'])
        full_df = full_df.append(df)

In [32]:
full_df['check_string'] = full_df.apply(lambda row: ''.join(sorted([row['player_1'], row['player_2']])), axis=1)
full_df = full_df.drop_duplicates('check_string').drop(columns=['check_string'])

In [33]:
def swapper(row):
    if random.choice([True,False]):
        x = row['player_1']
        row['player_1'] = row['player_2']
        row['player_2'] = x
        row['win_rate'] = 1 - row['win_rate']
    return row
full_df = full_df.apply(swapper, axis=1)

In [35]:
full_df.to_csv('snooker_h2h.csv', index=False)

In [246]:
full_df = pd.read_csv('snooker_h2h.csv')

In [247]:
def rate_cat(rate):
    if rate < 0.34:
        return 2
    if rate > 0.65:
        return 1
    else:
        return 0
full_df['rate_cat'] = full_df.win_rate.apply(rate_cat)

In [248]:
players, counts = np.unique(np.concatenate((full_df.player_1.values,full_df.player_2.values)), return_counts=True)

In [284]:
np.unique(full_df.rate_cat, return_counts=True)

(array([0, 1, 2]), array([588, 346, 362]))

In [159]:
def build_model(): 
    player_1 = layers.Input(shape=(1,))
    player_2 = layers.Input(shape=(1,))
    emb_layer = layers.Embedding(num_players + 1, 5)
    player_1_embedding = emb_layer(player_1)
    player_2_embedding = emb_layer(player_2)
    player_1_flat = layers.Flatten()(player_1_embedding)
    player_2_flat = layers.Flatten()(player_2_embedding)
    cl_concat = layers.Concatenate()([player_1_flat, player_2_flat])
    cl_dense = layers.Dense(100, activation='relu')(cl_concat)
    cl_dense1 = layers.Dense(20, activation='relu')(cl_dense)
    #pred = layers.Dense(1, activation="linear")(cl_dense1)
    pred = layers.Dense(3, activation="softmax")(cl_dense1)
    model = Model([player_1, player_2], pred)
    #model.compile(loss='mean_squared_error', optimizer='adam')
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    return model

In [133]:
meeting_f1 = np.empty((7, 3))
sample_f1 = np.empty((7, 3))
for idx in range(7):
    freq_players = np.squeeze(players[np.argwhere(counts>idx)])
    #meeting_df = full_df[full_df.meetings > idx+1]
    meeting_df = full_df[(full_df.player_1.isin(freq_players)) & (full_df.player_2.isin(freq_players))]
    
    meeting_ids = pd.factorize(np.concatenate((meeting_df.player_1, meeting_df.player_2)))[0]
    meeting_df['p1'] =  meeting_ids[:len(meeting_df)]
    meeting_df['p2'] = meeting_ids[len(meeting_df):]
    num_players = np.max(np.concatenate((meeting_df.p1.values, meeting_df.p2.values))) + 1
    
    for i in range(3):
        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(meeting_df['p1'], meeting_df['p2'], to_categorical(meeting_df['rate_cat']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
              train_labels,
             validation_data=([test_player_1, test_player_2], test_labels),
              epochs=5,
              batch_size=32
             )
        preds = model.predict([test_player_1, test_player_2])
        meeting_f1[idx, i] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

        sample_df = full_df.sample(len(meeting_df))
        sample_ids = pd.factorize(np.concatenate((sample_df.player_1, sample_df.player_2)))[0]
        sample_df['p1'] =  sample_ids[:len(sample_df)]
        sample_df['p2'] = sample_ids[len(sample_df):]
        num_players = np.max(np.concatenate((sample_df.p1.values, sample_df.p2.values))) + 1

        train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(sample_df['p1'], sample_df['p2'], to_categorical(sample_df['rate_cat']), test_size=0.1)
        model = None
        model = build_model()
        history = model.fit([train_player_1, train_player_2],
              train_labels,
             validation_data=([test_player_1, test_player_2], test_labels),
              epochs=5,
              batch_size=32
             )
        preds = model.predict([test_player_1, test_player_2])
        sample_f1[idx, i] = precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')[2]

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_217 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_218 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_109 (Embedding)       (None, 1, 5)         2800        input_217[0][0]                  
                                                                 input_218[0][0]                  
__________________________________________________________________________________________________
flatten_217 (Flatten)           (None, 5)            0           embedding_109[0][0]              
__________

Train on 6039 samples, validate on 672 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_223 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_224 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_112 (Embedding)       (None, 1, 5)         2800        input_223[0][0]                  
                                                                 input_224[0][0]                  
__________________________________________________________________________________________________
flatten_223 

Train on 6039 samples, validate on 672 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_229 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_230 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_115 (Embedding)       (None, 1, 5)         1975        input_229[0][0]                  
                                                                 input_230[0][0]                  
__________________________________________________________________________________________________
flatten_229 (Flatten)           (None, 5)            0           embedding_115[0][0]              
__________

Train on 5892 samples, validate on 655 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_235 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_236 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_118 (Embedding)       (None, 1, 5)         2795        input_235[0][0]                  
                                                                 input_236[0][0]                  
__________________________________________________________________________________________________
flatten_235 

Train on 5892 samples, validate on 655 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_241 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_242 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_121 (Embedding)       (None, 1, 5)         1660        input_241[0][0]                  
                                                                 input_242[0][0]                  
__________________________________________________________________________________________________
flatten_241 (Flatten)           (None, 5)            0           embedding_121[0][0]              
__________

Train on 5780 samples, validate on 643 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_247 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_248 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_124 (Embedding)       (None, 1, 5)         2760        input_247[0][0]                  
                                                                 input_248[0][0]                  
__________________________________________________________________________________________________
flatten_247 

Train on 5780 samples, validate on 643 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_253 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_254 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_127 (Embedding)       (None, 1, 5)         1455        input_253[0][0]                  
                                                                 input_254[0][0]                  
__________________________________________________________________________________________________
flatten_253 (Flatten)           (None, 5)            0           embedding_127[0][0]              
__________

Train on 5671 samples, validate on 631 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_259 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_260 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_130 (Embedding)       (None, 1, 5)         2715        input_259[0][0]                  
                                                                 input_260[0][0]                  
__________________________________________________________________________________________________
flatten_259 

Train on 5671 samples, validate on 631 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_265 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_266 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_133 (Embedding)       (None, 1, 5)         1325        input_265[0][0]                  
                                                                 input_266[0][0]                  
__________________________________________________________________________________________________
flatten_265 (Flatten)           (None, 5)            0           embedding_133[0][0]              
__________

Train on 5580 samples, validate on 621 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_271 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_272 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_136 (Embedding)       (None, 1, 5)         2730        input_271[0][0]                  
                                                                 input_272[0][0]                  
__________________________________________________________________________________________________
flatten_271 

Train on 5580 samples, validate on 621 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_277 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_278 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_139 (Embedding)       (None, 1, 5)         1270        input_277[0][0]                  
                                                                 input_278[0][0]                  
__________________________________________________________________________________________________
flatten_277 (Flatten)           (None, 5)            0           embedding_139[0][0]              
__________

Train on 5535 samples, validate on 616 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_283 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_284 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_142 (Embedding)       (None, 1, 5)         2755        input_283[0][0]                  
                                                                 input_284[0][0]                  
__________________________________________________________________________________________________
flatten_283 

Train on 5535 samples, validate on 616 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_289 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_290 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_145 (Embedding)       (None, 1, 5)         1195        input_289[0][0]                  
                                                                 input_290[0][0]                  
__________________________________________________________________________________________________
flatten_289 (Flatten)           (None, 5)            0           embedding_145[0][0]              
__________

Train on 5459 samples, validate on 607 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_295 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_296 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_148 (Embedding)       (None, 1, 5)         2740        input_295[0][0]                  
                                                                 input_296[0][0]                  
__________________________________________________________________________________________________
flatten_295 

Train on 5459 samples, validate on 607 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [134]:
np.average(meeting_f1, axis=1)

array([0.48339183, 0.48421158, 0.49518123, 0.46297282, 0.5143822 ,
       0.50321669, 0.50772152])

In [135]:
np.average(sample_f1, axis=1)

array([0.49449977, 0.49202174, 0.48297309, 0.48031542, 0.47032307,
       0.46856307, 0.47265868])

In [250]:
len(full_df)

6711

In [161]:
freq_players = np.squeeze(players[np.argwhere(counts>10)])
full_df = full_df[(full_df.player_1.isin(freq_players)) & (full_df.player_2.isin(freq_players))]

In [251]:
full_df = full_df[full_df.meetings > 4]

In [273]:
np.unique(full_df.rate_cat, return_counts=True)

(array([0, 1, 2]), array([588, 346, 362]))

In [254]:
freq_players = np.squeeze(players[np.argwhere(counts>5)])
full_df = full_df[(full_df.player_1.isin(freq_players)) & (full_df.player_2.isin(freq_players))]

In [255]:
full_df

Unnamed: 0,player_1,player_2,win_rate,meetings,rate_cat
0,Judd Trump,Robert Milkins,0.250000,8,2
3,Judd Trump,John Higgins,0.363636,22,0
4,Kyren Wilson,Judd Trump,0.600000,10,0
5,Michael White,Judd Trump,0.600000,5,0
6,Judd Trump,Ali Carter,0.428571,7,0
7,Judd Trump,Graeme Dott,0.444444,9,0
8,Judd Trump,Mark Allen,0.466667,15,0
9,Ronnie O Sullivan,Judd Trump,0.523810,21,0
10,Barry Hawkins,Judd Trump,0.500000,10,0
11,Liang Wenbo,Judd Trump,0.500000,8,0


In [256]:
ids = pd.factorize(np.concatenate((full_df.player_1, full_df.player_2)))[0]
full_df['p1'] =  ids[:len(full_df)]
full_df['p2'] = ids[len(full_df):]
num_players = np.max(np.concatenate((full_df.p1.values, full_df.p2.values))) + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [268]:
# categorical
train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(full_df['p1'], full_df['p2'], to_categorical(full_df['rate_cat']), test_size=0.1, random_state=0)

In [288]:
# real valued
train_player_1, test_player_1, train_player_2,test_player_2, train_labels, test_labels = train_test_split(full_df['p1'], full_df['p2'], full_df['win_rate'], test_size=0.1, random_state=0)

In [319]:
def build_model(): 
    player_1 = layers.Input(shape=(1,))
    player_2 = layers.Input(shape=(1,))
    emb_layer = layers.Embedding(num_players + 1, 20)
    player_1_embedding = emb_layer(player_1)
    player_2_embedding = emb_layer(player_2)
    player_1_flat = layers.Flatten()(player_1_embedding)
    player_2_flat = layers.Flatten()(player_2_embedding)
    cl_concat = layers.Concatenate()([player_1_flat, player_2_flat])
    cl_dense = layers.Dense(100, activation='relu')(cl_concat)
    cl_dense1 = layers.Dense(20, activation='relu')(cl_dense)
    cl_dense2 = layers.Dense(10, activation='relu')(cl_dense1)
    pred = layers.Dense(1, activation="linear")(cl_dense2)
    #pred = layers.Dense(3, activation="softmax")(cl_dense1)
    model = Model([player_1, player_2], pred)
    model.compile(loss='mean_squared_error', optimizer='adam')
    #model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    return model

In [320]:
model = None
model = build_model()
history = model.fit([train_player_1, train_player_2],
          train_labels,
         validation_data=([test_player_1, test_player_2], test_labels),
          epochs=10,
          batch_size=32,
        #class_weight={0: 1., 1: 2., 2: 2.}
         )

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_345 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
input_346 (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_173 (Embedding)       (None, 1, 20)        3060        input_345[0][0]                  
                                                                 input_346[0][0]                  
__________________________________________________________________________________________________
flatten_345 (Flatten)           (None, 20)           0           embedding_173[0][0]              
__________

In [321]:
preds = model.predict([test_player_1, test_player_2])

In [141]:
# real valued
precision_recall_fscore_support(np.round(np.squeeze(preds)), np.round(test_labels.values), average='macro')

(0.70437998215776, 0.6920151405912041, 0.6941325970680314, None)

In [287]:
# categorical
precision_recall_fscore_support(np.argmax(test_labels,axis=1), np.argmax(preds, axis=1), average='macro')

(0.44654312188168205, 0.46799516908212563, 0.44117446758831375, None)

In [282]:
np.argmax(test_labels,axis=1)[:20]

array([2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 0, 1, 0, 2, 1, 2, 2])

In [283]:
np.argmax(preds, axis=1)[:20]

array([2, 1, 2, 2, 2, 0, 0, 2, 1, 2, 0, 2, 0, 2, 2, 0, 2, 1, 2, 1])

In [322]:
np.squeeze(preds)[:20]

array([0.3172906 , 0.5184352 , 0.27428398, 0.28995064, 0.39741963,
       0.50583076, 0.4852446 , 0.38402203, 0.6862867 , 0.25714117,
       0.5874709 , 0.09857122, 0.3671345 , 0.2519115 , 0.4823543 ,
       0.4518452 , 0.3206232 , 0.75178695, 0.4224865 , 0.57337546],
      dtype=float32)

In [323]:
test_labels.values[:20]

array([0.14285714, 0.6       , 0.25      , 0.2       , 0.6       ,
       0.28571429, 0.33333333, 0.55555556, 0.        , 0.28571429,
       0.27272727, 0.14285714, 0.33333333, 0.4       , 0.66666667,
       0.6       , 0.2       , 0.8       , 0.1       , 0.33333333])

In [147]:
np.round(0.5)

0.0

In [263]:
cat_preds = np.empty(len(preds))
cat_labels = np.empty(len(preds))

for i, pred in enumerate(preds):
    if pred < 0.34:
        cat_preds[i] = 2
    elif pred > 0.65:
        cat_preds[i] = 1
    else:
        cat_preds[i] = 0

In [264]:
for i, pred in enumerate(test_labels.values):
    if pred < 0.34:
        cat_labels[i] = 2
    elif pred > 0.65:
        cat_labels[i] = 1
    else:
        cat_labels[i] = 0

In [265]:
precision_recall_fscore_support(cat_labels, cat_preds, average='macro')

(0.6165311653116531, 0.34631642512077293, 0.2308497363442418, None)

In [266]:
cat_preds[:20]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [267]:
cat_labels[:20]

array([2., 0., 2., 2., 0., 2., 2., 0., 2., 2., 2., 2., 2., 0., 1., 0., 2.,
       1., 2., 2.])

In [229]:
(cat_preds == cat_labels).all(axis=(0)).mean()

0.0

In [237]:
uniq, counts = np.unique(np.equal(cat_preds, cat_labels), return_counts=True)

In [238]:
dict(zip(uniq, counts))

{False: 134, True: 198}