In [6]:
import pandas as pd
import os
from tqdm import tqdm
import time
from matplotlib import pyplot as plt
import numpy as np

In [2]:
def pull_game_data(g):
        game_res = pd.read_excel('raw_game_pulls/{}'.format(g))
        game_strings = g.split(' vs ')
        team_A = game_strings[0].split('for ')[-1]
        team_B = game_strings[1].split('.xlsx')[0]

        matrix1 = pd.read_excel('team_specific_matrix/{}_A.xlsx'.format(team_A), index_col='Starting_State')
        matrix2 = pd.read_excel('team_specific_matrix/{}_B.xlsx'.format(team_B), index_col='Starting_State')

        A_score = game_res.iloc[-1]['Home']
        B_score = game_res.iloc[-1]['Away']
        score = (A_score,B_score)

        return matrix1, matrix2, score

In [3]:
for game in os.listdir('raw_game_pulls')[:10]:
    a,b,c = pull_game_data(game)

In [9]:
# import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Model, regularizers


In [53]:
# Define the neural network architecture
def build_model():
    input1 = layers.Input(shape=(18, 18))
    input2 = layers.Input(shape=(18, 18))

    flatten1 = layers.Flatten()(input1)
    flatten2 = layers.Flatten()(input2)

    concat = layers.Concatenate()([flatten1, flatten2])
    dense1 = layers.Dense(200, activation='relu')(concat)
    dense2 = layers.Dense(100, activation='relu')(dense1)

    # dense3 = layers.Dense(32, activation='sigmoid')(dense2)
    # dense4 = layers.Dense(256, activation='relu')(dense3)
    # dense5 = layers.Dense(128, activation='sigmoid')(dense4)

    output1 = layers.Dense(201, activation='softmax', name='output1')(dense2)  # Adjust output dimension to match the range (150 - 50 + 1)
    output2 = layers.Dense(201, activation='softmax', name='output2')(dense2)  # Adjust output dimension to match the range (150 - 50 + 1)

    model = Model(inputs=[input1, input2], outputs=[output1, output2])
    return model

# Define your training function
def train_model(matrixA_list, matrixB_list, output_tuple_list, epochs=10, batch_size=32):
    # Convert DataFrame to numpy arrays
    matrixA_array = np.array(matrixA_list)
    matrixB_array = np.array(matrixB_list)
    output_array = np.array(output_tuple_list)
    
    # Build the model
    model = build_model()

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    model.fit([matrixA_array, matrixB_array], [output_array[:, 0], output_array[:, 1]], epochs=epochs, batch_size=batch_size)
    return model

In [7]:
ma_list = []
mb_list = []
output_list = []

for game in tqdm(os.listdir('raw_game_pulls')):
    a,b,c = pull_game_data(game)
    ma_list.append(a)
    mb_list.append(b)
    output_list.append(c)

100%|██████████| 5237/5237 [09:36<00:00,  9.09it/s]


In [8]:
arr_outputs = []
for x in output_list:
    value1, value2 = x

    # Create numpy arrays
    array1 = np.zeros(201)
    array2 = np.zeros(201)

    # Set the corresponding indices to 1
    array1[value1 - 1] = 1
    array2[value2 - 1] = 1 
    arr_outputs.append((array1, array2))

In [12]:
all_inputs = ma_list + mb_list

In [28]:
arr = np.array([df.values for df in all_inputs])

# Calculate mean and standard deviation across the first axis (which represents different DataFrames)
avg = np.mean(arr, axis=0)
std_dev = np.std(arr, axis=0)

# Create DataFrames from the calculated arrays
avg_df = pd.DataFrame(avg, index=all_inputs[0].index, columns=all_inputs[0].columns)
std_dev_df = pd.DataFrame(std_dev, index=all_inputs[0].index, columns=all_inputs[0].columns)

In [45]:
norm_inputs_a = []
norm_inputs_b = []

In [46]:
# Step 3: Normalize each value in each matrix based on the overall mean and standard deviation
for matrix in ma_list:
    normalized_matrix = (matrix - avg_df) / std_dev_df
    normalized_matrix=normalized_matrix.fillna(0.)
    norm_inputs_a.append(normalized_matrix)

for matrix in mb_list:
    normalized_matrix = (matrix - avg_df) / std_dev_df
    normalized_matrix=normalized_matrix.fillna(0.)
    norm_inputs_b.append(normalized_matrix)

In [47]:
matrix_A_train = norm_inputs_a[:4000]
matrix_B_train = norm_inputs_b[:4000]

y_train = arr_outputs[:4000]

matrix_A_test = norm_inputs_a[4000:]
matrix_B_test = norm_inputs_b[4000:]

y_test=arr_outputs[4000:]

In [54]:
m1 = train_model(matrix_A_train, matrix_B_train, y_train, 10, 64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [61]:
predicted_scores = []
for i in tqdm(range(len(matrix_A_test))):
    # Convert test matrices to numpy arrays
    test_matrix_a_array = matrix_A_test[i].values.reshape(-1, 18, 18)
    test_matrix_b_array = matrix_B_test[i].values.reshape(-1, 18, 18)

    # Make predictions using the model
    predictions = m1.predict([test_matrix_a_array, test_matrix_b_array], verbose=0)

    predicted_score = (np.argmax(predictions[0]), np.argmax(predictions[1]))

    predicted_scores.append(predicted_score)

100%|██████████| 1237/1237 [00:43<00:00, 28.32it/s]


In [62]:
def accuracy(predicted_scores, actual_scores):
    real_A_score = np.array([x[0] for x in actual_scores])
    real_B_score = np.array([x[1] for x in actual_scores])

    sim_A_score=np.array([x[0] for x in predicted_scores])
    sim_B_score=np.array([x[1] for x in predicted_scores])

    
    a_error = (real_A_score-sim_A_score)**2
    b_error = (real_B_score-sim_B_score)**2

    return np.mean(np.sqrt(a_error+b_error))

In [63]:
real_scores = output_list[4000:]

In [64]:
accuracy(predicted_scores, real_scores)

14.780018003211344

In [66]:
def accuracy_ou(predicted_scores, actual_scores):
    real_A_score = np.array([x[0] for x in actual_scores])
    real_B_score = np.array([x[1] for x in actual_scores])

    real_sum = real_A_score+real_B_score

    sim_A_score=np.array([x[0] for x in predicted_scores])
    sim_B_score=np.array([x[1] for x in predicted_scores])

    sim_sum=sim_A_score+sim_B_score

    return np.mean(sim_sum-real_sum)

In [68]:
def distrib_ou(predicted_scores, actual_scores):
    real_A_score = np.array([x[0] for x in actual_scores])
    real_B_score = np.array([x[1] for x in actual_scores])

    real_sum = real_A_score+real_B_score

    sim_A_score=np.array([x[0] for x in predicted_scores])
    sim_B_score=np.array([x[1] for x in predicted_scores])

    sim_sum=sim_A_score+sim_B_score

    return sim_sum-real_sum

In [71]:
def accuracy_moneyline(predicted_scores, actual_scores):
    real_A_score = np.array([x[0] for x in actual_scores])
    real_B_score = np.array([x[1] for x in actual_scores])

    a_won = real_A_score>real_B_score

    sim_A_score=np.array([x[0] for x in predicted_scores])
    sim_B_score=np.array([x[1] for x in predicted_scores])

    sim_A_won = sim_A_score>sim_B_score

    return np.mean(a_won == sim_A_won)

In [69]:
distrib = distrib_ou(predicted_scores, real_scores)

In [72]:
accuracy_moneyline(predicted_scores, real_scores)

0.6402586903799515

In [60]:
def build_regularized_model():
    input1 = layers.Input(shape=(18, 18))
    input2 = layers.Input(shape=(18, 18))

    flatten1 = layers.Flatten()(input1)
    flatten2 = layers.Flatten()(input2)

    concat = layers.Concatenate()([flatten1, flatten2])
    
    # Wide sparse layers with L1 regularization
    dense1 = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l1(0.01))(concat)
    dropout1 = layers.Dropout(0.5)(dense1)
    
    dense2 = layers.Dense(256, activation='sigmoid', kernel_regularizer=regularizers.l1(0.01))(dropout1)
    dropout2 = layers.Dropout(0.5)(dense2)

    # Wide sparse layers with L1 regularization
    dense3 = layers.Dense(2048, activation='relu', kernel_regularizer=regularizers.l1(0.01))(dropout2)
    dropout3 = layers.Dropout(0.5)(dense2)
    
    dense4 = layers.Dense(64, activation='sigmoid', kernel_regularizer=regularizers.l1(0.01))(dropout3)
    dropout4 = layers.Dropout(0.5)(dense3)

    d5=layers.Dense(102377, activation='sigmoid')(dense4)
    d6=layers.Dense(219, activation='sigmoid')(d5)
    d7=layers.Dense(23456, activation='relu')(d6)
    d8=layers.Dense(129, activation='softmax')(d7)
    d9=layers.Dense(32, activation='sigmoid')(d8)

    output1 = layers.Dense(201, activation='softmax', name='output1')(d9)  # Adjust output dimension to match the range (150 - 50 + 1)
    output2 = layers.Dense(201, activation='softmax', name='output2')(d9)  # Adjust output dimension to match the range (150 - 50 + 1)

    model = Model(inputs=[input1, input2], outputs=[output1, output2])
    return model

In [139]:
def custom_loss(y_true, y_pred):
    # Reshape the input tensors if necessary
    y_true_reshaped = tf.reshape(y_true, (-1, 201))  # Assuming 101 is the number of classes
    y_pred_reshaped = tf.reshape(y_pred, (-1, 201))

    # Compute the argmax operation with the correct axis
    a_pred = tf.argmax(y_pred_reshaped[0], axis=1)
    b_pred = tf.argmax(y_pred_reshaped[1], axis=1)

    # Compute the argmax operation for the true labels as well
    a_true = tf.argmax(y_true_reshaped[0], axis=1)
    b_true = tf.argmax(y_true_reshaped[1], axis=1)

    # Compute the loss based on the difference between predicted and true values
    loss = tf.sqrt(tf.cast(tf.square(a_true - a_pred), dtype=tf.float32) + tf.cast(tf.square(b_true - b_pred), dtype=tf.float32))
    return loss

In [140]:
def train_model_2(matrixA_list, matrixB_list, output_tuple_list, epochs=10, batch_size=32):
    # Convert DataFrame to numpy arrays
    matrixA_array = np.array(matrixA_list)
    matrixB_array = np.array(matrixB_list)
    output_array = np.array(output_tuple_list)
    
    # Build the model
    model = build_regularized_model()

    # Compile the model
    model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])

    # Train the model
    model.fit([matrixA_array, matrixB_array], [output_array[:, 0], output_array[:, 1]], epochs=epochs, batch_size=batch_size)
    return model

In [141]:
m2 = train_model_2(matrix_A_train, matrix_B_train, y_train, 10, 64)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'custom_loss_1/ArgMax_2' defined at (most recent call last):
    File "c:\Users\mdona\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\mdona\anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\mdona\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "c:\Users\mdona\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\mdona\anaconda3\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\mdona\anaconda3\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\mdona\anaconda3\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "c:\Users\mdona\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
      result = self._run_cell(
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "c:\Users\mdona\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\mdona\AppData\Local\Temp\ipykernel_13084\1803753112.py", line 1, in <module>
      m2 = train_model_2(matrix_A_train, matrix_B_train, y_train, 10, 64)
    File "C:\Users\mdona\AppData\Local\Temp\ipykernel_13084\3458158607.py", line 14, in train_model_2
      model.fit([matrixA_array, matrixB_array], [output_array[:, 0], output_array[:, 1]], epochs=epochs, batch_size=batch_size)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1024, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\training.py", line 1082, in compute_loss
      return self.compiled_loss(
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "c:\Users\mdona\anaconda3\lib\site-packages\keras\losses.py", line 284, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\mdona\AppData\Local\Temp\ipykernel_13084\4113577499.py", line 11, in custom_loss
      a_true = tf.argmax(y_true_reshaped[0], axis=1)
Node: 'custom_loss_1/ArgMax_2'
Expected dimension in the range [-1, 1), but got 1
	 [[{{node custom_loss_1/ArgMax_2}}]] [Op:__inference_train_function_238695]

In [126]:
a

<tf.Tensor 'IteratorGetNext:3' shape=(None, 201) dtype=float32>

In [111]:
predicted_scores_2 = []
for i in tqdm.tqdm(range(len(matrix_A_test))[:10]):
    # Convert test matrices to numpy arrays
    test_matrix_a_array = matrix_A_test[i].values.reshape(-1, 18, 18)
    test_matrix_b_array = matrix_B_test[i].values.reshape(-1, 18, 18)

    # Make predictions using the model
    predictions = m2.predict([test_matrix_a_array, test_matrix_b_array], verbose=0)

    predicted_score = (np.argmax(predictions[0]), np.argmax(predictions[1]))

    predicted_scores_2.append(predicted_score)

100%|██████████| 10/10 [00:00<00:00, 18.60it/s]


In [112]:
predicted_scores_2

[(76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69),
 (76, 69)]