<a href="https://colab.research.google.com/github/nathanij/atpPredictor/blob/main/Model_Building.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup

In [None]:
!pip install --upgrade tables
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import datetime
import h5py
from google.colab import drive
import keras
from keras.utils import np_utils
# Mount Google Drive at /content/drive; the later cells read the dataset from
# and copy the exported models to paths under this mount point.
drive.mount('/content/drive')

Use processed data

In [2]:
# Read the preprocessed match table from Drive (stored in the HDF5 file under key 'a').
data = pd.read_hdf(
    '/content/drive/MyDrive/tennisData/finished(key=a).h5',
    key='a',
)

Delete definitely unwanted columns

In [3]:
# Table-level columns to discard.
drops = ['ind', 'tourney_id']
# Per-player columns exist once per prefix, e.g. 'p0_id' and 'p1_id'.
plys = ['p0_', 'p1_']
pDrops = ['id']

# Expand every prefix/suffix pair and remove everything in a single call.
player_columns = [prefix + suffix for prefix in plys for suffix in pDrops]
data = data.drop(columns=drops + player_columns)

Select the subset of the data you want to train on

In [4]:
# Cut out the first 3 years of data: keep only rows whose tourney_date is at
# least 20080000 (presumably a YYYYMMDD integer, i.e. 2008 onwards — verify).
recent_enough = data['tourney_date'] >= 20080000
data = data.loc[recent_enough]

Drop qualifier columns (columns that were only needed for the filtering step above)

In [5]:
# tourney_date is no longer needed once the date filter has been applied.
drops = ['tourney_date']
# No per-player columns are removed at this stage; add suffixes to pDrops to
# drop the matching 'p0_*' / 'p1_*' columns as well.
plys = ['p0_', 'p1_']
pDrops = []

data = data.drop(columns=drops)
player_columns = [prefix + suffix for prefix in plys for suffix in pDrops]
data = data.drop(columns=player_columns)

Separate training and validation data

In [6]:
# Shuffle the rows with a fixed seed so the train/validation split (and hence
# every metric reported below) is reproducible across kernel restarts.
SPLIT_SEED = 42
# Number of shuffled rows held out at the end of the table for validation.
VAL_SIZE = 7000
NUM_CLASSES = 2

data = data.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
# Fail early instead of silently producing an empty training set.
assert len(data) > VAL_SIZE, (
    f"need more than {VAL_SIZE} rows to hold out a validation set, got {len(data)}"
)

# `train` holds the feature matrix; `valid` holds the 'winner' label column
# (names kept so that other cells referring to them keep working).
train = data.copy()
valid = train.pop('winner')
np.set_printoptions(precision=3, suppress=True)
train = np.asarray(train).astype(np.float32)
labels = np.asarray(valid)

# The last VAL_SIZE shuffled rows form the validation set, the rest is training data.
x_train, x_val = train[:-VAL_SIZE], train[-VAL_SIZE:]

# One-hot encode the labels to match the 2-unit softmax output of the model.
# tf.keras.utils.to_categorical is used rather than keras.utils.np_utils
# because np_utils is not shipped by newer Keras releases.
y_train = tf.keras.utils.to_categorical(labels[:-VAL_SIZE], NUM_CLASSES)
y_val = tf.keras.utils.to_categorical(labels[-VAL_SIZE:], NUM_CLASSES)

Set up checkpoints to save the best models while training, plus early stopping

In [7]:
# Where the best-so-far models are written during training.
acc_filepath = '/content/bestAccModel'
loss_filepath = '/content/bestLossModel'

# Both checkpoints store the full model and only overwrite it when the
# monitored metric improves.
_checkpoint_options = dict(save_weights_only=False, save_best_only=True)

# Keeps the model with the highest validation accuracy.
acc_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=acc_filepath,
    monitor='val_accuracy',
    mode='max',
    **_checkpoint_options,
)

# Keeps the model with the lowest validation loss.
loss_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=loss_filepath,
    monitor='val_loss',
    mode='min',
    **_checkpoint_options,
)

# Stop training once validation loss has not improved for 50 epochs.
early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50)

In [8]:
# Fully connected classifier: one 46-unit layer followed by four progressively
# narrower hidden layers (each followed by dropout) and a 2-way softmax output.
L2_STRENGTH = 0.1
DROPOUT_RATE = 0.1
HIDDEN_UNITS = (2048, 1024, 512, 256)


def _regularized_dense(units):
    """Return a ReLU Dense layer with L2 kernel regularization.

    The regularization factor is passed positionally because the legacy
    ``l=`` keyword is not accepted by newer Keras releases, and the
    regularizer is taken from ``tf.keras`` so that every layer object comes
    from the same Keras implementation as ``tf.keras.Sequential``.
    """
    return layers.Dense(
        units,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(L2_STRENGTH),
    )


# First layer has 46 units — presumably chosen to match the number of input
# features; TODO confirm against the processed dataset.
model_layers = [_regularized_dense(46)]
for units in HIDDEN_UNITS:
    model_layers.append(_regularized_dense(units))
    model_layers.append(layers.Dropout(DROPOUT_RATE))
model_layers.append(layers.Dense(2, activation='softmax'))

model = tf.keras.Sequential(model_layers)

# The targets are one-hot vectors over two classes and the output is a softmax,
# so categorical cross-entropy is the matching loss. For a 2-unit softmax with
# one-hot targets it equals the mean binary cross-entropy used previously
# (both reduce to -log of the true-class probability), so loss values and
# checkpoint selection remain comparable.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Train the model

In [None]:
# Train for up to 1000 epochs; the callbacks checkpoint the best models and
# stop the run early when validation loss stops improving.
training_callbacks = [acc_callback, loss_callback, early]
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=128,
    epochs=1000,
    shuffle=True,
    callbacks=training_callbacks,
)

In [10]:
# Reload the best checkpoints written during training. The paths are taken
# from the checkpoint configuration instead of being repeated as literals, so
# the two cannot drift apart, and the models are loaded through tf.keras — the
# same Keras implementation that built and saved them.
accModel = tf.keras.models.load_model(acc_filepath)
lossModel = tf.keras.models.load_model(loss_filepath)

In [None]:
# Validation loss and accuracy of the best-accuracy checkpoint.
accModel.evaluate(x=x_val, y=y_val)

In [None]:
# Validation loss and accuracy of the best-loss checkpoint.
lossModel.evaluate(x=x_val, y=y_val)

In [None]:
accModel.save('.6783acc') #save
!cp -r .6783acc /content/drive/MyDrive/Models
lossModel.save('.6427loss')
!cp -r .6427loss /content/drive/MyDrive/Models