In [None]:
!pip install keras-tuner
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, callbacks
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# Load the 2022 batting table. The path points into a mounted Google Drive
# folder, so Drive must be mounted before this cell runs.
df = pd.read_csv('/content/drive/MyDrive/STAT551/2022 MLB Player Stats - Batting_colab.csv')

# 'Name' is removed before the float32 cast below (presumably a non-numeric
# player identifier -- confirm against the CSV).
df = df.drop(columns=['Name'])

# Binary target (all-star flag) and the remaining columns as features, both
# cast to float32 for Keras.
Y = df['Allstar_batter'].values.astype('float32')
X = df.drop(columns=['Allstar_batter']).astype('float32')

# Stratified 80/20 split. random_state is fixed so the same rows land in the
# train and test sets on every run; without it every re-run of the notebook
# evaluates on a different test set and results cannot be compared.
train_features, test_features, train_targets, test_targets = train_test_split(
    X, Y, train_size=0.8, test_size=0.2, stratify=Y, random_state=42)

def build_model(hp):
    """Build and compile a binary classifier for one keras-tuner trial.

    The network is a Sequential stack: a tunable input Dense + Dropout pair,
    followed by 1-3 further tunable Dense + Dropout pairs, and a single
    sigmoid output unit. The input width is read from the module-level
    ``train_features`` DataFrame.

    Args:
        hp: keras-tuner hyperparameter container used to sample layer widths,
            dropout rates, the number of hidden layers and the learning rate.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()
    model.add(layers.Dense(
        units=hp.Int('units_input', min_value=10, max_value=30, step=5),
        activation='relu',
        input_dim=len(train_features.columns)))
    model.add(layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)))

    # Each extra hidden layer gets its own, index-suffixed hyperparameters.
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.Dense(
            units=hp.Int('units_' + str(i), min_value=10, max_value=30, step=5),
            activation='relu'))
        model.add(layers.Dropout(
            hp.Float('dropout_' + str(i), min_value=0.1, max_value=0.5, step=0.1)))

    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.BinaryCrossentropy(),
        # Metric objects with explicit names instead of the strings
        # 'Precision' / 'Accuracy':
        #  * the log keys ('precision', 'accuracy' and their 'val_' variants)
        #    are fixed, so tuner objectives and callbacks can rely on them;
        #  * only the lowercase strings 'accuracy'/'acc' are auto-mapped to a
        #    thresholded accuracy; 'Accuracy' selects the exact-equality
        #    metric, which is not meaningful for sigmoid probabilities.
        metrics=[
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        ])

    return model

# Bayesian search over the hyperparameters declared in build_model.
# The objective must match a key that Keras actually logs: the precision
# metric is reported as 'precision' / 'val_precision' (lowercase), so the
# original 'Precision' could never be found in the training logs.
# The validation variant is used because validation_split is passed to
# search(); ranking trials on training precision would reward overfitting.
tuner = kt.BayesianOptimization(
    build_model,
    objective=kt.Objective('val_precision', direction='max'),
    max_trials=5,
    executions_per_trial=5,
    overwrite=True,
    directory='/content/drive/MyDrive/STAT551/tuner',
    project_name='baseball_tune2'
)

# Stop an execution once validation precision has not improved for 5 epochs.
# mode='max' is explicit because precision is a higher-is-better metric and
# the callback's automatic mode is not guaranteed to infer that from the name.
stop_early = callbacks.EarlyStopping(monitor='val_precision', mode='max', patience=5)

tuner.search(train_features, train_targets, epochs=10, validation_split=0.2, batch_size=4, callbacks=[stop_early])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model on the held-out test split. evaluate() returns the
# loss followed by the compiled metrics in order, so index 0 is the loss and
# index 1 is precision; both are already scalars, no averaging is needed.
evaluation = best_model.evaluate(test_features, test_targets)
print('test loss:', evaluation[0])
print('test precision:', evaluation[1])






Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m112.6/128.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
20                |20                |units_input
0.4               |0.4               |dropout
3                 |3                 |num_layers
25                |25                |units_0
0.5               |0.5               |dropout_0
0.001           



Epoch 2/10



Epoch 3/10



Epoch 4/10



Epoch 5/10



Epoch 6/10



Epoch 7/10

KeyboardInterrupt: ignored

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, callbacks
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# Load the 2022 pitching table. The path points into a mounted Google Drive
# folder, so Drive must be mounted before this cell runs.
df = pd.read_csv('/content/drive/MyDrive/STAT551/2022 MLB Player Stats - Pitching_colab.csv')

# 'Name' is removed before the float32 cast below (presumably a non-numeric
# player identifier -- confirm against the CSV).
df = df.drop(columns=['Name'])

# Binary target (all-star flag) and the remaining columns as features, both
# cast to float32 for Keras.
Y = df['Allstar_pitcher'].values.astype('float32')
X = df.drop(columns=['Allstar_pitcher']).astype('float32')

# Stratified 80/20 split. random_state is fixed so the same rows land in the
# train and test sets on every run; without it every re-run of the notebook
# evaluates on a different test set and results cannot be compared.
train_features, test_features, train_targets, test_targets = train_test_split(
    X, Y, train_size=0.8, test_size=0.2, stratify=Y, random_state=42)

def build_model(hp):
    """Build and compile a binary classifier for one keras-tuner trial.

    The network is a Sequential stack: a tunable input Dense + Dropout pair,
    followed by 1-3 further tunable Dense + Dropout pairs, and a single
    sigmoid output unit. The input width is read from the module-level
    ``train_features`` DataFrame.

    Args:
        hp: keras-tuner hyperparameter container used to sample layer widths,
            dropout rates, the number of hidden layers and the learning rate.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()
    model.add(layers.Dense(
        units=hp.Int('units_input', min_value=10, max_value=30, step=5),
        activation='relu',
        input_dim=len(train_features.columns)))
    model.add(layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)))

    # Each extra hidden layer gets its own, index-suffixed hyperparameters.
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.Dense(
            units=hp.Int('units_' + str(i), min_value=10, max_value=30, step=5),
            activation='relu'))
        model.add(layers.Dropout(
            hp.Float('dropout_' + str(i), min_value=0.1, max_value=0.5, step=0.1)))

    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=tf.keras.losses.BinaryCrossentropy(),
        # Metric objects with explicit names instead of the strings
        # 'Precision' / 'Accuracy':
        #  * the log keys ('precision', 'accuracy' and their 'val_' variants)
        #    are fixed, so tuner objectives and callbacks can rely on them;
        #  * only the lowercase strings 'accuracy'/'acc' are auto-mapped to a
        #    thresholded accuracy; 'Accuracy' selects the exact-equality
        #    metric, which is not meaningful for sigmoid probabilities.
        metrics=[
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        ])

    return model

# Bayesian search over the hyperparameters declared in build_model.
# The objective must match a key that Keras actually logs: the precision
# metric is reported as 'precision' / 'val_precision' (lowercase), so the
# original 'Precision' could never be found in the training logs.
# The validation variant is used because validation_split is passed to
# search(); ranking trials on training precision would reward overfitting.
tuner = kt.BayesianOptimization(
    build_model,
    objective=kt.Objective('val_precision', direction='max'),
    max_trials=5,
    executions_per_trial=5,
    overwrite=True,
    directory='/content/drive/MyDrive/STAT551/tuner',
    project_name='baseball_tune3'
)

# Stop an execution once validation precision has not improved for 5 epochs.
# mode='max' is explicit because precision is a higher-is-better metric and
# the callback's automatic mode is not guaranteed to infer that from the name.
stop_early = callbacks.EarlyStopping(monitor='val_precision', mode='max', patience=5)

tuner.search(train_features, train_targets, epochs=10, validation_split=0.2, batch_size=4, callbacks=[stop_early])

# Best model found by the search (highest objective value).
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate on the held-out test split. evaluate() returns the loss followed by
# the compiled metrics in order, so index 0 is the loss and index 1 is
# precision; both are already scalars, no averaging is needed.
evaluation = best_model.evaluate(test_features, test_targets)
print('test loss:', evaluation[0])
print('test precision:', evaluation[1])



Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
30                |30                |units_input
0.4               |0.4               |dropout
1                 |1                 |num_layers
25                |25                |units_0
0.2               |0.2               |dropout_0
0.001             |0.001             |learning_rate

Epoch 1/10



Epoch 2/10



Epoch 3/10



Epoch 4/10
 29/139 [=====>........................] - ETA: 0s - loss: 2.0068 - precision: 0.0714 - Accuracy: 0.8362    

KeyboardInterrupt: ignored