In [None]:
# import dependencies here
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Load the raw player table, then keep only the rows whose stolen_bases and
# batting_average entries are not the literal string 'no'.
players_df = pd.read_csv("Resources/player_data.csv")

stolen_bases_present = players_df["stolen_bases"] != 'no'
batting_average_present = players_df["batting_average"] != 'no'
clean_df = players_df.loc[stolen_bases_present & batting_average_present]

clean_df.head()

In [None]:
# Turn fielding-position codes into "yes" (intended for the HOF column).
# NOTE: DataFrame.replace is applied to the whole frame, so any cell in any
# column that equals one of these codes becomes "yes".
position_codes = ["P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"]
replaced_df = clean_df.replace(
    to_replace=position_codes,
    value=["yes"] * len(position_codes),
)

In [None]:
# Store X (features) and y (target)

# Nine batting statistics used as model inputs. The selector must be a flat
# list of column names; a nested list is not a valid column selection.
feature_columns = [
    "at_bats", "runs", "hits", "runs_batted_in", "bases_on_balls",
    "doubles", "triples", "home_runs", "stolen_bases",
]
# stolen_bases is compared against the string 'no' during cleaning, so it was
# presumably read as text (TODO confirm); cast so every feature is numeric.
X = replaced_df[feature_columns].astype(float)

# One-hot encode the target from the same frame the features come from
# (`replaced_df`), then keep only the "yes" indicator column as the label.
encoded_y = pd.get_dummies(replaced_df[["hall_of_fame"]])
clean_y = encoded_y[["hall_of_fame_yes"]]
y = clean_y.rename(columns={"hall_of_fame_yes": "hall_of_fame"})

In [None]:
# Split data into train and test sets.
# random_state pins the shuffle so Restart & Run All reproduces the same
# split; stratify=y keeps the yes/no label ratio equal in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

# One-hot encode the binary label for the 2-unit softmax output.
# num_classes=2 fixes the width to two columns regardless of which label
# values happen to be present in a given partition.
y_train_categorical = to_categorical(y_train, num_classes=2)
y_test_categorical = to_categorical(y_test, num_classes=2)

In [None]:
# Standardize with StandardScaler; both scalers are fitted on the training
# partition only and then applied to train and test.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
y_scaler = StandardScaler()
y_scaler.fit(y_train)

# Features
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Target
# NOTE(review): the model-fitting cell trains on y_train_categorical, so these
# scaled targets are not consumed by the visible cells — confirm they are needed.
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

In [None]:
# Create an empty Keras Sequential model; layers are appended to it with
# model.add(...) in the following cells.
model = Sequential()

In [None]:
# First (hidden) layer of the network: takes the 9 input features and feeds
# them into 4 ReLU units.
number_inputs = 9
number_hidden_nodes = 4

hidden_layer = Dense(
    units=number_hidden_nodes,
    activation='relu',
    input_dim=number_inputs,
)
model.add(hidden_layer)

In [None]:
# Output layer: one softmax unit per class (2 classes), then print the
# resulting architecture.
number_classes = 2

output_layer = Dense(units=number_classes, activation='softmax')
model.add(output_layer)

model.summary()

In [None]:
# Compile the model.
# The labels are one-hot encoded with to_categorical and the output layer is
# a softmax, so the matching loss is categorical cross-entropy rather than
# mean squared error.
# class_id=1 restricts recall/precision to column 1 of the one-hot labels,
# i.e. the positive ("hall_of_fame_yes") class; without it the metrics are
# computed over both one-hot columns together.
recall = tf.keras.metrics.Recall(class_id=1)
precision = tf.keras.metrics.Precision(class_id=1)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', recall, precision])

In [None]:
# Train the network on the scaled training features and the one-hot labels:
# 200 epochs, reshuffling each epoch, one log line per epoch.
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=200, shuffle=True, verbose=2)

In [None]:
# Predict class probabilities for the test set.
# The model was trained on X_train_scaled, so the test features must go
# through the same scaling (X_test_scaled) before prediction.
y_predict = model.predict(X_test_scaled)
y_predict

In [None]:
# Attach the predicted class probabilities to the player rows they belong to.
# y_predict only covers the test partition, so it is labelled with
# X_test.index and joined to those same rows of replaced_df. Column 0 / 1 of
# the softmax output correspond to label 0 (not HOF) / 1 (HOF).
predictions_df = pd.DataFrame(
    y_predict,
    index=X_test.index,
    columns=["prob_not_hall_of_fame", "prob_hall_of_fame"],
)
results_df = pd.concat([replaced_df.loc[X_test.index], predictions_df], axis=1)
results_df.head()