In [None]:
# install required materials
!pip install --upgrade tensorflow
!pip install --upgrade keras


In [23]:
# importing statements
import pandas as pd
import sklearn
import keras
from keras import layers
from keras import ops
import tensorflow as tf
from sklearn.model_selection import train_test_split



# define Sequential model with 3 layers
model = keras.Sequential(
    [
        layers.Dense(245, activation="relu", name="layer1"),
        layers.Dense(64, activation="relu", name="layer2"),
        # removing additional layers because risk of overfitting was higher with additional layers, and sticking with 128 neurons to learn more complext features from data
        layers.Dense(1, name="layer3"),


    ]
)

# load Team Results dataset into a dataframe
data = pd.read_csv('Team Results.csv', header=0)

print(data.columns)

# clean data
columns_to_remove = ['TEAM ID', 'TEAM','WIN%']  # Specify the columns you want to remove
X = data.drop(columns=columns_to_remove)
y = data["WIN%"]

# data processing, convert % string values to float values
X['F4%']= X['F4%'].apply(lambda x: float(x.rstrip('%')) / 100.0)

X['CHAMP%'] = X['CHAMP%'].apply(lambda x: float(x.rstrip('%')) / 100.0)

# Train test split

# split the data into training and testing sets
random_seed = 0
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_seed)

from sklearn.preprocessing import StandardScaler

# data standardization
scaler = StandardScaler()

# norm data
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test) # don't use "fit_transform" on the test partition

# compile model
model.compile(loss="mse", optimizer="adam")

# train model
model.fit(X_train, y_train, epochs=10)

# call model for y_pred results
y_pred_test = model.predict(X_test)
y_pred_train = model.predict(X_train)

# print(y_pred.shape)
# print(y_test.shape)

from sklearn.metrics import mean_absolute_error, r2_score

# evaluate model
print("Train MSE: ", mean_squared_error(y_pred_train, y_train))
print("Test MSE: ", mean_squared_error(y_pred_test, y_test))
print("Train R^2: ", r2_score(y_pred_train, y_train))
print("Test R^2: ", r2_score(y_pred_test, y_test))
print("Train MAE: ", mean_absolute_error(y_pred_train, y_train))
print("Test MAE: ", mean_absolute_error(y_pred_test, y_test))


Index(['TEAM ID', 'TEAM', 'PAKE', 'PAKE RANK', 'PASE', 'PASE RANK', 'GAMES',
       'W', 'L', 'WIN%', 'R64', 'R32', 'S16', 'E8', 'F4', 'F2', 'CHAMP',
       'TOP2', 'F4%', 'CHAMP%'],
      dtype='object')
Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1654
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1001 
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0303 
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0445 
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0183 
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0107 
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0103 
Epoch 8/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0082 
Epoch 9/10


In [29]:
# extract the embeddings of each TEAM
team_embeddings = model.get_layer("layer2").get_weights()[0]

# create a TSV file of these embeddings
with open("team_embeddings.tsv", "w") as f:
    for team_name, team_embedding in zip(data["TEAM"], team_embeddings):
        embedding_str = "\t".join([str(x) for x in team_embedding])
        f.write(f"{embedding_str}\n")

with open("team_metadata.tsv", "w") as f:
    f.write("TEAM\tPAKE\tPASE\tWIN%\n")
    # write the metadata with corresponding values of TEAM, PAKE, PASE, and WIN%
    for i in range(len(data)):
        team_name = data.iloc[i]["TEAM"]
        pake_value = data.iloc[i]["PAKE"]
        pase_value = data.iloc[i]["PASE"]
        win_percentage = data.iloc[i]["WIN%"]
        f.write(f"{team_name}\t{pake_value}\t{pase_value}\t{win_percentage}\n")


In [25]:
team_embeddings.shape

(245, 64)