In [4]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, ndcg_score

# Set seeds for reproducibility
seed = 42
os.environ['TF_DETERMINISTIC_OPS'] = '1'
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Load and preprocess data
data = pd.read_csv('2024_2025_season.csv')  # Replace with actual file name

# Calculate the fantasy score, now scaled by the number of games played (GP)
data['fantasy_score'] = (
    (2 * data['FGM'] - data['FGA'] + data['FTM'] - data['FTA'] +
    data['3P'] + data['PPG'] + 2 * data['APG'] + data['RPG'] +
    4 * (data['STL'] + data['BLK']) - 2 * data['TOV']) * data['GP']
)

# Normalize features
features = ['FGM', 'FGA', 'FTM', 'FTA', '3P', 'PPG', 'APG', 'RPG', 'STL', 'BLK', 'TOV', 'GP']
data[features] = (data[features] - data[features].mean()) / data[features].std()

# Group data for listwise ranking (grouped by 'Team' for example)
grouped_data = data.groupby('Team')
X, y = [], []
for _, group in grouped_data:
    X.append(group[features].values)
    y.append(group['fantasy_score'].values)

# Pad sequences for TensorFlow model
max_group_size = max(len(group) for group in X)
X_padded = tf.keras.preprocessing.sequence.pad_sequences(
    X, maxlen=max_group_size, padding='post', dtype='float32'
)
y_padded = tf.keras.preprocessing.sequence.pad_sequences(
    y, maxlen=max_group_size, padding='post', dtype='float32'
)

# Prepare TensorFlow dataset
def prepare_dataset(X, y):
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.shuffle(buffer_size=100, seed=seed).batch(32)  # Fixed seed for shuffling
    return dataset

train_dataset = prepare_dataset(X_padded, y_padded)

# Define a neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(max_group_size, len(features))),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output relevance score
])

# Custom listwise loss function
def listwise_loss(y_true, y_pred):
    y_pred = tf.nn.softmax(y_pred, axis=1)
    y_true = tf.nn.softmax(y_true, axis=1)
    return -tf.reduce_sum(y_true * tf.math.log(y_pred + 1e-9))

# Compile model with AdamW optimizer
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=1e-4),
    loss=listwise_loss,
    metrics=['accuracy']
)

# Train the model
model.fit(train_dataset, epochs=10)

# Predict rankings using the trained model
predictions = model.predict(X_padded)
predictions_flat = [pred[:len(group)] for pred, group in zip(predictions, X)]

# Flatten and align predictions with the dataset
predicted_scores_flat = np.concatenate(predictions_flat).reshape(-1, 1)
true_fantasy_scores = data['fantasy_score'].values.reshape(-1, 1)

# Split data into training and testing for Random Forest
X_train, X_test, y_train, y_test = train_test_split(
    predicted_scores_flat, true_fantasy_scores, test_size=0.2, random_state=seed  # Fixed seed
)

# Hyperparameter tuning for Random Forest
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

rf_regressor = RandomForestRegressor(random_state=seed)
grid_search = GridSearchCV(rf_regressor, param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train.ravel())
best_rf = grid_search.best_estimator_

# Use the best model to predict fantasy scores
mapped_fantasy_points = best_rf.predict(predicted_scores_flat)
data['predicted_fantasy_points'] = mapped_fantasy_points

# Evaluate the model
mae = mean_absolute_error(y_test, best_rf.predict(X_test))
ndcg = ndcg_score([y_test.ravel()], [best_rf.predict(X_test)])
print(f"Mean Absolute Error: {mae}")
print(f"NDCG Score: {ndcg}")

# Sort and print all players by predicted fantasy points
ranked_players = data.sort_values(by='predicted_fantasy_points', ascending=False)
all_players = ranked_players[['Name', 'Team', 'Position', 'predicted_fantasy_points']]

print("Full List of Fantasy Basketball Players (Mapped Fantasy Points:")
print(all_players.to_string(index=False))


Epoch 1/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 564ms/step - accuracy: 0.0000e+00 - loss: 48.0090 - learning_rate: 0.0010
Epoch 2/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 48.0804 - learning_rate: 0.0010
Epoch 3/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0000e+00 - loss: 47.6962 - learning_rate: 0.0010
Epoch 4/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 47.8336 - learning_rate: 0.0010
Epoch 5/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 47.6755 - learning_rate: 0.0010
Epoch 6/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 47.5185 - learning_rate: 0.0010
Epoch 7/82
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 46.9231 - learn