In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Step 1: Load and preprocess the data
# Adjust the file path according to your environment
data = pd.read_excel('nba_team_data.xlsx')

# Dropping non-predictive columns
predictive_columns = data.drop(columns=['index','index_opp', 'SEASON_YEAR','SEASON_YEAR_opp','TEAM_ID','TEAM_ID_opp', 'TEAM_ABBREVIATION',
                                        'TEAM_ABBREVIATION_opp','TEAM_NAME','TEAM_NAME_opp', 'GAME_ID','GAME_ID_opp', 'GAME_DATE','GAME_DATE_opp',
                                        'MATCHUP','MATCHUP_opp', 'WL', 'WL_opp'])

# Encoding the Win/Loss column as 0s and 1s
wl_encoder = LabelEncoder()
target = wl_encoder.fit_transform(data['WL'])

# Step 2: Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(predictive_columns, target, test_size=0.2, random_state=42)

# Step 3: Building the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Step 4: Compiling the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Training the model
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.2, verbose=1)

# Step 5: Evaluating the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Model summary
model.summary()

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8958 - loss: 0.4400 - val_accuracy: 0.9804 - val_loss: 0.0424
Epoch 2/10
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9916 - loss: 0.0289 - val_accuracy: 0.9856 - val_loss: 0.0276
Epoch 3/10
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9951 - loss: 0.0170 - val_accuracy: 0.9993 - val_loss: 0.0089
Epoch 4/10
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9977 - loss: 0.0107 - val_accuracy: 0.9960 - val_loss: 0.0116
Epoch 5/10
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9935 - loss: 0.0153 - val_accuracy: 1.0000 - val_loss: 0.0030
Epoch 6/10
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9997 - loss: 0.0041 - val_accuracy: 0.9974 - val_loss: 0.0099
Epoch 7/10
[1m265/265[0m [32m━━━━━━━

In [26]:
# Convert 'WL' to numeric (1 for 'W', 0 for 'L')
data_sorted['WL_numeric'] = data_sorted['WL'].map({'W': 1, 'L': 0})

# Ensure data_sorted has a reset index if it doesn't already have a simple integer index
data_sorted.reset_index(drop=True, inplace=True)

# Calculate rolling stats
rolling_stats = data_sorted.groupby(['TEAM_NAME', 'SEASON_YEAR']).rolling(window=15, min_periods=15).agg({
    'PTS': 'mean',  # Average points scored
    'PTS_opp': 'mean',  # Average points allowed
    'WL_numeric': 'mean',  # Mean of WL_numeric gives the win ratio
    'FGM': 'mean'  # Average Field Goals Made
}).shift(1)  # Shift to not include the current game

# Reset the index of rolling_stats to turn the multi-index into columns,
# this operation will make 'TEAM_NAME' and 'SEASON_YEAR' regular columns
# and keep the rolling window index aligned with data_sorted's index
rolling_stats.reset_index(inplace=True)

# Rename the calculated columns in rolling_stats to avoid name overlap
rolling_stats = rolling_stats.rename(columns={
    'PTS': 'PTS_rolling',
    'PTS_opp': 'PTS_opp_rolling',
    'WL_numeric': 'Win_Ratio_rolling',
    'FGM': 'FGM_rolling'
})

# Since we're joining based on the index and the 'TEAM_NAME' and 'SEASON_YEAR' columns,
# ensure these columns exist in both DataFrames and are of compatible types
# Now join rolling_stats back to data_sorted using the common keys and index
data_with_rolling_stats = pd.merge(data_sorted, rolling_stats, left_on=['TEAM_NAME', 'SEASON_YEAR', data_sorted.index], right_on=['TEAM_NAME', 'SEASON_YEAR', 'level_2'], how='left')

# Drop unnecessary columns resulting from the merge, if any
data_with_rolling_stats.drop(columns=['level_2'], inplace=True)

# Filter to rows where rolling stats are available (i.e., not NaN)
evaluated_data = data_with_rolling_stats.dropna(subset=['PTS_rolling'])

# Select columns to display, adjust as needed
display_columns = ['TEAM_NAME', 'SEASON_YEAR', 'GAME_DATE', 'PTS', 'PTS_opp', 'WL', 'PTS_rolling', 'PTS_opp_rolling', 'Win_Ratio_rolling', 'FGM_rolling']
evaluated_data[display_columns].head()


Unnamed: 0,TEAM_NAME,SEASON_YEAR,GAME_DATE,PTS,PTS_opp,WL,PTS_rolling,PTS_opp_rolling,Win_Ratio_rolling,FGM_rolling
15,Atlanta Hawks,2012-13,2012-12-07,104,95,W,97.0,94.2,0.666667,37.6
16,Atlanta Hawks,2012-13,2012-12-08,93,83,W,97.133333,93.266667,0.733333,37.666667
17,Atlanta Hawks,2012-13,2012-12-10,92,101,L,96.4,92.466667,0.733333,37.2
18,Atlanta Hawks,2012-13,2012-12-12,86,80,W,96.6,93.466667,0.666667,36.8
19,Atlanta Hawks,2012-13,2012-12-13,113,90,W,96.4,92.466667,0.733333,36.866667


In [27]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Assuming 'evaluated_data' is your DataFrame with rolling stats and 'WL' outcomes

# Feature Selection: Selecting rolling stats as features
features = ['PTS_rolling', 'PTS_opp_rolling', 'Win_Ratio_rolling']
X = evaluated_data[features]

# Target Preparation: Encoding the 'WL' column
y = LabelEncoder().fit_transform(evaluated_data['WL'])

# Data Splitting: Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Data Scaling: Optional but recommended for many models
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Building: Using a Random Forest for classification
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Training the model
model.fit(X_train_scaled, y_train)

# Making predictions
predictions = model.predict(X_test_scaled)

# Evaluating the model
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 53.87%
