In [3]:
# Bringing in the data

import pandas as pd

# Read the CSV file into a DataFrame
df_mm = pd.read_csv("C:/Users/hagen/Downloads/NBA DATA/Teams/Transformed Data/mm_data.csv")

df_p = pd.read_csv("C:/Users/hagen/Downloads/NBA DATA/Teams/Transformed Data/p_data.csv")

In [5]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor

def predict_nba_champion(df, model, selected_features):
    """
    Function to predict the top 5 NBA teams for the upcoming season using Logistic Regression.
    Trains the model on the years 2019-2023 using a range.
    """
    # Train the model using data from 2019-2023
    train_data = df[df['Year'].between(2019, 2023)]  # Use 2019-2023 data for training
    X_train = train_data[selected_features]
    y_train = train_data['Levels']

    # Print VIF if multiple features
    if len(selected_features) > 1:
        vif_data = pd.DataFrame({
            "Feature": selected_features,
            "VIF": [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
        })
        print(f"VIF for {selected_features}:\n{vif_data}\n")
    
    model.fit(X_train, y_train)

    # Get data for the upcoming season
    test_data = df[df['Year'] == 2024]  # Get the data for the upcoming season
    X_test = test_data[selected_features]

    # Make predictions for the upcoming season
    y_pred = model.predict_proba(X_test)

    # Rank teams for the upcoming season based on predicted probabilities
    ranked_teams = test_data[['Team']].copy()
    ranked_teams['Score'] = y_pred[:, 4]
    ranked_teams = ranked_teams.sort_values(by='Score', ascending=False)

    # Get the top 5 teams and their scores
    top_teams = ranked_teams.head(5)
    top_teams_list = top_teams.apply(lambda row: f"{row['Team']}: {row['Score']:.4f}", axis=1).tolist()
    
    # Print the top 5 predictions
    print(f"\nTop 5 predictions for the upcoming NBA season:")
    for i, team in enumerate(top_teams_list, 1):
        print(f"{i}. {team}")

In [6]:
model = LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=1000)

selected_features = ['W', 'eFG%']

In [18]:
import warnings
from datetime import datetime

# Mute the specific FutureWarning for 'multi_class' deprecation in LogisticRegression
warnings.filterwarnings("ignore", category=FutureWarning, 
                        message=".*'multi_class' was deprecated.*")

def current_time_and_date():
    now = datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")  # Format: YYYY-MM-DD HH:MM:SS

predict_nba_champion(df_p, model, selected_features)

current_time_and_date()

VIF for ['W', 'eFG%']:
  Feature       VIF
0       W  6.949585
1    eFG%  6.949585


Top 5 predictions for the upcoming NBA season:
1. Cleveland Cavaliers: 0.0755
2. Denver Nuggets: 0.0710
3. Indiana Pacers: 0.0662
4. Boston Celtics: 0.0653
5. Oklahoma City Thunder: 0.0626


'2025-04-14 16:06:30'