<div class="alert alert-danger">
    <h4 style="font-weight: bold; font-size: 28px;">Feature Selection using Wrapper Methods</h4>
    <p style="font-size: 20px;">NBA API Data (2022-2024)</p>
</div>

<a name="Feature-Selection"></a>

# Table of Contents

[Setup](#Setup)

[Data](#Data)

**[1. Wrapper Methods for Total Points](#1.-Wrapper-Methods-for-Total-Points)**

**[2. Wrapper Methods for Plus Minus](#2.-Wrapper-Methods-for-Plus-Minus)**

**[3. Wrapper Methods for Game Winner](#3.-Wrapper-Methods-for-Game-Winner)**

# Setup

[Return to top](#Feature-Selection)

In [1]:
import sys
from pathlib import Path
# get current working directory
cwd = %pwd
# add shared_code directory to Python sys.path
sys.path.append(str(Path(cwd).parent / "shared_code"))
# import all libraries in shared_code directory 'imports.py' file
from imports import *
%matplotlib inline

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


# Data

[Return to top](#Feature-Selection)

In [2]:
# Load the processed rolling box-score file, keep the listed seasons, and
# scale the data (min-max scaler requested, target left unscaled).
# The helper hands back four objects; judging by how later cells pair them
# with outcome names, the first three are presumably one frame each for
# TOTAL_PTS, PLUS_MINUS and GAME_RESULT — TODO confirm against `utl`.
(
    pts_scaled_df,
    pm_scaled_df,
    res_scaled_df,
    test_set_obs,
) = utl.load_and_scale_data(
    file_path='../../data/processed/nba_team_matchups_rolling_box_scores_2022_2024_r05.csv',
    seasons_to_keep=['2021-22', '2022-23', '2023-24'],
    training_season='2021-22',
    feature_prefix='ROLL_',
    scaler_type='minmax',
    scale_target=False,
)

Season 2021-22: 1186 games
Season 2022-23: 1181 games
Season 2023-24: 692 games
Total number of games across sampled seasons: 3059 games


<a name="1.-Wrapper-Methods-for-Total-Points"></a>
# 1. Wrapper Methods for Total Points

[Return to top](#Feature-Selection)

In [3]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for TOTAL_PTS using a linear-regression
# estimator. The helper prints the forward- and backward-selected feature
# lists shown in the cell output; what it returns is kept for later use.
pts_selected_lr = utl.sequential_feature_selection(
    df=pts_scaled_df,
    outcome_name='TOTAL_PTS',
    estimator=LinearRegression()
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_PTS",
        "ROLL_HOME_FG3M",
        "ROLL_HOME_FTA",
        "ROLL_HOME_REB",
        "ROLL_HOME_AST",
        "ROLL_HOME_PF",
        "ROLL_AWAY_PTS",
        "ROLL_AWAY_FG3_PCT",
        "ROLL_AWAY_DREB"
    ],
    "backward_selected": [
        "ROLL_HOME_PTS",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FG3_PCT",
        "ROLL_HOME_REB",
        "ROLL_HOME_AST",
        "ROLL_HOME_STL",
        "ROLL_HOME_PF",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FG_PCT",
        "ROLL_AWAY_FTA",
        "ROLL_AWAY_REB",
        "ROLL_AWAY_STL"
    ]
}
Total time taken: 5.90 seconds


In [4]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# during this multi-minute run (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for TOTAL_PTS using a random-forest regressor.
# random_state is fixed so the forest is reproducible; n_jobs=-1 uses all
# available cores. The helper prints the forward- and backward-selected
# feature lists shown in the cell output.
pts_selected_rf = utl.sequential_feature_selection(
    df=pts_scaled_df,
    outcome_name='TOTAL_PTS',
    estimator=RandomForestRegressor(random_state=599, n_jobs=-1, max_depth=8, max_features=0.3)
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_FGM",
        "ROLL_HOME_FTA",
        "ROLL_HOME_STL",
        "ROLL_HOME_PF",
        "ROLL_AWAY_PTS",
        "ROLL_AWAY_FG3A",
        "ROLL_AWAY_DREB"
    ],
    "backward_selected": [
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FTM",
        "ROLL_HOME_AST",
        "ROLL_HOME_STL",
        "ROLL_HOME_PF",
        "ROLL_AWAY_PTS"
    ]
}
Total time taken: 458.79 seconds


<a name="2.-Wrapper-Methods-for-Plus-Minus"></a>
# 2. Wrapper Methods for Plus Minus

[Return to top](#Feature-Selection)

In [5]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for PLUS_MINUS using a linear-regression
# estimator. The helper prints the forward- and backward-selected feature
# lists shown in the cell output; what it returns is kept for later use.
pm_selected_lr = utl.sequential_feature_selection(
    df=pm_scaled_df,
    outcome_name='PLUS_MINUS',
    estimator=LinearRegression()
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_FGA",
        "ROLL_HOME_FG_PCT",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FTA",
        "ROLL_HOME_DREB",
        "ROLL_HOME_REB",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FT_PCT",
        "ROLL_AWAY_AST"
    ],
    "backward_selected": [
        "ROLL_HOME_FGM",
        "ROLL_HOME_FGA",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FTM",
        "ROLL_HOME_FTA",
        "ROLL_HOME_FT_PCT",
        "ROLL_HOME_REB",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FT_PCT"
    ]
}
Total time taken: 5.94 seconds


In [6]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# during this multi-minute run (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for PLUS_MINUS using a random-forest regressor.
# random_state is fixed so the forest is reproducible; n_jobs=-1 uses all
# available cores. The helper prints the forward- and backward-selected
# feature lists shown in the cell output.
pm_selected_rf = utl.sequential_feature_selection(
    df=pm_scaled_df,
    outcome_name='PLUS_MINUS',
    estimator=RandomForestRegressor(random_state=599, n_jobs=-1, max_depth=8, max_features=0.3)
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_FG_PCT",
        "ROLL_HOME_FG3M",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FG3_PCT",
        "ROLL_HOME_FT_PCT",
        "ROLL_HOME_DREB",
        "ROLL_HOME_TOV",
        "ROLL_AWAY_PTS",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FG3A",
        "ROLL_AWAY_FT_PCT",
        "ROLL_AWAY_AST",
        "ROLL_AWAY_BLK",
        "ROLL_AWAY_PF"
    ],
    "backward_selected": [
        "ROLL_HOME_FG_PCT",
        "ROLL_HOME_FT_PCT",
        "ROLL_HOME_DREB",
        "ROLL_HOME_BLK",
        "ROLL_HOME_TOV",
        "ROLL_AWAY_FG3A",
        "ROLL_AWAY_FT_PCT",
        "ROLL_AWAY_AST",
        "ROLL_AWAY_STL"
    ]
}
Total time taken: 473.23 seconds


<a name="3.-Wrapper-Methods-for-Game-Winner"></a>
# 3. Wrapper Methods for Game Winner

[Return to top](#Feature-Selection)

In [7]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for GAME_RESULT using a linear-regression
# estimator. The helper prints the forward- and backward-selected feature
# lists shown in the cell output.
# NOTE(review): the next cell fits this same outcome with a classifier, so
# GAME_RESULT looks like a class label; LinearRegression treats it as a
# continuous target. The recorded output shows forward selection keeping a
# single feature while backward selection keeps a long list — confirm that a
# regression estimator is intended here.
res_selected_lr = utl.sequential_feature_selection(
    df=res_scaled_df,
    outcome_name='GAME_RESULT',
    estimator=LinearRegression()
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_PTS"
    ],
    "backward_selected": [
        "ROLL_HOME_PTS",
        "ROLL_HOME_FGM",
        "ROLL_HOME_FGA",
        "ROLL_HOME_FG_PCT",
        "ROLL_HOME_FG3M",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_FG3_PCT",
        "ROLL_HOME_FTM",
        "ROLL_HOME_FTA",
        "ROLL_HOME_FT_PCT",
        "ROLL_HOME_OREB",
        "ROLL_HOME_DREB",
        "ROLL_HOME_REB",
        "ROLL_HOME_AST",
        "ROLL_HOME_STL",
        "ROLL_HOME_BLK",
        "ROLL_HOME_TOV",
        "ROLL_HOME_PF",
        "ROLL_AWAY_PTS",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FGA",
        "ROLL_AWAY_FG_PCT",
        "ROLL_AWAY_FG3M",
        "ROLL_AWAY_FG3A",
        "ROLL_AWAY_FG3_PCT",
        "ROLL_AWAY_FTM",
        "ROLL_AWAY_FTA",
        "ROLL_AWAY_FT_PCT",
        "ROLL_AWAY_OREB",
        "ROLL_AWAY_DREB",
        "ROLL_AWAY_REB",
        "ROLL_AWAY_AST",
        "ROLL_AWAY_STL",
        "ROLL_AWAY_BLK",
        "ROLL_AWAY_TOV",
        "ROL

In [8]:
# Time the run with perf_counter(): it is a monotonic, high-resolution clock,
# so the reported duration cannot be skewed by system clock adjustments
# during this multi-minute run (time.time() is wall-clock time and may jump).
start_time = time.perf_counter()

# Sequential feature selection for GAME_RESULT using a random-forest
# classifier. random_state is fixed so the forest is reproducible; n_jobs=-1
# uses all available cores. The helper prints the forward- and
# backward-selected feature lists shown in the cell output.
res_selected_rf = utl.sequential_feature_selection(
    df=res_scaled_df,
    outcome_name='GAME_RESULT',
    estimator=RandomForestClassifier(random_state=599, n_jobs=-1, max_depth=8, max_features='sqrt')
)

end_time = time.perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

{
    "forward_selected": [
        "ROLL_HOME_FG3_PCT",
        "ROLL_HOME_DREB",
        "ROLL_HOME_AST",
        "ROLL_HOME_STL",
        "ROLL_HOME_TOV",
        "ROLL_AWAY_FGM",
        "ROLL_AWAY_FG_PCT",
        "ROLL_AWAY_AST"
    ],
    "backward_selected": [
        "ROLL_HOME_FGM",
        "ROLL_HOME_FG3A",
        "ROLL_HOME_AST",
        "ROLL_HOME_TOV",
        "ROLL_AWAY_FG_PCT",
        "ROLL_AWAY_FG3A",
        "ROLL_AWAY_AST",
        "ROLL_AWAY_STL",
        "ROLL_AWAY_TOV"
    ]
}
Total time taken: 590.41 seconds
