### Grab player data using the `FetchPlayersStats` class
Enter the season you want in `season`, and either Regular Season or Playoffs in `season_type`.

In [None]:
from NBAData.fetchPlayersStats import FetchPlayersStats

# Create the stats client and call getCompleteStats for the 2024-25 playoffs.
nba = FetchPlayersStats()
# NOTE(review): sleep_time and max_workers presumably throttle / parallelise
# the underlying requests — confirm against FetchPlayersStats.
data = nba.getCompleteStats(season='2024-25', season_type='Playoffs', sleep_time=1, max_workers=5)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Assign features for regular season data

In [3]:
# Paths to the per-year PTS feature CSVs, keyed by four-digit year.
# NOTE(review): the variable is named for the regular season, yet every path
# points into CSV_FILES/PLAYOFF_DATA — confirm which data set is intended.
regular_season_files = {
    2021: 'CSV_FILES/PLAYOFF_DATA/PLAYOFFS_21_PTS_features.csv',
    2022: 'CSV_FILES/PLAYOFF_DATA/PLAYOFFS_22_PTS_features.csv',
    2023: 'CSV_FILES/PLAYOFF_DATA/PLAYOFFS_23_PTS_features.csv',
    2024: 'CSV_FILES/PLAYOFF_DATA/PLAYOFFS_24_PTS_features.csv',
}

# Player names per year; the list for a given year is passed to
# process_season_features as `star_players` by the feature-building cells.
# NOTE(review): the 2025 list spells 'Nikola Jokić' with a diacritic while the
# earlier years use "Nikola Jokic" — confirm which spelling matches the player
# names in the data so name matching behaves the same for every year.
star_players_by_year = {
    2021: [
        "Giannis Antetokounmpo", "Kawhi Leonard", "Nikola Jokic", "Stephen Curry", "Luka Doncic",
        "Julius Randle", "LeBron James", "Joel Embiid", "Damian Lillard", "Chris Paul",
        "Jimmy Butler", "Paul George", "Rudy Gobert", "Bradley Beal", "Kyrie Irving",
        "Devin Booker", "Mike Conley", "James Harden", "Zach LaVine", "Donovan Mitchell",
        "Nikola Vucevic", "Anthony Davis"
    ],
    2022: [
        "Giannis Antetokounmpo", "Luka Doncic", "Jayson Tatum", "Nikola Jokic", "Devin Booker",
        "Ja Morant", "Stephen Curry", "DeMar DeRozan", "Kevin Durant", "Joel Embiid",
        "LeBron James", "Chris Paul", "Trae Young", "Pascal Siakam", "Karl-Anthony Towns",
        "Andrew Wiggins", "Donovan Mitchell", "Rudy Gobert", "Zach LaVine", "Khris Middleton",
        "Jimmy Butler", "Darius Garland", "Fred VanVleet", "LaMelo Ball"
    ],
    2023: [
        "Giannis Antetokounmpo", "Jayson Tatum", "Joel Embiid", "Shai Gilgeous-Alexander", "Luka Doncic",
        "Jaylen Brown", "Jimmy Butler", "Nikola Jokic", "Stephen Curry", "Donovan Mitchell",
        "LeBron James", "Julius Randle", "Domantas Sabonis", "De'Aaron Fox", "Damian Lillard",
        "Kyrie Irving", "Zion Williamson", "Kevin Durant", "Ja Morant", "DeMar DeRozan",
        "Tyrese Haliburton", "Jrue Holiday", "Bam Adebayo", "Jaren Jackson Jr.", "Paul George",
        "Pascal Siakam", "Anthony Edwards"
    ],
    2024: [
        "Shai Gilgeous-Alexander", "Luka Doncic", "Jayson Tatum", "Giannis Antetokounmpo", "Nikola Jokic",
        "Jalen Brunson", "Anthony Edwards", "Kawhi Leonard", "Kevin Durant", "Anthony Davis",
        "Stephen Curry", "Devin Booker", "LeBron James", "Domantas Sabonis", "Bam Adebayo",
        "Tyrese Haliburton", "Damian Lillard", "Karl-Anthony Towns", "Jaylen Brown",
        "Trae Young", "Paolo Banchero", "Scottie Barnes"
    ],
    2025: [
        'Shai Gilgeous-Alexander', 'Nikola Jokić', 'Giannis Antetokounmpo', 'Jayson Tatum', 'Donovan Mitchell',
        'Anthony Edwards', 'LeBron James', 'Stephen Curry', 'Evan Mobley', 'Jalen Brunson',
        'Cade Cunningham', 'Karl-Anthony Towns', 'Tyrese Haliburton', 'Jalen Williams', 'James Harden',
        'Darius Garland', 'Damian Lillard', 'Anthony Davis', 'Kyrie Irving', 'Jaylen Brown', 'Tyler Herro', 'Jaren Jackson Jr.', 
        'Pascal Siakam', 'Victor Wembanyama', 'Alperen Sengun', 'Trae Young'
]}

In [None]:
from NBAData.features import *
from NBAData.playerPositions import *
import pandas as pd
import numpy as np

# Lift pandas' display limit on the number of columns (None = no limit).
pd.set_option('display.max_columns', None)
def process_season_features(season_df, prop_type, year, star_players):
    """
    Build the regular-season feature table for one season and one prop.

    Works on a copy of ``season_df``: marks every row with IS_PLAYOFF = 0,
    derives a 0/1 STARTING flag from START_POSITION, casts OPP_TEAM_ID to
    int, and then runs the position, rest-day, team-encoding, prop-specific
    and lineup feature helpers in a fixed order.

    Args:
        season_df: Raw rows for one season. Must contain START_POSITION and
            OPP_TEAM_ID, plus whatever columns the feature helpers read.
        prop_type: Stat column name forwarded to the helpers as ``stat_line``
            (the driver loop passes 'PTS', 'AST' or 'REB').
        year: Season year. Accepted from callers but not read in this body.
        star_players: Player list forwarded to ``allLineupFeatures``.

    Returns:
        The processed DataFrame, without the 'Unnamed: 0' / 'Unnamed: 0.1'
        columns if they were present.
    """
    starter_slots = ('G', 'F', 'C')

    # Shared preprocessing, done on a copy so the caller's frame is untouched.
    features = season_df.copy()
    features['IS_PLAYOFF'] = 0
    features['STARTING'] = features['START_POSITION'].apply(
        lambda slot: int(slot in starter_slots)
    )
    features['OPP_TEAM_ID'] = features['OPP_TEAM_ID'].astype(int)

    # Position lookup (backed by the playerInfo.csv cache file).
    features = assign_position_with_cache(
        features,
        cache_file='playerInfo.csv',
        max_workers=4,
        delay_between_requests=1.5,
    )

    # Features shared by every prop.
    features = encode_teams(add_rest_day_features(features))

    # Features computed for the requested prop, in the original order.
    features = statAgainstTeam(
        features,
        player_id_col='PLAYER_ID',
        opp_col='OPP_ABBREVIATION',
        stat_line=prop_type,
    )
    features = rollingAverages(features, [2, 4, 6], stat_line=prop_type)
    features = HomeAwayAverages(features, stat_line=prop_type)
    features = addLagFeatures(features, stat_line=prop_type)
    features = add_all_opponent_features(features, stat_line=prop_type)
    features = allLineupFeatures(features, star_players)

    # Strip leftover index columns picked up from earlier CSV round-trips.
    leftover = [
        col for col in ('Unnamed: 0', 'Unnamed: 0.1') if col in features.columns
    ]
    if leftover:
        features.drop(columns=leftover, inplace=True)
    return features

# Usage example:
# Build and save the regular-season feature file for every (prop, season) pair.
prop_types = ['PTS', 'AST', 'REB']
seasons = [2021, 2022, 2023, 2024, 2025]

for prop in prop_types:
    for year in seasons:
        # Input is S<yy>.csv; output is <prop>_<yy>.csv in the same folder.
        source_csv = f'CSV_FILES/REGULAR_DATA/S{str(year)[-2:]}.csv'
        target_csv = f'CSV_FILES/REGULAR_DATA/{prop}_{str(year)[-2:]}.csv'

        raw_data = pd.read_csv(source_csv)
        processed_data = process_season_features(
            raw_data, prop_type=prop, year=year, star_players=star_players_by_year[year]
        )

        # NOTE(review): to_csv keeps its default index=True, so the row index
        # is written as an extra unnamed column — confirm readers expect it.
        processed_data.to_csv(target_csv)

## Assign Features for playoff data

In [None]:
from NBAData.features import *
from NBAData.playerPositions import *
import pandas as pd
import numpy as np

# Lift pandas' display limit on the number of columns (None = no limit).
pd.set_option('display.max_columns', None)
def process_season_features(season_df, prop_type, year, star_players):
    """
    Build the playoff feature table for one season and one prop.

    Works on a copy of ``season_df``: marks every row with IS_PLAYOFF = 1,
    derives a 0/1 STARTING flag from START_POSITION, and then runs the
    position, rest-day, team-encoding, prop-specific, playoff-series and
    lineup feature helpers in a fixed order.

    NOTE(review): this definition reuses the name of the regular-season
    function defined in an earlier cell, so whichever cell ran last wins.
    Unlike that variant, OPP_TEAM_ID is not cast to int here — confirm
    that the difference is intentional.

    Args:
        season_df: Raw playoff rows for one season. Must contain
            START_POSITION, plus whatever columns the feature helpers read.
        prop_type: Stat column name forwarded to the helpers as ``stat_line``
            (the driver loop passes 'PTS', 'AST' or 'REB').
        year: Season year. Accepted from callers but not read in this body.
        star_players: Player list forwarded to ``allLineupFeatures``.

    Returns:
        The processed DataFrame, without the 'Unnamed: 0' / 'Unnamed: 0.1'
        columns if they were present.
    """
    starter_slots = ('G', 'F', 'C')

    # Shared preprocessing, done on a copy so the caller's frame is untouched.
    features = season_df.copy()
    features['IS_PLAYOFF'] = 1
    features['STARTING'] = features['START_POSITION'].apply(
        lambda slot: int(slot in starter_slots)
    )

    # Position lookup (backed by the playerInfo.csv cache file).
    features = assign_position_with_cache(
        features,
        cache_file='playerInfo.csv',
        max_workers=4,
        delay_between_requests=1.5,
    )

    # Features shared by every prop.
    features = encode_teams(add_rest_day_features(features))

    # Features computed for the requested prop, in the original order.
    features = statAgainstTeam(
        features,
        player_id_col='PLAYER_ID',
        opp_col='OPP_ABBREVIATION',
        stat_line=prop_type,
    )
    features = rollingAverages(features, [2, 4, 6], stat_line=prop_type)
    features = HomeAwayAverages(features, stat_line=prop_type)
    features = addLagFeatures(features, stat_line=prop_type)
    features = add_all_opponent_features(features, stat_line=prop_type)
    features = assign_playoff_series_info(features)
    features = allLineupFeatures(features, star_players)

    # Strip leftover index columns picked up from earlier CSV round-trips.
    leftover = [
        col for col in ('Unnamed: 0', 'Unnamed: 0.1') if col in features.columns
    ]
    if leftover:
        features.drop(columns=leftover, inplace=True)

    return features

# Usage example:
# Build and save the playoff feature file for every (prop, season) pair.
prop_types = ['PTS', 'AST', 'REB']
seasons = [2021, 2022, 2023, 2024, 2025]

for prop in prop_types:
    for year in seasons:
        # Input is P<yy>.csv; output is <prop>_<yy>.csv in the same folder.
        source_csv = f'CSV_FILES/PLAYOFF_DATA/P{str(year)[-2:]}.csv'
        target_csv = f'CSV_FILES/PLAYOFF_DATA/{prop}_{str(year)[-2:]}.csv'

        raw_data = pd.read_csv(source_csv)
        processed_data = process_season_features(
            raw_data, prop_type=prop, year=year, star_players=star_players_by_year[year]
        )

        # NOTE(review): to_csv keeps its default index=True, so the row index
        # is written as an extra unnamed column — confirm readers expect it.
        processed_data.to_csv(target_csv)

## concat all seasons

In [16]:
# Load the 2021 regular-season PTS feature file (the PTS_21.csv path written
# by the regular-season feature loop).
s21 = pd.read_csv('CSV_FILES/REGULAR_DATA/PTS_21.csv')
# List that the defense averages get appended to.
# NOTE(review): the leading 1 and 2 look like scratch placeholders — confirm.
res = [1,2]
def get_defense_averages(df, stat='PTS'):
    """
    Compare the mean of ``stat`` across the two DEF_CATEGORY groups.

    Args:
        df: DataFrame with a DEF_CATEGORY column and a ``stat`` column.
        stat: Name of the column to average (defaults to 'PTS').

    Returns:
        A three-item list: the mean over rows where DEF_CATEGORY == 1
        (called "strong" here), the mean over rows where DEF_CATEGORY == 0
        ("weak"), and the first minus the second.
    """
    category = df['DEF_CATEGORY']
    strong_mean = df.loc[category == 1, stat].mean()
    weak_mean = df.loc[category == 0, stat].mean()
    return [strong_mean, weak_mean, strong_mean - weak_mean]
# Append [strong, weak, diff] for the default 'PTS' stat, then display the list.
res.extend(get_defense_averages(s21))
res

[1,
 2,
 np.float64(10.333730001322227),
 np.float64(10.591656950382173),
 np.float64(-0.2579269490599465)]