In [1]:
import pandas as pd
from nba_api.stats.endpoints import LeagueLeaders
import time
from tqdm import tqdm # Used for a progress bar

# --- Configuration ---
# Columns needed for the initial DataFrame and calculation
RAW_COLS = ['PLAYER_ID', 'PLAYER', 'GP', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
STAT_CATEGORIES = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
START_YEAR = 2000 # Start analysis with the 2000-01 season
END_YEAR = 2023 # End analysis with the 2023-24 season

# --- Helper Functions ---

def generate_season_pairs(start_year, end_year):
    """
    Generates a list of consecutive season pairs in 'YYYY-YY' format,
    iterating backward from the most recent to the oldest.
    """
    season_pairs = []
    # Loop backwards from 2023 down to 2001 (which pairs with 2000)
    for year_y in range(end_year, start_year, -1):
        # Season Y (e.g., '2023-24')
        season_y_str = f'{year_y}-{str(year_y+1)[2:]}'
        # Season X (e.g., '2022-23')
        year_x = year_y - 1
        season_x_str = f'{year_x}-{str(year_x+1)[2:]}'
        season_pairs.append((season_x_str, season_y_str))
    return season_pairs

def get_player_stats(season):
    """
    Fetches player stats for a single season, selects required columns,
    and calculates per-game averages.
    """
    # Defensive measure against rate limiting (important for long loops)
    time.sleep(1.5)
    
    try:
        # Request Totals to accurately calculate per-game averages
        leaders = LeagueLeaders(
            season=season,
            stat_category_abbreviation='MIN', # Use MIN to fetch a broad list
            scope='S',
            per_mode48='Totals'
        )
        df = leaders.get_data_frames()[0]
    except Exception:
        return pd.DataFrame()

    # Select required columns and ensure the player played at least one game
    df_clean = df[RAW_COLS].copy()
    df_clean = df_clean[df_clean['GP'] > 0]

    # Calculate Per-Game Stats
    for stat in STAT_CATEGORIES:
        if stat in ['GP']: continue
        # CRITICAL ASSUMPTION: 'PTS', 'REB', etc. are Totals, requiring division by GP
        df_clean[f'{stat}_pg'] = df_clean[stat] / df_clean['GP']
    
    # Define the full set of column names for selection
    raw_and_pg_cols = ['PLAYER_ID', 'PLAYER', 'GP'] + STAT_CATEGORIES + [f'{s}_pg' for s in STAT_CATEGORIES]
    
    # Filter to the generated list of columns
    return df_clean[[col for col in raw_and_pg_cols if col in df_clean.columns]].copy()


def analyze_season_pair(df_x, df_y, season_x, season_y):
    """
    Merges two seasonal DataFrames and applies the NEW breakout criteria:
    1. Improvement in 2+ stats > 1 StdDev above mean change.
    2. Improvement in 1+ stat > 2 StdDev above mean change.
    (Both require the raw stat to be in the top 20% to ensure quality).
    """
    # 1. Merge DataFrames
    df_merged = pd.merge(
        df_x.drop(columns=['PLAYER']), 
        df_y, 
        on='PLAYER_ID', 
        how='inner', 
        suffixes=('_x', '_y')
    )
    
    if df_merged.empty: return None

    # Get intersection of available stats
    stat_cols_available = [s for s in STAT_CATEGORIES if f'{s}_pg_x' in df_merged.columns and f'{s}_pg_y' in df_merged.columns]

    # 2. Calculate the difference (Change)
    for stat in stat_cols_available:
        df_merged[f'{stat}_DIFF'] = df_merged[f'{stat}_pg_y'] - df_merged[f'{stat}_pg_x']

    # 3. Calculate Thresholds
    # We need both 1 StdDev and 2 StdDev thresholds now
    means = {stat: df_merged[f'{stat}_DIFF'].mean() for stat in stat_cols_available}
    stds = {stat: df_merged[f'{stat}_DIFF'].std() for stat in stat_cols_available}
    
    thresh_1sd = {stat: means[stat] + stds[stat] for stat in stat_cols_available}
    thresh_2sd = {stat: means[stat] + (2 * stds[stat]) for stat in stat_cols_available}
    
    PERCENTILE_THRESHOLD = 0.80
    raw_stat_thresholds = {
        stat: df_merged[f'{stat}_pg_y'].quantile(PERCENTILE_THRESHOLD)
        for stat in stat_cols_available
    }

    # 4. Apply the NEW Two-Layer Filter
    # We will build boolean DataFrames to track which criteria are met for each stat
    meets_1sd_criteria = pd.DataFrame(index=df_merged.index)
    meets_2sd_criteria = pd.DataFrame(index=df_merged.index)

    for stat in stat_cols_available:
        # Base Condition: Must be in top 20% of raw stats (Quality Check)
        quality_mask = df_merged[f'{stat}_pg_y'] >= raw_stat_thresholds[stat]
        
        # Check 1 StdDev Change
        diff_mask_1sd = df_merged[f'{stat}_DIFF'] > thresh_1sd[stat]
        meets_1sd_criteria[stat] = diff_mask_1sd & quality_mask
        
        # Check 2 StdDev Change
        diff_mask_2sd = df_merged[f'{stat}_DIFF'] > thresh_2sd[stat]
        meets_2sd_criteria[stat] = diff_mask_2sd & quality_mask

    # LOGIC A: Change in TWO stats > 1 StdDev
    # .sum(axis=1) counts how many Trues are in the row (how many stats met the criteria)
    count_1sd_stats = meets_1sd_criteria.sum(axis=1)
    mask_condition_a = count_1sd_stats >= 2

    # LOGIC B: Change in ONE stat > 2 StdDev
    # .any(axis=1) returns True if at least one stat met the criteria
    mask_condition_b = meets_2sd_criteria.any(axis=1)

    # Combine Conditions (OR logic)
    overall_mask = mask_condition_a | mask_condition_b

    # 5. Filter the DataFrame
    filtered_df = df_merged[overall_mask].copy()
    
    if filtered_df.empty: return None

    # 6. Prepare Final Output Columns
    filtered_df['SEASON_X'] = season_x
    filtered_df['SEASON_Y'] = season_y
    
    FINAL_COLS_BASE = ['PLAYER', 'SEASON_X', 'SEASON_Y']
    raw_x_cols = [f'{s}_x' for s in stat_cols_available]
    pg_x_cols = [f'{s}_pg_x' for s in stat_cols_available]
    raw_y_cols = [f'{s}_y' for s in stat_cols_available]
    pg_y_cols = [f'{s}_pg_y' for s in stat_cols_available]
    diff_cols = [f'{s}_DIFF' for s in stat_cols_available]
    
    final_output_cols = FINAL_COLS_BASE + raw_x_cols + pg_x_cols + raw_y_cols + pg_y_cols + diff_cols
    
    return filtered_df[[col for col in final_output_cols if col in filtered_df.columns]]

# --- Main Execution ---
def run_historical_analysis():
    all_season_pairs = generate_season_pairs(START_YEAR, END_YEAR)
    final_results = []
    season_cache = {}

    # The loop will perform API calls for 23 pairs of seasons
    # tqdm is used to provide a progress bar for the long process
    for season_x_str, season_y_str in tqdm(all_season_pairs, desc="Processing Season Pairs"):
        
        # Fetch data for Season X, using cache
        if season_x_str not in season_cache:
            df_x = get_player_stats(season_x_str)
            if df_x.empty: continue
            season_cache[season_x_str] = df_x
        else:
            df_x = season_cache[season_x_str]

        # Fetch data for Season Y, using cache
        if season_y_str not in season_cache:
            df_y = get_player_stats(season_y_str)
            if df_y.empty: continue
            season_cache[season_y_str] = df_y
        else:
            df_y = season_cache[season_y_str]

        # Analyze the pair and collect results
        if not df_x.empty and not df_y.empty:
            filtered_results_df = analyze_season_pair(df_x, df_y, season_x_str, season_y_str)
            if filtered_results_df is not None:
                final_results.append(filtered_results_df)

    # Concatenate all results into a single Master DataFrame
    if final_results:
        master_df = pd.concat(final_results, ignore_index=True)
    else:
        master_df = pd.DataFrame()
    
    return master_df

# Call run_historical_analysis() to get the final DataFrame
# master_df = run_historical_analysis()

# master_df.to_csv('significant_player_gain_history.csv', index=False)

In [2]:
final_df = run_historical_analysis()
final_df

Processing Season Pairs: 100%|██████████| 23/23 [01:01<00:00,  2.69s/it]


Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,AST_pg_y,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF
0,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,2678,15.311688,...,6.050000,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779
1,De'Aaron Fox,2022-23,2023-24,1826,306,447,83,23,2435,25.013699,...,5.648649,2.027027,0.418919,35.932432,1.553869,0.402814,-0.474639,0.890041,0.103850,2.576268
2,Donovan Mitchell,2022-23,2023-24,1922,289,301,99,27,2432,28.264706,...,6.054545,1.836364,0.545455,35.327273,-1.664706,0.840909,1.628075,0.380481,0.148396,-0.437433
3,Paolo Banchero,2022-23,2023-24,1437,497,269,60,39,2430,19.958333,...,5.387500,0.887500,0.587500,34.987500,2.591667,0.022222,1.651389,0.054167,0.045833,1.237500
4,Keegan Murray,2022-23,2023-24,976,371,98,61,42,2382,12.200000,...,1.675325,1.025974,0.766234,33.623377,3.020779,0.817045,0.450325,0.263474,0.241234,3.848377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,Tyronn Lue,2000-01,2001-02,130,32,45,19,0,468,3.421053,...,3.464789,0.690141,0.000000,20.591549,4.395849,0.876205,2.280578,0.190141,0.000000,8.275760
1036,Adrian Griffin,2000-01,2001-02,93,87,27,18,5,377,2.113636,...,1.827586,1.293103,0.206897,23.913793,5.041536,1.971003,1.213950,0.884013,0.093260,15.345611
1037,Joel Przybilla,2000-01,2001-02,27,71,2,3,30,270,0.818182,...,0.295775,0.281690,1.661972,15.887324,1.857875,1.834400,0.235169,0.190781,0.752881,7.705506
1038,John Crotty,2000-01,2001-02,65,28,34,6,0,264,2.096774,...,3.439024,0.463415,0.024390,19.487805,4.830055,0.926042,2.342250,0.269866,0.024390,10.971676


In [3]:
final_df["breakout?"] = 1
final_df

Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,2678,15.311688,...,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779,1
1,De'Aaron Fox,2022-23,2023-24,1826,306,447,83,23,2435,25.013699,...,2.027027,0.418919,35.932432,1.553869,0.402814,-0.474639,0.890041,0.103850,2.576268,1
2,Donovan Mitchell,2022-23,2023-24,1922,289,301,99,27,2432,28.264706,...,1.836364,0.545455,35.327273,-1.664706,0.840909,1.628075,0.380481,0.148396,-0.437433,1
3,Paolo Banchero,2022-23,2023-24,1437,497,269,60,39,2430,19.958333,...,0.887500,0.587500,34.987500,2.591667,0.022222,1.651389,0.054167,0.045833,1.237500,1
4,Keegan Murray,2022-23,2023-24,976,371,98,61,42,2382,12.200000,...,1.025974,0.766234,33.623377,3.020779,0.817045,0.450325,0.263474,0.241234,3.848377,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,Tyronn Lue,2000-01,2001-02,130,32,45,19,0,468,3.421053,...,0.690141,0.000000,20.591549,4.395849,0.876205,2.280578,0.190141,0.000000,8.275760,1
1036,Adrian Griffin,2000-01,2001-02,93,87,27,18,5,377,2.113636,...,1.293103,0.206897,23.913793,5.041536,1.971003,1.213950,0.884013,0.093260,15.345611,1
1037,Joel Przybilla,2000-01,2001-02,27,71,2,3,30,270,0.818182,...,0.281690,1.661972,15.887324,1.857875,1.834400,0.235169,0.190781,0.752881,7.705506,1
1038,John Crotty,2000-01,2001-02,65,28,34,6,0,264,2.096774,...,0.463415,0.024390,19.487805,4.830055,0.926042,2.342250,0.269866,0.024390,10.971676,1


In [4]:
import pandas as pd
from nba_api.stats.endpoints import LeagueLeaders
import time
from tqdm import tqdm
import sys
import io
RAW_COLS = ['PLAYER_ID', 'PLAYER', 'GP', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
STAT_CATEGORIES = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
START_YEAR = 2000 # Start analysis with the 2000-01 season
END_YEAR = 2023 # End analysis with the 2023-24 season

# --- Helper Functions ---

def generate_season_pairs(start_year, end_year):
    """
    Generates a list of consecutive season pairs in 'YYYY-YY' format,
    iterating backward from the most recent to the oldest.
    """
    season_pairs = []
    # Loop backwards from 2023 down to 2001 (which pairs with 2000)
    for year_y in range(end_year, start_year, -1):
        # Season Y (e.g., '2023-24')
        season_y_str = f'{year_y}-{str(year_y+1)[2:]}'
        # Season X (e.g., '2022-23')
        year_x = year_y - 1
        season_x_str = f'{year_x}-{str(year_x+1)[2:]}'
        season_pairs.append((season_x_str, season_y_str))
    return season_pairs

def get_player_stats(season):
    """
    Fetches player stats for a single season, selects required columns,
    and calculates per-game averages.
    """
    # Defensive measure against rate limiting (important for long loops)
    time.sleep(1.5)
    
    try:
        # Request Totals to accurately calculate per-game averages
        leaders = LeagueLeaders(
            season=season,
            stat_category_abbreviation='MIN', # Use MIN to fetch a broad list
            scope='S',
            per_mode48='Totals'
        )
        df = leaders.get_data_frames()[0]
    except Exception:
        return pd.DataFrame()

    # Select required columns and ensure the player played at least one game
    df_clean = df[RAW_COLS].copy()
    df_clean = df_clean[df_clean['GP'] > 0]

    # Calculate Per-Game Stats
    for stat in STAT_CATEGORIES:
        if stat in ['GP']: continue
        # CRITICAL ASSUMPTION: 'PTS', 'REB', etc. are Totals, requiring division by GP
        df_clean[f'{stat}_pg'] = df_clean[stat] / df_clean['GP']
    
    # Define the full set of column names for selection
    raw_and_pg_cols = ['PLAYER_ID', 'PLAYER', 'GP'] + STAT_CATEGORIES + [f'{s}_pg' for s in STAT_CATEGORIES]
    
    # Filter to the generated list of columns
    return df_clean[[col for col in raw_and_pg_cols if col in df_clean.columns]].copy()


# --- Core Analysis Function (Modified) ---

def analyze_non_qualifying_pair(df_x, df_y, season_x, season_y):
    """
    Inverse of analyze_season_pair. Returns players who did NOT meet the breakout criteria.
    """
    # 1. Merge
    df_merged = pd.merge(
        df_x.drop(columns=['PLAYER']), 
        df_y, 
        on='PLAYER_ID', 
        how='inner', 
        suffixes=('_x', '_y')
    )
    
    if df_merged.empty: return None

    stat_cols_available = [s for s in STAT_CATEGORIES if f'{s}_pg_x' in df_merged.columns and f'{s}_pg_y' in df_merged.columns]
    
    # 2. Diff
    for stat in stat_cols_available:
        df_merged[f'{stat}_DIFF'] = df_merged[f'{stat}_pg_y'] - df_merged[f'{stat}_pg_x']

    # 3. Thresholds
    means = {stat: df_merged[f'{stat}_DIFF'].mean() for stat in stat_cols_available}
    stds = {stat: df_merged[f'{stat}_DIFF'].std() for stat in stat_cols_available}
    thresh_1sd = {stat: means[stat] + stds[stat] for stat in stat_cols_available}
    thresh_2sd = {stat: means[stat] + (2 * stds[stat]) for stat in stat_cols_available}
    
    PERCENTILE_THRESHOLD = 0.80
    raw_stat_thresholds = {stat: df_merged[f'{stat}_pg_y'].quantile(PERCENTILE_THRESHOLD) for stat in stat_cols_available}

    # 4. Logic (Identical to analyze_season_pair)
    meets_1sd_criteria = pd.DataFrame(index=df_merged.index)
    meets_2sd_criteria = pd.DataFrame(index=df_merged.index)

    for stat in stat_cols_available:
        quality_mask = df_merged[f'{stat}_pg_y'] >= raw_stat_thresholds[stat]
        
        diff_mask_1sd = df_merged[f'{stat}_DIFF'] > thresh_1sd[stat]
        meets_1sd_criteria[stat] = diff_mask_1sd & quality_mask
        
        diff_mask_2sd = df_merged[f'{stat}_DIFF'] > thresh_2sd[stat]
        meets_2sd_criteria[stat] = diff_mask_2sd & quality_mask

    count_1sd_stats = meets_1sd_criteria.sum(axis=1)
    mask_condition_a = count_1sd_stats >= 2
    mask_condition_b = meets_2sd_criteria.any(axis=1)
    
    qualifying_mask = mask_condition_a | mask_condition_b

    # 5. Filter (INVERTED using ~)
    non_qualifying_df = df_merged[~qualifying_mask].copy() 
    
    if non_qualifying_df.empty: return None

    # 6. Output
    non_qualifying_df['SEASON_X'] = season_x
    non_qualifying_df['SEASON_Y'] = season_y
    
    FINAL_COLS_BASE = ['PLAYER', 'SEASON_X', 'SEASON_Y']
    raw_x_cols = [f'{s}_x' for s in stat_cols_available]
    pg_x_cols = [f'{s}_pg_x' for s in stat_cols_available]
    raw_y_cols = [f'{s}_y' for s in stat_cols_available]
    pg_y_cols = [f'{s}_pg_y' for s in stat_cols_available]
    diff_cols = [f'{s}_DIFF' for s in stat_cols_available]
    
    final_output_cols = FINAL_COLS_BASE + raw_x_cols + pg_x_cols + raw_y_cols + pg_y_cols + diff_cols
    
    return non_qualifying_df[[col for col in final_output_cols if col in non_qualifying_df.columns]]
# --- Main Execution (Modified) ---
def run_non_qualifying_analysis():
    """
    Main orchestration function to loop through all season pairs and collect 
    the players who *DID NOT* meet the two-layer filter criteria.
    """
    all_season_pairs = generate_season_pairs(START_YEAR, END_YEAR)
    final_results = []
    season_cache = {} # Dictionary to store fetched DataFrames

    # The loop structure is identical to the previous analysis for caching and flow control
    for season_x_str, season_y_str in tqdm(all_season_pairs, desc="Processing Non-Qualifying Pairs"):
        
        # --- Data Fetching and Caching Logic (Restored) ---
        
        # Fetch data for Season X, using cache
        if season_x_str not in season_cache:
            # df_x is defined here by calling the helper function
            df_x = get_player_stats(season_x_str)
            if df_x.empty: continue
            season_cache[season_x_str] = df_x
        else:
            df_x = season_cache[season_x_str]

        # Fetch data for Season Y, using cache
        if season_y_str not in season_cache:
            # df_y is defined here by calling the helper function
            df_y = get_player_stats(season_y_str)
            if df_y.empty: continue
            season_cache[season_y_str] = df_y
        else:
            df_y = season_cache[season_y_str]

        # --- End of Restored Logic ---

        # The rest of the function proceeds once df_x and df_y are guaranteed to be defined
        if not df_x.empty and not df_y.empty:
            # Use the NEW analysis function
            filtered_results_df = analyze_non_qualifying_pair(df_x, df_y, season_x_str, season_y_str)
            if filtered_results_df is not None:
                final_results.append(filtered_results_df)

    if final_results:
        master_df = pd.concat(final_results, ignore_index=True)
    else:
        master_df = pd.DataFrame()
    
    return master_df

# master_df_non_qualifying = run_non_qualifying_analysis()
# master_df_non_qualifying.to_csv('non_qualifying_player_history.csv', index=False)

In [5]:
non_breakout_df = run_non_qualifying_analysis()

Processing Non-Qualifying Pairs: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it]


In [6]:
non_breakout_df["breakout?"] = 0
non_breakout_df

Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,Mikal Bridges,2022-23,2023-24,1671,364,273,91,61,2963,20.132530,...,0.987805,0.365854,34.804878,-0.547164,0.151043,0.357185,-0.108581,-0.369086,-0.893917,0
1,Anthony Edwards,2022-23,2023-24,1946,458,350,125,58,2841,24.632911,...,1.278481,0.531646,35.063291,1.303797,-0.354430,0.696203,-0.303797,-0.202532,-0.898734,0
2,Zach LaVine,2022-23,2023-24,1913,345,327,69,18,2768,24.844156,...,0.840000,0.320000,34.880000,-5.364156,0.679481,-0.326753,-0.056104,0.086234,-1.068052,0
3,Nikola Vučević,2022-23,2023-24,1447,903,265,60,57,2746,17.646341,...,0.657895,0.789474,34.342105,0.379974,-0.472721,0.110398,-0.073813,0.094352,0.854300,0
4,Julius Randle,2022-23,2023-24,1936,767,316,49,21,2737,25.142857,...,0.543478,0.260870,35.434783,-1.186335,-0.765387,0.874365,-0.092885,-0.011858,-0.110672,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7892,Ira Bowman,2000-01,2001-02,0,2,7,0,0,19,0.000000,...,0.666667,0.000000,9.666667,3.333333,-0.333333,-2.000000,0.666667,0.000000,3.333333,0
7893,Zendon Hamilton,2000-01,2001-02,9,8,0,0,0,19,3.000000,...,0.388889,0.333333,15.666667,3.000000,2.018519,0.259259,0.388889,0.333333,9.333333,0
7894,Jason Hart,2000-01,2001-02,2,0,1,0,0,10,2.000000,...,0.700000,0.100000,9.300000,0.600000,1.300000,0.200000,0.700000,0.100000,-0.700000,0
7895,Mamadou N'diaye,2000-01,2001-02,4,2,0,0,0,10,1.333333,...,0.000000,0.400000,9.400000,2.666667,1.533333,0.000000,0.000000,0.400000,6.066667,0


In [7]:
pd.concat([final_df, non_breakout_df], axis=0).reset_index().to_csv(path_or_buf="dataset.csv")

In [8]:
df = pd.read_csv("dataset.csv")

In [9]:
df

Unnamed: 0.1,Unnamed: 0,index,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,0,0,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,...,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779,1
1,1,1,De'Aaron Fox,2022-23,2023-24,1826,306,447,83,23,...,2.027027,0.418919,35.932432,1.553869,0.402814,-0.474639,0.890041,0.103850,2.576268,1
2,2,2,Donovan Mitchell,2022-23,2023-24,1922,289,301,99,27,...,1.836364,0.545455,35.327273,-1.664706,0.840909,1.628075,0.380481,0.148396,-0.437433,1
3,3,3,Paolo Banchero,2022-23,2023-24,1437,497,269,60,39,...,0.887500,0.587500,34.987500,2.591667,0.022222,1.651389,0.054167,0.045833,1.237500,1
4,4,4,Keegan Murray,2022-23,2023-24,976,371,98,61,42,...,1.025974,0.766234,33.623377,3.020779,0.817045,0.450325,0.263474,0.241234,3.848377,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8932,8932,7892,Ira Bowman,2000-01,2001-02,0,2,7,0,0,...,0.666667,0.000000,9.666667,3.333333,-0.333333,-2.000000,0.666667,0.000000,3.333333,0
8933,8933,7893,Zendon Hamilton,2000-01,2001-02,9,8,0,0,0,...,0.388889,0.333333,15.666667,3.000000,2.018519,0.259259,0.388889,0.333333,9.333333,0
8934,8934,7894,Jason Hart,2000-01,2001-02,2,0,1,0,0,...,0.700000,0.100000,9.300000,0.600000,1.300000,0.200000,0.700000,0.100000,-0.700000,0
8935,8935,7895,Mamadou N'diaye,2000-01,2001-02,4,2,0,0,0,...,0.000000,0.400000,9.400000,2.666667,1.533333,0.000000,0.000000,0.400000,6.066667,0


In [10]:
df.to_markdown()

"|      |   Unnamed: 0 |   index | PLAYER                   | SEASON_X   | SEASON_Y   |   PTS_x |   REB_x |   AST_x |   STL_x |   BLK_x |   MIN_x |   PTS_pg_x |   REB_pg_x |   AST_pg_x |   STL_pg_x |   BLK_pg_x |   MIN_pg_x |   PTS_y |   REB_y |   AST_y |   STL_y |   BLK_y |   MIN_y |   PTS_pg_y |   REB_pg_y |   AST_pg_y |   STL_pg_y |   BLK_pg_y |   MIN_pg_y |      PTS_DIFF |     REB_DIFF |     AST_DIFF |     STL_DIFF |     BLK_DIFF |     MIN_DIFF |   breakout? |\n|-----:|-------------:|--------:|:-------------------------|:-----------|:-----------|--------:|--------:|--------:|--------:|--------:|--------:|-----------:|-----------:|-----------:|-----------:|-----------:|-----------:|--------:|--------:|--------:|--------:|--------:|--------:|-----------:|-----------:|-----------:|-----------:|-----------:|-----------:|--------------:|-------------:|-------------:|-------------:|-------------:|-------------:|------------:|\n|    0 |            0 |       0 | Scottie Barnes           | 