In [2]:
import pandas as pd
from nba_api.stats.endpoints import LeagueLeaders
import time
from tqdm import tqdm # Used for a progress bar

# --- Configuration ---
# Columns needed for the initial DataFrame and calculation
RAW_COLS = ['PLAYER_ID', 'PLAYER', 'GP', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
STAT_CATEGORIES = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
START_YEAR = 2000 # Start analysis with the 2000-01 season
END_YEAR = 2023 # End analysis with the 2023-24 season

# --- Helper Functions ---

def generate_season_pairs(start_year, end_year):
    """
    Generates a list of consecutive season pairs in 'YYYY-YY' format,
    iterating backward from the most recent to the oldest.
    """
    season_pairs = []
    # Loop backwards from 2023 down to 2001 (which pairs with 2000)
    for year_y in range(end_year, start_year, -1):
        # Season Y (e.g., '2023-24')
        season_y_str = f'{year_y}-{str(year_y+1)[2:]}'
        # Season X (e.g., '2022-23')
        year_x = year_y - 1
        season_x_str = f'{year_x}-{str(year_x+1)[2:]}'
        season_pairs.append((season_x_str, season_y_str))
    return season_pairs

def get_player_stats(season):
    """
    Fetches player stats for a single season, selects required columns,
    and calculates per-game averages.
    """
    # Defensive measure against rate limiting (important for long loops)
    time.sleep(1.5)
    
    try:
        # Request Totals to accurately calculate per-game averages
        leaders = LeagueLeaders(
            season=season,
            stat_category_abbreviation='MIN', # Use MIN to fetch a broad list
            scope='S',
            per_mode48='Totals'
        )
        df = leaders.get_data_frames()[0]
    except Exception:
        return pd.DataFrame()

    # Select required columns and ensure the player played at least one game
    df_clean = df[RAW_COLS].copy()
    df_clean = df_clean[df_clean['GP'] > 0]

    # Calculate Per-Game Stats
    for stat in STAT_CATEGORIES:
        if stat in ['GP']: continue
        # CRITICAL ASSUMPTION: 'PTS', 'REB', etc. are Totals, requiring division by GP
        df_clean[f'{stat}_pg'] = df_clean[stat] / df_clean['GP']
    
    # Define the full set of column names for selection
    raw_and_pg_cols = ['PLAYER_ID', 'PLAYER', 'GP'] + STAT_CATEGORIES + [f'{s}_pg' for s in STAT_CATEGORIES]
    
    # Filter to the generated list of columns
    return df_clean[[col for col in raw_and_pg_cols if col in df_clean.columns]].copy()


def analyze_season_pair(df_x, df_y, season_x, season_y):
    """
    Merges two seasonal DataFrames, calculates the difference, applies
    the two-layer filter (StdDev Change AND Top 20% Raw Stat), and returns the results.
    """
    # 1. Merge DataFrames
    df_merged = pd.merge(
        df_x.drop(columns=['PLAYER']), 
        df_y, 
        on='PLAYER_ID', 
        how='inner', 
        suffixes=('_x', '_y')
    )
    
    if df_merged.empty: return None

    # Get intersection of available stats (some old seasons lack specific stats)
    stat_cols_available = [s for s in STAT_CATEGORIES if f'{s}_pg_x' in df_merged.columns and f'{s}_pg_y' in df_merged.columns]

    # 2. Calculate the difference (Change) for each Per-Game category
    for stat in stat_cols_available:
        df_merged[f'{stat}_DIFF'] = df_merged[f'{stat}_pg_y'] - df_merged[f'{stat}_pg_x']

    # 3. Calculate Thresholds: StdDev and 80th Percentile
    std_thresholds = {
        stat: df_merged[f'{stat}_DIFF'].mean() + df_merged[f'{stat}_DIFF'].std()
        for stat in stat_cols_available
    }
    PERCENTILE_THRESHOLD = 0.80
    raw_stat_thresholds = {
        stat: df_merged[f'{stat}_pg_y'].quantile(PERCENTILE_THRESHOLD)
        for stat in stat_cols_available
    }

    # 4. Apply the Two-Layer Filter
    # Player must meet the criteria in *any* stat category
    overall_two_layer_mask = pd.Series([False] * len(df_merged), index=df_merged.index)

    for stat in stat_cols_available:
        # Condition A: Significant Positive Change (Above Mean + 1 StdDev)
        std_dev_mask = df_merged[f'{stat}_DIFF'] > std_thresholds[stat]
        
        # Condition B: High Raw Performance in Year Y (Top 20% in the league)
        percentile_mask = df_merged[f'{stat}_pg_y'] >= raw_stat_thresholds[stat]
        
        # Combined Mask for THIS stat: (Condition A AND Condition B)
        combined_stat_mask = std_dev_mask & percentile_mask
        
        # Update the overall mask: (Previous Overall OR Current Stat Mask)
        overall_two_layer_mask = overall_two_layer_mask | combined_stat_mask

    # 5. Filter the DataFrame
    filtered_df = df_merged[overall_two_layer_mask].copy()
    
    if filtered_df.empty: return None

    # 6. Prepare Final Output Columns
    filtered_df['SEASON_X'] = season_x
    filtered_df['SEASON_Y'] = season_y
    
    # Define all required column groups
    FINAL_COLS_BASE = ['PLAYER', 'SEASON_X', 'SEASON_Y']
    raw_x_cols = [f'{s}_x' for s in stat_cols_available]
    pg_x_cols = [f'{s}_pg_x' for s in stat_cols_available]
    raw_y_cols = [f'{s}_y' for s in stat_cols_available]
    pg_y_cols = [f'{s}_pg_y' for s in stat_cols_available]
    diff_cols = [f'{s}_DIFF' for s in stat_cols_available]
    
    final_output_cols = FINAL_COLS_BASE + raw_x_cols + pg_x_cols + raw_y_cols + pg_y_cols + diff_cols
    
    return filtered_df[[col for col in final_output_cols if col in filtered_df.columns]]


# --- Main Execution ---
def run_historical_analysis():
    all_season_pairs = generate_season_pairs(START_YEAR, END_YEAR)
    final_results = []
    season_cache = {}

    # The loop will perform API calls for 23 pairs of seasons
    # tqdm is used to provide a progress bar for the long process
    for season_x_str, season_y_str in tqdm(all_season_pairs, desc="Processing Season Pairs"):
        
        # Fetch data for Season X, using cache
        if season_x_str not in season_cache:
            df_x = get_player_stats(season_x_str)
            if df_x.empty: continue
            season_cache[season_x_str] = df_x
        else:
            df_x = season_cache[season_x_str]

        # Fetch data for Season Y, using cache
        if season_y_str not in season_cache:
            df_y = get_player_stats(season_y_str)
            if df_y.empty: continue
            season_cache[season_y_str] = df_y
        else:
            df_y = season_cache[season_y_str]

        # Analyze the pair and collect results
        if not df_x.empty and not df_y.empty:
            filtered_results_df = analyze_season_pair(df_x, df_y, season_x_str, season_y_str)
            if filtered_results_df is not None:
                final_results.append(filtered_results_df)

    # Concatenate all results into a single Master DataFrame
    if final_results:
        master_df = pd.concat(final_results, ignore_index=True)
    else:
        master_df = pd.DataFrame()
    
    return master_df

# Call run_historical_analysis() to get the final DataFrame
# master_df = run_historical_analysis()

# master_df.to_csv('significant_player_gain_history.csv', index=False)

In [3]:
final_df = run_historical_analysis()
final_df

Processing Season Pairs: 100%|██████████| 23/23 [00:53<00:00,  2.35s/it]


Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,AST_pg_y,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF
0,Domantas Sabonis,2022-23,2023-24,1510,973,573,65,39,2736,19.113924,...,8.207317,0.902439,0.585366,35.707317,0.312905,1.342081,0.954153,0.079654,0.091695,1.074406
1,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,2678,15.311688,...,6.050000,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779
2,Franz Wagner,2022-23,2023-24,1485,329,283,77,17,2609,18.562500,...,3.736111,1.055556,0.388889,32.458333,1.173611,1.179167,0.198611,0.093056,0.176389,-0.154167
3,Bam Adebayo,2022-23,2023-24,1529,688,240,88,61,2598,20.386667,...,3.915493,1.140845,0.929577,34.028169,-1.133146,1.206948,0.715493,-0.032488,0.116244,-0.611831
4,Fred VanVleet,2022-23,2023-24,1335,280,495,123,38,2535,19.347826,...,8.068493,1.383562,0.808219,36.767123,-1.950566,-0.249752,0.894580,-0.399047,0.257495,0.027993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1972,Adrian Griffin,2000-01,2001-02,93,87,27,18,5,377,2.113636,...,1.827586,1.293103,0.206897,23.913793,5.041536,1.971003,1.213950,0.884013,0.093260,15.345611
1973,Alonzo Mourning,2000-01,2001-02,177,101,12,4,31,306,13.615385,...,1.160000,0.360000,2.480000,32.706667,2.091282,0.657436,0.236923,0.052308,0.095385,9.168205
1974,Joel Przybilla,2000-01,2001-02,27,71,2,3,30,270,0.818182,...,0.295775,0.281690,1.661972,15.887324,1.857875,1.834400,0.235169,0.190781,0.752881,7.705506
1975,John Crotty,2000-01,2001-02,65,28,34,6,0,264,2.096774,...,3.439024,0.463415,0.024390,19.487805,4.830055,0.926042,2.342250,0.269866,0.024390,10.971676


In [4]:
final_df["breakout?"] = 1
final_df

Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,Domantas Sabonis,2022-23,2023-24,1510,973,573,65,39,2736,19.113924,...,0.902439,0.585366,35.707317,0.312905,1.342081,0.954153,0.079654,0.091695,1.074406,1
1,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,2678,15.311688,...,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779,1
2,Franz Wagner,2022-23,2023-24,1485,329,283,77,17,2609,18.562500,...,1.055556,0.388889,32.458333,1.173611,1.179167,0.198611,0.093056,0.176389,-0.154167,1
3,Bam Adebayo,2022-23,2023-24,1529,688,240,88,61,2598,20.386667,...,1.140845,0.929577,34.028169,-1.133146,1.206948,0.715493,-0.032488,0.116244,-0.611831,1
4,Fred VanVleet,2022-23,2023-24,1335,280,495,123,38,2535,19.347826,...,1.383562,0.808219,36.767123,-1.950566,-0.249752,0.894580,-0.399047,0.257495,0.027993,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1972,Adrian Griffin,2000-01,2001-02,93,87,27,18,5,377,2.113636,...,1.293103,0.206897,23.913793,5.041536,1.971003,1.213950,0.884013,0.093260,15.345611,1
1973,Alonzo Mourning,2000-01,2001-02,177,101,12,4,31,306,13.615385,...,0.360000,2.480000,32.706667,2.091282,0.657436,0.236923,0.052308,0.095385,9.168205,1
1974,Joel Przybilla,2000-01,2001-02,27,71,2,3,30,270,0.818182,...,0.281690,1.661972,15.887324,1.857875,1.834400,0.235169,0.190781,0.752881,7.705506,1
1975,John Crotty,2000-01,2001-02,65,28,34,6,0,264,2.096774,...,0.463415,0.024390,19.487805,4.830055,0.926042,2.342250,0.269866,0.024390,10.971676,1


In [9]:
import pandas as pd
from nba_api.stats.endpoints import LeagueLeaders
import time
from tqdm import tqdm
import sys
import io
RAW_COLS = ['PLAYER_ID', 'PLAYER', 'GP', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
STAT_CATEGORIES = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'MIN']
START_YEAR = 2000 # Start analysis with the 2000-01 season
END_YEAR = 2023 # End analysis with the 2023-24 season

# --- Helper Functions ---

def generate_season_pairs(start_year, end_year):
    """
    Generates a list of consecutive season pairs in 'YYYY-YY' format,
    iterating backward from the most recent to the oldest.
    """
    season_pairs = []
    # Loop backwards from 2023 down to 2001 (which pairs with 2000)
    for year_y in range(end_year, start_year, -1):
        # Season Y (e.g., '2023-24')
        season_y_str = f'{year_y}-{str(year_y+1)[2:]}'
        # Season X (e.g., '2022-23')
        year_x = year_y - 1
        season_x_str = f'{year_x}-{str(year_x+1)[2:]}'
        season_pairs.append((season_x_str, season_y_str))
    return season_pairs

def get_player_stats(season):
    """
    Fetches player stats for a single season, selects required columns,
    and calculates per-game averages.
    """
    # Defensive measure against rate limiting (important for long loops)
    time.sleep(1.5)
    
    try:
        # Request Totals to accurately calculate per-game averages
        leaders = LeagueLeaders(
            season=season,
            stat_category_abbreviation='MIN', # Use MIN to fetch a broad list
            scope='S',
            per_mode48='Totals'
        )
        df = leaders.get_data_frames()[0]
    except Exception:
        return pd.DataFrame()

    # Select required columns and ensure the player played at least one game
    df_clean = df[RAW_COLS].copy()
    df_clean = df_clean[df_clean['GP'] > 0]

    # Calculate Per-Game Stats
    for stat in STAT_CATEGORIES:
        if stat in ['GP']: continue
        # CRITICAL ASSUMPTION: 'PTS', 'REB', etc. are Totals, requiring division by GP
        df_clean[f'{stat}_pg'] = df_clean[stat] / df_clean['GP']
    
    # Define the full set of column names for selection
    raw_and_pg_cols = ['PLAYER_ID', 'PLAYER', 'GP'] + STAT_CATEGORIES + [f'{s}_pg' for s in STAT_CATEGORIES]
    
    # Filter to the generated list of columns
    return df_clean[[col for col in raw_and_pg_cols if col in df_clean.columns]].copy()


# --- Core Analysis Function (Modified) ---

def analyze_non_qualifying_pair(df_x, df_y, season_x, season_y):
    """
    Merges two seasonal DataFrames, calculates differences, applies the two-layer filter
    (StdDev Change AND Top 20% Raw Stat), and returns the players who *DID NOT* qualify.
    """
    # 1. Merge DataFrames
    # Use 'inner' to only include players who played in BOTH seasons
    df_merged = pd.merge(
        df_x.drop(columns=['PLAYER']), 
        df_y, 
        on='PLAYER_ID', 
        how='inner', 
        suffixes=('_x', '_y')
    )
    
    if df_merged.empty: return None

    stat_cols_available = [s for s in STAT_CATEGORIES if f'{s}_pg_x' in df_merged.columns and f'{s}_pg_y' in df_merged.columns]
    
    # 2. Calculate the difference (Change)
    for stat in stat_cols_available:
        df_merged[f'{stat}_DIFF'] = df_merged[f'{stat}_pg_y'] - df_merged[f'{stat}_pg_x']

    # 3. Calculate Thresholds (required even for non-qualifiers)
    std_thresholds = {
        stat: df_merged[f'{stat}_DIFF'].mean() + df_merged[f'{stat}_DIFF'].std()
        for stat in stat_cols_available
    }
    PERCENTILE_THRESHOLD = 0.80
    raw_stat_thresholds = {
        stat: df_merged[f'{stat}_pg_y'].quantile(PERCENTILE_THRESHOLD)
        for stat in stat_cols_available
    }

    # 4. Determine which players *QUALIFY* for the high-gain filter (the internal mask)
    # We must first find the TRUE qualifiers, then invert the mask.
    qualifying_mask = pd.Series([False] * len(df_merged), index=df_merged.index)

    for stat in stat_cols_available:
        # Condition A: Player's change is greater than 1 Std Dev above the mean
        std_dev_mask = df_merged[f'{stat}_DIFF'] > std_thresholds[stat]
        
        # Condition B: Player's raw stat is in the top 20% of the entire merged league for Season Y
        percentile_mask = df_merged[f'{stat}_pg_y'] >= raw_stat_thresholds[stat]
        
        # Combined Mask for THIS stat: (Significant Change AND High Raw Performance)
        combined_stat_mask = std_dev_mask & percentile_mask
        
        # Update the qualifying mask: (Previous OR Current Stat Mask)
        qualifying_mask = qualifying_mask | combined_stat_mask

    # 5. Filter the DataFrame to the NON-QUALIFYING players
    
    # CRITICAL STEP: Use the tilde (~) operator to invert the mask!
    non_qualifying_df = df_merged[~qualifying_mask].copy() 
    
    if non_qualifying_df.empty: return None

    # 6. Prepare Final Output Columns (Same structure as the previous filter)
    
    # Add season columns
    non_qualifying_df['SEASON_X'] = season_x
    non_qualifying_df['SEASON_Y'] = season_y
    
    # Define all required column groups based on available stats
    FINAL_COLS_BASE = ['PLAYER', 'SEASON_X', 'SEASON_Y']
    raw_x_cols = [f'{s}_x' for s in stat_cols_available]
    pg_x_cols = [f'{s}_pg_x' for s in stat_cols_available]
    raw_y_cols = [f'{s}_y' for s in stat_cols_available]
    pg_y_cols = [f'{s}_pg_y' for s in stat_cols_available]
    diff_cols = [f'{s}_DIFF' for s in stat_cols_available]
    
    # Final column order
    final_output_cols = FINAL_COLS_BASE + raw_x_cols + pg_x_cols + raw_y_cols + pg_y_cols + diff_cols
    
    return non_qualifying_df[[col for col in final_output_cols if col in non_qualifying_df.columns]]

# --- Main Execution (Modified) ---
def run_non_qualifying_analysis():
    """
    Main orchestration function to loop through all season pairs and collect 
    the players who *DID NOT* meet the two-layer filter criteria.
    """
    all_season_pairs = generate_season_pairs(START_YEAR, END_YEAR)
    final_results = []
    season_cache = {} # Dictionary to store fetched DataFrames

    # The loop structure is identical to the previous analysis for caching and flow control
    for season_x_str, season_y_str in tqdm(all_season_pairs, desc="Processing Non-Qualifying Pairs"):
        
        # --- Data Fetching and Caching Logic (Restored) ---
        
        # Fetch data for Season X, using cache
        if season_x_str not in season_cache:
            # df_x is defined here by calling the helper function
            df_x = get_player_stats(season_x_str)
            if df_x.empty: continue
            season_cache[season_x_str] = df_x
        else:
            df_x = season_cache[season_x_str]

        # Fetch data for Season Y, using cache
        if season_y_str not in season_cache:
            # df_y is defined here by calling the helper function
            df_y = get_player_stats(season_y_str)
            if df_y.empty: continue
            season_cache[season_y_str] = df_y
        else:
            df_y = season_cache[season_y_str]

        # --- End of Restored Logic ---

        # The rest of the function proceeds once df_x and df_y are guaranteed to be defined
        if not df_x.empty and not df_y.empty:
            # Use the NEW analysis function
            filtered_results_df = analyze_non_qualifying_pair(df_x, df_y, season_x_str, season_y_str)
            if filtered_results_df is not None:
                final_results.append(filtered_results_df)

    if final_results:
        master_df = pd.concat(final_results, ignore_index=True)
    else:
        master_df = pd.DataFrame()
    
    return master_df

# master_df_non_qualifying = run_non_qualifying_analysis()
# master_df_non_qualifying.to_csv('non_qualifying_player_history.csv', index=False)

In [10]:
non_breakout_df = run_non_qualifying_analysis()

Processing Non-Qualifying Pairs: 100%|██████████| 23/23 [00:57<00:00,  2.48s/it]


In [12]:
non_breakout_df["breakout?"] = 0
non_breakout_df

Unnamed: 0,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,MIN_x,PTS_pg_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,DeMar DeRozan,2021-22,2022-23,2118,392,374,68,24,2743,27.868421,...,1.121622,0.486486,36.243243,-3.327881,-0.522760,0.173542,0.226885,0.170697,0.151138,0
1,Jayson Tatum,2021-22,2022-23,2046,609,334,75,49,2731,26.921053,...,1.054054,0.689189,36.918919,3.146515,0.757112,0.226885,0.067212,0.044452,0.984708,0
2,Saddiq Bey,2021-22,2022-23,1321,441,233,73,16,2704,16.109756,...,0.909091,0.168831,27.649351,-2.317548,-0.676750,-1.308996,0.018847,-0.026291,-5.326259,0
3,Russell Westbrook,2021-22,2022-23,1441,580,550,75,20,2678,18.474359,...,1.041096,0.452055,29.123288,-2.597647,-1.641377,0.496663,0.079557,0.195645,-5.210046,0
4,Trae Young,2021-22,2022-23,2155,284,737,72,7,2652,28.355263,...,1.095890,0.123288,34.808219,-2.136085,-0.764239,0.453317,0.148522,0.031182,-0.086518,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6605,Ira Bowman,2000-01,2001-02,0,2,7,0,0,19,0.000000,...,0.666667,0.000000,9.666667,3.333333,-0.333333,-2.000000,0.666667,0.000000,3.333333,0
6606,Zendon Hamilton,2000-01,2001-02,9,8,0,0,0,19,3.000000,...,0.388889,0.333333,15.666667,3.000000,2.018519,0.259259,0.388889,0.333333,9.333333,0
6607,Jason Hart,2000-01,2001-02,2,0,1,0,0,10,2.000000,...,0.700000,0.100000,9.300000,0.600000,1.300000,0.200000,0.700000,0.100000,-0.700000,0
6608,Mamadou N'diaye,2000-01,2001-02,4,2,0,0,0,10,1.333333,...,0.000000,0.400000,9.400000,2.666667,1.533333,0.000000,0.000000,0.400000,6.066667,0


In [15]:
pd.concat([final_df, non_breakout_df], axis=0).reset_index().to_csv(path_or_buf="dataset.csv")

In [16]:
df = pd.read_csv("dataset.csv")

In [17]:
df

Unnamed: 0.1,Unnamed: 0,index,PLAYER,SEASON_X,SEASON_Y,PTS_x,REB_x,AST_x,STL_x,BLK_x,...,STL_pg_y,BLK_pg_y,MIN_pg_y,PTS_DIFF,REB_DIFF,AST_DIFF,STL_DIFF,BLK_DIFF,MIN_DIFF,breakout?
0,0,0,Domantas Sabonis,2022-23,2023-24,1510,973,573,65,39,...,0.902439,0.585366,35.707317,0.312905,1.342081,0.954153,0.079654,0.091695,1.074406,1
1,1,1,Scottie Barnes,2022-23,2023-24,1179,512,371,83,61,...,1.250000,1.466667,34.900000,4.538312,1.583983,1.231818,0.172078,0.674459,0.120779,1
2,2,2,Franz Wagner,2022-23,2023-24,1485,329,283,77,17,...,1.055556,0.388889,32.458333,1.173611,1.179167,0.198611,0.093056,0.176389,-0.154167,1
3,3,3,Bam Adebayo,2022-23,2023-24,1529,688,240,88,61,...,1.140845,0.929577,34.028169,-1.133146,1.206948,0.715493,-0.032488,0.116244,-0.611831,1
4,4,4,Fred VanVleet,2022-23,2023-24,1335,280,495,123,38,...,1.383562,0.808219,36.767123,-1.950566,-0.249752,0.894580,-0.399047,0.257495,0.027993,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8582,8582,6605,Ira Bowman,2000-01,2001-02,0,2,7,0,0,...,0.666667,0.000000,9.666667,3.333333,-0.333333,-2.000000,0.666667,0.000000,3.333333,0
8583,8583,6606,Zendon Hamilton,2000-01,2001-02,9,8,0,0,0,...,0.388889,0.333333,15.666667,3.000000,2.018519,0.259259,0.388889,0.333333,9.333333,0
8584,8584,6607,Jason Hart,2000-01,2001-02,2,0,1,0,0,...,0.700000,0.100000,9.300000,0.600000,1.300000,0.200000,0.700000,0.100000,-0.700000,0
8585,8585,6608,Mamadou N'diaye,2000-01,2001-02,4,2,0,0,0,...,0.000000,0.400000,9.400000,2.666667,1.533333,0.000000,0.000000,0.400000,6.066667,0


In [18]:
df.to_markdown()

"|      |   Unnamed: 0 |   index | PLAYER                   | SEASON_X   | SEASON_Y   |   PTS_x |   REB_x |   AST_x |   STL_x |   BLK_x |   MIN_x |   PTS_pg_x |   REB_pg_x |   AST_pg_x |   STL_pg_x |   BLK_pg_x |   MIN_pg_x |   PTS_y |   REB_y |   AST_y |   STL_y |   BLK_y |   MIN_y |   PTS_pg_y |   REB_pg_y |   AST_pg_y |   STL_pg_y |   BLK_pg_y |   MIN_pg_y |      PTS_DIFF |     REB_DIFF |     AST_DIFF |     STL_DIFF |     BLK_DIFF |     MIN_DIFF |   breakout? |\n|-----:|-------------:|--------:|:-------------------------|:-----------|:-----------|--------:|--------:|--------:|--------:|--------:|--------:|-----------:|-----------:|-----------:|-----------:|-----------:|-----------:|--------:|--------:|--------:|--------:|--------:|--------:|-----------:|-----------:|-----------:|-----------:|-----------:|-----------:|--------------:|-------------:|-------------:|-------------:|-------------:|-------------:|------------:|\n|    0 |            0 |       0 | Domantas Sabonis         | 