In [1]:
import pandas as pd
from UtilFunctions import format_season

In [5]:
def pre_process(seasons, min_minutes=15, min_games=30,
                data_dir="DataCollection/Player_Stats"):
    """Load, filter and rescale per-season player stats into one DataFrame.

    For every value in ``seasons`` the file
    ``{data_dir}/player_stats_{first_year}-{second_year}.csv`` is read (the two
    years are whatever ``format_season(season)`` returns), cleaned by
    ``_prepare_season`` and the per-season results are stacked in iteration
    order.

    Parameters
    ----------
    seasons : iterable
        Season identifiers accepted by ``format_season``. Each value is also
        written verbatim into the ``Season`` column of its rows.
    min_minutes : float, default 15
        A row is kept only when its ``MP`` value is strictly greater than this.
    min_games : int, default 30
        A row is kept only when its ``G`` value is at least this.
    data_dir : str, default "DataCollection/Player_Stats"
        Directory that holds the per-season CSV files.

    Returns
    -------
    pandas.DataFrame
        The stacked per-season rows with a fresh ``0..n-1`` index and the
        ``Tm`` column removed.

    Raises
    ------
    ValueError
        If ``seasons`` yields no values, since there is nothing to combine.
    """
    season_frames = []
    for season in seasons:
        first_year, second_year = format_season(season)
        raw_stats = pd.read_csv(f"{data_dir}/player_stats_{first_year}-{second_year}.csv")
        season_frames.append(_prepare_season(raw_stats, season, min_minutes, min_games))

    if not season_frames:
        # Fail with a clear message instead of an AttributeError on ``None``.
        raise ValueError("pre_process() needs at least one season to load")

    # A single concat avoids re-copying the accumulated frame on every
    # iteration; ignore_index renumbers the rows so no helper 'index' column
    # has to be created and dropped afterwards.
    combined = pd.concat(season_frames, ignore_index=True)
    return combined.drop(columns=['Tm'])


def _prepare_season(player_data, season, min_minutes, min_games):
    """Reduce one season's raw table to one row per player and rescale the counting columns."""
    drop_cols = ['TRB', 'DRB', 'ORB', 'AST', 'G', 'GS', 'FT', 'FG', 'FG%', 'BLK', 'STL',
                 'WS', 'FTA', 'TOV', 'Age', '2P', '3P', 'VORP']
    rescaled_cols = ['FGA', '3PA', '2PA', 'PF', 'PTS', 'OWS', 'DWS', 'OBPM', 'DBPM', 'BPM']

    cleaned = player_data.fillna(0)
    cleaned = cleaned[(cleaned["MP"] > min_minutes) & (cleaned["G"] >= min_games)]

    # Sorting by games played (descending) before de-duplicating means the row
    # kept for a player who appears several times is the one with the most
    # games. The statement order is deliberately the same as before so the
    # same rows are selected.
    cleaned = cleaned.sort_values(by=['G'], ascending=False)
    # NOTE(review): "TOT" is presumably the combined-teams row. Because it is
    # removed *after* the games filter, a player whose individual team rows all
    # fall below ``min_games`` disappears entirely - confirm this is intended.
    cleaned = cleaned[cleaned["Tm"] != "TOT"]
    cleaned = cleaned.drop_duplicates(subset="Player", keep="first")

    # ``assign`` returns a new frame, so the column writes below never touch a
    # filtered view of the raw data (no SettingWithCopyWarning).
    cleaned = cleaned.drop(columns=drop_cols).assign(Season=season)

    # Each listed column becomes value / MP * 36. MP is always > min_minutes
    # here, so with the default threshold the divisor cannot be zero.
    cleaned[rescaled_cols] = cleaned[rescaled_cols].div(cleaned['MP'], axis=0) * 36
    return cleaned

In [6]:
# Build the combined table for season values 1980 through 2022 (range() excludes its end value).
df = pre_process(range(1980, 2023))

In [7]:
# Show the combined frame; the rendered table elides the middle rows and columns.
df

Unnamed: 0,Player,Pos,MP,FGA,3PA,3P%,2PA,2P%,eFG%,FT%,...,BLK%,TOV%,USG%,OWS,DWS,WS/48,OBPM,DBPM,BPM,Salary
0,Kareem Abdul-Jabbar*,C,38.3,15.885117,0.000000,0.000,15.885117,0.604,0.604,0.765,...,4.6,15.7,24.1,8.929504,4.981723,0.227,4.511749,2.255875,6.767624,0.0
1,Larry Bird*,PF,36.0,17.800000,1.700000,0.406,16.100000,0.481,0.494,0.836,...,1.0,14.0,25.3,5.600000,5.600000,0.182,3.000000,1.500000,4.500000,0.0
2,Otis Birdsong,SG,35.2,19.329545,0.409091,0.278,18.818182,0.511,0.508,0.694,...,0.4,9.4,26.4,5.011364,3.784091,0.143,2.045455,0.102273,2.147727,0.0
3,Henry Bibby,PG,24.8,11.032258,0.870968,0.212,10.161290,0.418,0.410,0.790,...,0.2,16.4,17.7,1.887097,3.048387,0.079,-2.467742,-0.725806,-3.193548,0.0
4,Greg Ballard,SF,29.7,16.242424,0.727273,0.340,15.636364,0.502,0.502,0.753,...,0.8,10.0,21.6,4.969697,3.393939,0.136,2.303030,0.121212,2.424242,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10971,Andre Iguodala,SF,19.5,7.200000,4.430769,0.230,2.769231,0.617,0.450,0.750,...,3.5,17.7,11.4,0.923077,2.030769,0.125,-1.661538,6.830769,4.984615,1669178.0
10972,Vit Krejci,SG,23.0,8.765217,5.165217,0.327,3.600000,0.522,0.503,0.864,...,1.1,14.1,12.9,0.156522,0.782609,0.042,-6.730435,-1.565217,-8.295652,925258.0
10973,Marvin Bagley III,PF,21.9,13.479452,3.452055,0.242,10.027397,0.538,0.494,0.745,...,1.5,7.3,18.8,0.821918,0.657534,0.068,-1.808219,-3.945205,-5.753425,11312114.0
10974,Terence Davis,SG,17.9,17.899441,10.055866,0.329,7.843575,0.542,0.515,0.818,...,1.8,10.3,25.6,0.000000,1.005587,0.044,-1.206704,-0.804469,-1.810056,4000000.0


In [8]:
# Draw 100 rows at random for inspection; the fixed random_state makes the draw repeatable.
df_sample = df.sample(n=100, random_state=1)
df_sample

Unnamed: 0,Player,Pos,MP,FGA,3PA,3P%,2PA,2P%,eFG%,FT%,...,BLK%,TOV%,USG%,OWS,DWS,WS/48,OBPM,DBPM,BPM,Salary
2333,Sedale Threatt,PG,25.8,14.511628,0.558140,0.286,13.953488,0.529,0.525,0.792,...,0.3,13.2,21.3,4.325581,1.953488,0.106,0.279070,0.000000,0.418605,0.0
5768,Nick Collison,C,17.0,9.105882,0.000000,0.000,9.105882,0.541,0.537,0.703,...,2.8,13.3,15.5,5.717647,2.541176,0.135,-2.752941,-0.847059,-3.600000,0.0
10908,OG Anunoby,SF,36.0,14.500000,6.600000,0.363,7.900000,0.511,0.526,0.754,...,1.4,9.7,20.5,1.800000,1.900000,0.104,0.400000,0.100000,0.500000,0.0
8426,Jameer Nelson,PG,32.0,12.375000,6.525000,0.348,5.850000,0.444,0.485,0.857,...,0.2,17.3,20.1,1.800000,1.350000,0.062,0.675000,-1.912500,-1.237500,0.0
1094,Robert Parish*,C,36.1,12.864266,0.000000,0.000,12.864266,0.542,0.542,0.743,...,1.9,13.5,19.7,4.986150,3.988920,0.151,1.096953,-0.299169,0.797784,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8856,Corey Brewer,SF,20.4,11.823529,4.764706,0.272,7.058824,0.460,0.439,0.750,...,0.9,11.3,17.8,0.176471,2.117647,0.035,-4.764706,-0.529412,-5.117647,8229375.0
6049,Eric Snow,PG,28.7,6.146341,0.125436,0.100,5.895470,0.416,0.410,0.688,...,0.6,20.9,10.9,0.000000,2.508711,0.042,-4.515679,0.752613,-3.763066,5484375.0
3669,Tyrone Corbin,SF,19.0,10.042105,0.378947,0.083,9.473684,0.470,0.454,0.837,...,1.4,14.3,16.5,1.326316,2.084211,0.092,-2.463158,1.705263,-0.757895,0.0
8769,Ryan Kelly,PF,23.7,8.962025,3.949367,0.336,5.164557,0.337,0.409,0.832,...,1.6,9.5,14.0,0.607595,0.455696,0.027,-4.556962,-0.455696,-5.012658,1650000.0
