In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import os
from IPython.display import display

In [2]:
def display_full(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df)

In [3]:
def dateify(df):
    df['Date'] = pd.to_datetime(df['Date'])
    return df

def minutes_played(df):
    df['MP'] = df['MP'].str[:-3].astype(int) + (df['MP'].str[-2:].astype(int) / 60)
    return df

# def _find_diff(arr):
#     return abs(int(arr[1]) - int(arr[0]))

# def game_differential(df):
#     df['game_diff'] = df['Result'].apply(lambda x: _find_diff(re.findall(r'\d+-|-\d+', x)))
#     return df

In [4]:
career = pd.DataFrame()
for file in os.listdir('joel-embiid'):
    df = pd.read_csv(f'joel-embiid/{file}')
    career = pd.concat([career, df])

In [5]:
career = career.pipe(dateify)
career = career.reset_index(drop=True)
career

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Unnamed: 5,Opp,Result,GS,MP,...,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-,+-
0,1.0,1.0,1,2016-10-26,PHI,,OKC,L 97-103,*,22:25,...,7,0,0,2,4,4,20,8.7,-6,
1,2.0,2.0,2,2016-10-29,PHI,,ATL,L 72-104,*,15:28,...,2,1,1,2,3,3,14,8.8,-3,
2,3.0,3.0,3,2016-11-01,PHI,,ORL,L 101-103,*,24:52,...,10,3,0,4,4,2,18,15.1,5,
3,3.0,,4,2016-11-02,PHI,@,CHO,L 93-109,Not With Team,Not With Team,...,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,
4,4.0,4.0,5,2016-11-05,PHI,,CLE,L 101-102,*,25:15,...,6,2,1,4,6,3,22,16.3,-11,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,19.0,,78,2025-04-05,PHI,,MIN,L 109-114,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,,Inactive
715,19.0,,79,2025-04-07,PHI,@,MIA,L 105-117,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,,Inactive
716,19.0,,80,2025-04-09,PHI,@,WAS,W 122-103,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,,Inactive
717,19.0,,81,2025-04-11,PHI,,ATL,L 110-124,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,,Inactive


In [11]:
is_previous_dnp = [False] + career['Gcar'].isna().to_list()[:-1]
career['is_previous_dnp'] = is_previous_dnp

is_second_dnp = [False] + is_previous_dnp[:-1]
career['is_second_dnp'] = is_second_dnp

career.head()

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Unnamed: 5,Opp,Result,GS,MP,...,STL,BLK,TOV,PF,PTS,GmSc,+/-,+-,is_previous_dnp,is_second_dnp
0,1.0,1.0,1,2016-10-26,PHI,,OKC,L 97-103,*,22:25,...,0,2,4,4,20,8.7,-6,,False,False
1,2.0,2.0,2,2016-10-29,PHI,,ATL,L 72-104,*,15:28,...,1,2,3,3,14,8.8,-3,,False,False
2,3.0,3.0,3,2016-11-01,PHI,,ORL,L 101-103,*,24:52,...,0,4,4,2,18,15.1,5,,False,False
3,3.0,,4,2016-11-02,PHI,@,CHO,L 93-109,Not With Team,Not With Team,...,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,,False,False
4,4.0,4.0,5,2016-11-05,PHI,,CLE,L 101-102,*,25:15,...,1,4,6,3,22,16.3,-11,,True,False


In [12]:
lm = career[(career['is_previous_dnp']) & (~career['is_second_dnp']) & (~career['Gcar'].isna())]
lm.head()

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Unnamed: 5,Opp,Result,GS,MP,...,STL,BLK,TOV,PF,PTS,GmSc,+/-,+-,is_previous_dnp,is_second_dnp
4,4.0,4.0,5,2016-11-05,PHI,,CLE,L 101-102,*,25:15,...,1,4,6,3,22,16.3,-11,,True,False
7,6.0,6.0,8,2016-11-11,PHI,,IND,W 109-105 (OT),*,25:45,...,0,2,5,3,25,13.9,4,,True,False
9,7.0,7.0,10,2016-11-14,PHI,@,HOU,L 88-115,*,22:31,...,1,2,6,5,13,7.3,3,,True,False
11,8.0,8.0,12,2016-11-17,PHI,@,MIN,L 86-110,*,22:26,...,0,1,2,3,10,6.6,-19,,True,False
16,12.0,12.0,17,2016-11-27,PHI,,CLE,L 108-112,*,25:11,...,0,3,3,2,22,17.2,-3,,True,False


In [8]:
lm.columns

Index(['Rk', 'Gcar', 'Gtm', 'Date', 'Team', 'Unnamed: 5', 'Opp', 'Result',
       'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%',
       'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'GmSc', '+/-', '+-', 'is_previous_dnp',
       'is_second_dnp'],
      dtype='object')

In [9]:
lm_stats = lm[['MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%',
               'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
               'TOV', 'PF', 'PTS', 'GmSc', '+/-']].pipe(minutes_played).astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['MP'] = df['MP'].str[:-3].astype(int) + (df['MP'].str[-2:].astype(int) / 60)


In [10]:
lm_stats.mean()

MP      27.949405
FG       7.535714
FGA     16.232143
FG%      0.460554
3P       1.107143
3PA      3.660714
3P%      0.293182
2P       6.428571
2PA     12.571429
2P%      0.513661
eFG%     0.493804
FT       7.946429
FTA      9.714286
FT%      0.803482
ORB      2.142857
DRB      7.642857
TRB      9.785714
AST      3.375000
STL      1.160714
BLK      1.660714
TOV      3.678571
PF       3.250000
PTS     24.125000
GmSc    18.569643
+/-      4.520000
dtype: float64

In [13]:
played = career.dropna(subset='Gcar')
played_stats = played[['MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%',
                       'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
                       'TOV', 'PF', 'PTS', 'GmSc', '+/-']].pipe(minutes_played).astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['MP'] = df['MP'].str[:-3].astype(int) + (df['MP'].str[-2:].astype(int) / 60)


In [14]:
played_stats.mean()

MP      30.732117
FG       9.108407
FGA     18.168142
FG%      0.497743
3P       1.176991
3PA      3.469027
3P%      0.340087
2P       7.931416
2PA     14.699115
2P%      0.535429
eFG%     0.529982
FT       8.311947
FTA     10.039823
FT%      0.815722
ORB      2.230088
DRB      8.803097
TRB     11.033186
AST      3.668142
STL      0.909292
BLK      1.632743
TOV      3.422566
PF       3.028761
PTS     27.705752
GmSc    22.128097
+/-      5.914352
dtype: float64

In [15]:
lm_stats.mean() - played_stats.mean()

MP     -2.782712
FG     -1.572693
FGA    -1.935999
FG%    -0.037190
3P     -0.069848
3PA     0.191688
3P%    -0.046905
2P     -1.502845
2PA    -2.127686
2P%    -0.021768
eFG%   -0.036179
FT     -0.365518
FTA    -0.325537
FT%    -0.012240
ORB    -0.087231
DRB    -1.160240
TRB    -1.247472
AST    -0.293142
STL     0.251422
BLK     0.027971
TOV     0.256005
PF      0.221239
PTS    -3.580752
GmSc   -3.558454
+/-    -1.394352
dtype: float64