In [33]:
import statsmodels
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType
import pandas as pd
import matplotlib as plt
from scipy import stats

### Get Play by play box score in 2024

In [34]:
client.regular_season_player_box_scores(
    player_identifier="wembavi01",
    season_end_year=2024,
    output_type=OutputType.CSV,
    output_file_path="wemby_2024.csv"
)

In [35]:
df = pd.read_csv("wemby_2024.csv")
df.head(5)

Unnamed: 0,active,date,points_scored,plus_minus,team,location,opponent,outcome,seconds_played,made_field_goals,...,made_free_throws,attempted_free_throws,offensive_rebounds,defensive_rebounds,assists,steals,blocks,turnovers,personal_fouls,game_score
0,True,2023-10-25,15,0,SAN ANTONIO SPURS,HOME,DALLAS MAVERICKS,LOSS,1399,6,...,0,1,0,5,2,2,1,5,5,9.3
1,True,2023-10-27,21,9,SAN ANTONIO SPURS,HOME,HOUSTON ROCKETS,WIN,1859,7,...,7,8,5,7,1,3,3,4,2,16.7
2,True,2023-10-29,11,-14,SAN ANTONIO SPURS,AWAY,LOS ANGELES CLIPPERS,LOSS,1576,4,...,3,6,1,4,2,1,1,5,1,4.0
3,True,2023-10-31,18,-3,SAN ANTONIO SPURS,AWAY,PHOENIX SUNS,WIN,1693,6,...,4,5,3,5,1,0,4,5,3,12.5
4,True,2023-11-02,38,21,SAN ANTONIO SPURS,AWAY,PHOENIX SUNS,WIN,2061,15,...,5,6,2,8,2,1,2,2,3,29.8


In [36]:
df["minutes"] = round(df["seconds_played"] / 60)
df["minutes"].apply(int)

0     23
1     31
2     26
3     28
4     34
5     38
6     21
7     30
8     35
9     34
10    28
11    30
12    31
13    27
14    33
15    35
16    25
17    29
18    32
19    32
20    33
21    32
22    31
23    31
24    22
25    24
26    24
27    24
28    26
29    26
30    25
31    21
32    20
33    27
34    27
35    28
36    28
37    28
38    29
39    30
40    31
41    30
42    28
43    27
44    28
45    23
46    29
47    29
48    27
49    31
50    31
51    27
52    34
53    32
54    31
55    30
56    31
Name: minutes, dtype: int64

## Normalize ratings

In [37]:
def per_48_minutes(row, stat="blocks"):
    return row[stat] * (48 / row["minutes"])

In [38]:
df["blocks_per_48"] = df.apply(per_48_minutes, axis=1)
df["steals_per_48"] = df.apply(per_48_minutes, stat="steals", axis=1)
df["rebounds"] = df["defensive_rebounds"] + df["offensive_rebounds"] #Add rebounds column
df["rebounds_per_48"] = df.apply(per_48_minutes, stat="rebounds", axis=1)

In [39]:
df["steals_per_48"].describe()

count    57.000000
mean      2.134609
std       2.165380
min       0.000000
25%       0.000000
50%       1.714286
75%       3.200000
max      11.520000
Name: steals_per_48, dtype: float64

In [40]:
df = df[["minutes", "rebounds", "steals", "blocks", "blocks_per_48", "rebounds_per_48", "steals_per_48"]]

In [41]:
df = df.rename(columns={"minutes": "MIN", "rebounds": "REB", "steals": "STL", "blocks": "BLK", "blocks_per_48": "BLK/48", "rebounds_per_48": "REB/48", "steals_per_48": "STL/48"}) 
df 

Unnamed: 0,MIN,REB,STL,BLK,BLK/48,REB/48,STL/48
0,23.0,5,2,1,2.086957,10.434783,4.173913
1,31.0,12,3,3,4.645161,18.580645,4.645161
2,26.0,5,1,1,1.846154,9.230769,1.846154
3,28.0,8,0,4,6.857143,13.714286,0.0
4,34.0,10,1,2,2.823529,14.117647,1.411765
5,38.0,9,1,5,6.315789,11.368421,1.263158
6,21.0,10,1,2,4.571429,22.857143,2.285714
7,30.0,9,0,1,1.6,14.4,0.0
8,35.0,9,1,4,5.485714,12.342857,1.371429
9,34.0,11,1,1,1.411765,15.529412,1.411765


In [42]:
df["Date"] = "2023-24"
df["Name"] = "Victor Wembanyama"
df

Unnamed: 0,MIN,REB,STL,BLK,BLK/48,REB/48,STL/48,Date,Name
0,23.0,5,2,1,2.086957,10.434783,4.173913,2023-24,Victor Wembanyama
1,31.0,12,3,3,4.645161,18.580645,4.645161,2023-24,Victor Wembanyama
2,26.0,5,1,1,1.846154,9.230769,1.846154,2023-24,Victor Wembanyama
3,28.0,8,0,4,6.857143,13.714286,0.0,2023-24,Victor Wembanyama
4,34.0,10,1,2,2.823529,14.117647,1.411765,2023-24,Victor Wembanyama
5,38.0,9,1,5,6.315789,11.368421,1.263158,2023-24,Victor Wembanyama
6,21.0,10,1,2,4.571429,22.857143,2.285714,2023-24,Victor Wembanyama
7,30.0,9,0,1,1.6,14.4,0.0,2023-24,Victor Wembanyama
8,35.0,9,1,4,5.485714,12.342857,1.371429,2023-24,Victor Wembanyama
9,34.0,11,1,1,1.411765,15.529412,1.411765,2023-24,Victor Wembanyama


In [43]:
df.to_csv("wemby_2024_individual.csv", index=False)

In [44]:
# client.regular_season_player_box_scores(
#     player_identifier="wembavi01",
#     season_end_year=2024,
#     output_type=OutputType.CSV,
#     output_file_path="wemby_2024.csv"
# )   