In [39]:
from functools import reduce
from typing import List

from nba_api.stats.endpoints import (
    LeagueDashPlayerStats,
    LeagueDashPlayerBioStats,
    TeamGameLogs,
)
import pandas as pd
import math

In [14]:
# Read the game-log parquet file, then print its first row as a markdown
# table (one table line per column) to see which fields are available.
df = pd.read_parquet("data/gamelog_2023.parquet")
first_row = df.iloc[0]
print(first_row.to_markdown())

|                   | 0                   |
|:------------------|:--------------------|
| game_n            | 0                   |
| SEASON_YEAR       | 2022-23             |
| TEAM_ID           | 1610612746          |
| TEAM_ABBREVIATION | LAC                 |
| TEAM_NAME         | LA Clippers         |
| GAME_ID           | 0022200207          |
| GAME_DATE         | 2022-11-15T00:00:00 |
| MATCHUP           | LAC @ DAL           |
| WL                | L                   |
| MIN               | 48.0                |
| FGM               | 34                  |
| FGA               | 64                  |
| FG_PCT            | 0.531               |
| FG3M              | 12                  |
| FG3A              | 28                  |
| FG3_PCT           | 0.429               |
| FTM               | 21                  |
| FTA               | 26                  |
| FT_PCT            | 0.808               |
| OREB              | 3                   |
| DREB              | 37        

In [49]:
# Keep only the columns needed to compare ratings against points and
# possessions for each team-game row.
eff = df[
    [
        "TEAM_ABBREVIATION",
        "GAME_DATE",
        "MATCHUP",
        "OFF_RATING",
        "DEF_RATING",
        "PTS",
        "POSS",
    ]
]

In [50]:
# Display the selected columns; pandas abbreviates the middle rows of a long frame.
eff

Unnamed: 0,TEAM_ABBREVIATION,GAME_DATE,MATCHUP,OFF_RATING,DEF_RATING,PTS,POSS
0,LAC,2022-11-15T00:00:00,LAC @ DAL,106.3,109.6,101,95
1,SAC,2022-11-15T00:00:00,SAC vs. BKN,140.4,111.0,153,109
2,UTA,2022-11-15T00:00:00,UTA vs. NYK,107.8,114.6,111,103
3,SAS,2022-11-15T00:00:00,SAS @ POR,112.2,120.6,110,98
4,BKN,2022-11-15T00:00:00,BKN @ SAC,111.0,140.4,121,109
...,...,...,...,...,...,...,...
469,IND,2022-11-16T00:00:00,IND @ CHA,120.2,107.6,125,104
470,NOP,2022-11-16T00:00:00,NOP vs. CHI,126.5,111.1,124,98
471,MIL,2022-11-16T00:00:00,MIL vs. CLE,120.2,103.2,113,94
472,MIN,2022-11-16T00:00:00,MIN @ ORL,124.8,106.9,126,101


In [17]:
# Show only the rows whose team abbreviation is "BOS".
eff[eff["TEAM_ABBREVIATION"].eq("BOS")]

Unnamed: 0,TEAM_ABBREVIATION,GAME_DATE,MATCHUP,OFF_RATING,DEF_RATING,PTS
20,BOS,2022-11-14T00:00:00,BOS vs. OKC,112.5,108.0,126
40,BOS,2022-11-12T00:00:00,BOS @ DET,119.4,109.1,117
64,BOS,2022-11-11T00:00:00,BOS vs. DEN,140.9,120.4,131
86,BOS,2022-11-09T00:00:00,BOS vs. DET,125.5,109.8,128
113,BOS,2022-11-07T00:00:00,BOS @ MEM,110.1,105.0,109
148,BOS,2022-11-05T00:00:00,BOS @ NYK,135.7,119.2,133
162,BOS,2022-11-04T00:00:00,BOS vs. CHI,119.4,114.4,123
186,BOS,2022-11-02T00:00:00,BOS @ CLE,102.7,103.6,113
233,BOS,2022-10-30T00:00:00,BOS vs. WAS,115.5,96.9,112
278,BOS,2022-10-28T00:00:00,BOS vs. CLE,112.8,121.1,123


In [31]:
# Worked example with the BOS row at index 20: 126 points at an offensive
# rating (ORTG) of 112.5.
# ORTG = 100 * (pts / poss)  =>  ORTG / 100 = pts / poss  =>  poss = pts / (ORTG / 100)
# So this game had 126 / 1.125 = 112 possessions.
126/(112.5/100)

112.0

In [44]:
# OFF_RATING is stored with limited precision (noted by the author as
# truncated), so PTS / (OFF_RATING / 100) lands slightly off a whole number;
# rounding to the nearest integer recovers the possession count.
# NOTE(review): the frame appears to already carry a "POSS" column (it is
# selected into `eff`) -- confirm whether this derived "poss" is still needed.
# This adds the column to `df` in place; the result is a float column.
df["poss"] = df["PTS"].div(df["OFF_RATING"] / 100).round()
df

Unnamed: 0,game_n,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,PACE_RANK,PIE_RANK,poss
0,0,2022-23,1610612746,LAC,LA Clippers,0022200207,2022-11-15T00:00:00,LAC @ DAL,L,48.0,...,153,418,30,61,410,34,19,375,167,95.0
1,1,2022-23,1610612758,SAC,Sacramento Kings,0022200210,2022-11-15T00:00:00,SAC vs. BKN,W,48.0,...,5,89,4,8,256,4,3,13,27,109.0
2,2,2022-23,1610612762,UTA,Utah Jazz,0022200208,2022-11-15T00:00:00,UTA vs. NYK,L,48.0,...,118,111,294,183,396,251,198,91,214,103.0
3,3,2022-23,1610612759,SAS,San Antonio Spurs,0022200209,2022-11-15T00:00:00,SAS @ POR,L,48.0,...,9,199,184,237,368,125,145,297,231,98.0
4,4,2022-23,1610612751,BKN,Brooklyn Nets,0022200210,2022-11-15T00:00:00,BKN @ SAC,L,48.0,...,201,417,332,413,256,140,69,13,394,109.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,21,2022-23,1610612754,IND,Indiana Pacers,0022200211,2022-11-16T00:00:00,IND @ CHA,W,48.0,...,10,20,24,23,10,9,6,3,8,104.0
470,22,2022-23,1610612740,NOP,New Orleans Pelicans,0022200217,2022-11-16T00:00:00,NOP vs. CHI,W,48.0,...,6,4,12,5,16,8,7,15,5,98.0
471,23,2022-23,1610612749,MIL,Milwaukee Bucks,0022200216,2022-11-16T00:00:00,MIL vs. CLE,W,48.0,...,14,2,1,1,13,12,15,21,10,94.0
472,24,2022-23,1610612750,MIN,Minnesota Timberwolves,0022200212,2022-11-16T00:00:00,MIN @ ORL,W,48.0,...,9,19,22,19,6,10,9,7,2,101.0


In [51]:
# With a possession count on every row, points and possessions can be summed
# and averaged accurately instead of averaging the per-game ratings.
teameff = df[
    [
        "TEAM_ABBREVIATION",
        "GAME_DATE",
        "MATCHUP",
        "OFF_RATING",
        "DEF_RATING",
        "PTS",
        "poss",
    ]
]
# Emit one dict per row so the individual records can be inspected.
teameff.to_dict("records")

[{'TEAM_ABBREVIATION': 'LAC',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'LAC @ DAL',
  'OFF_RATING': 106.3,
  'DEF_RATING': 109.6,
  'PTS': 101,
  'poss': 95.0},
 {'TEAM_ABBREVIATION': 'SAC',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'SAC vs. BKN',
  'OFF_RATING': 140.4,
  'DEF_RATING': 111.0,
  'PTS': 153,
  'poss': 109.0},
 {'TEAM_ABBREVIATION': 'UTA',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'UTA vs. NYK',
  'OFF_RATING': 107.8,
  'DEF_RATING': 114.6,
  'PTS': 111,
  'poss': 103.0},
 {'TEAM_ABBREVIATION': 'SAS',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'SAS @ POR',
  'OFF_RATING': 112.2,
  'DEF_RATING': 120.6,
  'PTS': 110,
  'poss': 98.0},
 {'TEAM_ABBREVIATION': 'BKN',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'BKN @ SAC',
  'OFF_RATING': 111.0,
  'DEF_RATING': 140.4,
  'PTS': 121,
  'poss': 109.0},
 {'TEAM_ABBREVIATION': 'NOP',
  'GAME_DATE': '2022-11-15T00:00:00',
  'MATCHUP': 'NOP vs. MEM',
  'OFF_RATING': 115.3,
  'DEF_RATING': 103

In [27]:
# Scratch check: (ORTG / 100) / pts is the reciprocal of possessions
# (1 / 112 ~= 0.00893), not the possession count itself.
(112.5/100) / 126

0.008928571428571428

In [28]:
# Scratch check: ORTG / pts equals 100 / possessions (100 / 112 ~= 0.893).
112.5/126

0.8928571428571429