In [1]:
import datetime
import io

import pandas as pd
import requests
from lxml import html

In [2]:
# Map each franchise nickname to its team abbreviation. Lookups elsewhere in this notebook
# use the LAST word of a full team name as the key (hence "Blazers" rather than "Trail Blazers").
nicknames = {"Hawks":"ATL","Celtics":"BOS","Nets":"BRK","Hornets":"CHO","Bulls":"CHI","Cavaliers":"CLE",
             "Mavericks":"DAL","Nuggets":"DEN","Pistons":"DET","Warriors":"GSW","Rockets":"HOU","Pacers":"IND",
             "Clippers":"LAC","Lakers":"LAL","Grizzlies":"MEM","Heat":"MIA","Bucks":"MIL","Timberwolves":"MIN",
             "Pelicans":"NOP","Knicks":"NYK","Thunder":"OKC","Magic":"ORL","76ers":"PHI","Suns":"PHO",
             "Blazers":"POR","Kings":"SAC","Spurs":"SAS","Raptors":"TOR","Jazz":"UTA","Wizards":"WAS"}

# Team abbreviations, in the same order as the entries of `nicknames`; used to build team page URLs
teams = list(nicknames.values())

In [3]:
# Marker appended to the names of two-way players on roster pages
_TWO_WAY_TAG = " (TW)"
# Generational suffixes removed from player names; " III" must be tested before " II"
_NAME_SUFFIXES = (" Jr.", " Sr.", " III", " II", " IV")


def _clean_player_name(name):
    """Remove a trailing two-way marker and a trailing generational suffix from a player name."""
    if name.endswith(_TWO_WAY_TAG):
        name = name[:-len(_TWO_WAY_TAG)]
    for suffix in _NAME_SUFFIXES:
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name


def _height_to_inches(height):
    """Convert a 'feet-inches' string such as '6-10' to a total number of inches."""
    feet, inches = height.split("-")
    return int(feet) * 12 + int(inches)


def _age_in_years(birth_date, today):
    """Return the whole number of years between a 'Month day, Year' birth date and `today`."""
    born = datetime.datetime.strptime(birth_date, '%B %d, %Y')
    return int((today - born).days / 365.25)


def get_player_info():
    """
    Get HTML tables containing player info for each team roster on basketball-reference.com

    Returns a single dataframe with one row per rostered player and the columns
    Player, Team, Pos, Ht (inches), Wt, Age (whole years), Exp (rookies = 0) and ID (site player ID).
    Rows with any missing value are dropped; a message is printed for each team where that happens.
    Each roster page is downloaded once and reused for both the table and the player IDs.
    """
    today = datetime.datetime.now()
    rosters = []
    for team in teams:
        url = f"https://www.basketball-reference.com/teams/{team}/2022.html#all_roster"
        response = requests.get(url)
        response.raise_for_status()

        roster = pd.read_html(io.BytesIO(response.content))[0]
        roster = roster.drop(columns=["No.","Unnamed: 6","College"])
        if roster.isna().sum().sum()>0:
            print(f"{team} has missing player values")
        # dropna keeps the original row labels, which are still needed below to find each player's HTML row
        roster = roster.dropna().copy()

        # Change birth date to each player's current age, and change height to inches for easier data analysis.
        # Plain lists are assigned positionally; wrapping them in pd.Series would align on the (now gappy)
        # index and silently introduce NaN / shifted values whenever a row had been dropped.
        roster["Birth Date"] = [_age_in_years(born, today) for born in roster["Birth Date"]]
        roster = roster.rename(columns={"Birth Date": "Age"})
        roster["Ht"] = [_height_to_inches(height) for height in roster["Ht"]]
        roster["Wt"] = roster["Wt"].astype("int64")
        roster["Exp"] = roster["Exp"].replace({"R":"0"}).astype("int64") # Change rookie experience values to zero for converting series to int
        roster["Player"] = [_clean_player_name(name) for name in roster["Player"]]

        # Get each rostered player's site ID from the link in the player's table row.
        # The surviving index labels are the 0-based positions of the rows in the HTML table body.
        tree = html.fromstring(response.content)
        table_rows = tree.xpath('//*[@id="roster"]/tbody/tr')
        roster["ID"] = [
            table_rows[pos].xpath('./td[1]/a')[0].get("href").split("/")[3].split(".")[0]
            for pos in roster.index
        ]
        roster["Team"] = team
        rosters.append(roster)

    # Condense all team rosters into a single dataframe with a fixed column order
    players = pd.concat(rosters, ignore_index=True)
    return players[["Player","Team","Pos","Ht","Wt","Age","Exp","ID"]]


players = get_player_info()

# Manually add one player that is not picked up by the roster scrape.
# DataFrame.append was removed in pandas 2.0, so the extra row is concatenated instead.
extra_player = {"Player":"Craig Sword","Team":"NOP","Pos":"G","Ht":75,"Wt":196,"Age":27,"Exp":0,"ID":"swordcr01"}
players = pd.concat([players, pd.DataFrame([extra_player])], ignore_index=True)

NOP has missing player values


In [4]:
def _clean_game_log(raw_log):
    """Turn one raw game-log table into a tidy dataframe with a fixed set of stat columns."""
    log = raw_log.drop(columns=["Rk","G","Age","FG%","FT%"])
    if "3P%" in log.columns:
        log = log.drop(columns="3P%")

    # Keep only rows with a numeric point total. This removes repeated header rows and rows for
    # games the player did not appear in, without discarding real games by row position.
    played = pd.to_numeric(log["PTS"], errors="coerce").notna()
    log = log[played].copy()

    # "@" marks a road game; anything else (including a blank) is a home game
    log["Unnamed: 5"] = ["Road" if marker=="@" else "Home" for marker in log["Unnamed: 5"]]
    log = log.rename(columns={"Unnamed: 5": "Court"})

    # The result column looks like "W (+5)" / "L (-12)": outcome first, signed margin in parentheses
    result_parts = [result.split(" ") for result in log["Unnamed: 7"]]
    log["W/L"] = [parts[0] for parts in result_parts]
    log["Spread"] = [int(parts[1].strip("()")) for parts in result_parts]
    log["+/-"] = log["+/-"].fillna(0).astype("int64")
    log = log.drop(columns="Unnamed: 7")

    # Change datatypes of numeric columns to ints; columns holding text or decimals are left as they are
    for column in log:
        try:
            log[column] = log[column].astype("int64")
        except (ValueError, TypeError):
            pass
    log["MP"] = [round(int(mp.split(":")[0]) + int(mp.split(":")[1])/60,1) for mp in log["MP"]] # Convert minutes played to a decimal
    log["GmSc"] = log["GmSc"].astype("float64")

    stats = ["Date","Tm","Opp","Court","W/L","Spread","+/-","GS","MP","GmSc","PTS","FG",
             "FGA","3P","3PA","FT","FTA","ORB","DRB","TRB","AST","BLK","STL","TOV","PF"]
    # Fill empty stats with zeroes
    for stat in stats:
        if stat not in log.columns:
            log[stat] = 0
    return log[stats].reset_index(drop=True)


def get_player_logs():
    """
    Get HTML tables containing player logs from basketball-reference.com

    Returns a dictionary mapping each player's name to a dataframe of the games they played,
    one row per game. Players whose log cannot be downloaded or cleaned are skipped.
    """
    # Organize each player's individual game logs into a dictionary with player's name as the key
    player_logs = {}
    for player, player_id in zip(players["Player"], players["ID"]):
        url = f'https://www.basketball-reference.com/players/{player_id[0]}/{player_id}/gamelog/2022'
        try:
            raw_log = pd.read_html(url,attrs={"id":"pgl_basic"})[0]
            player_logs[player] = _clean_game_log(raw_log)
        except Exception:
            # Best effort: player logs for some players with limited playing time omit standard
            # columns, which breaks the cleaning step. Those players are left out of the result.
            # (Unlike a bare `except:`, this still lets Ctrl-C interrupt the long download loop.)
            continue
    return player_logs
    
# Download every player's game log once (one web request per row of `players`) and keep the result for the cells below
player_logs = get_player_logs()

In [5]:
def get_games():
    """
    Combine games played from each player's game log into nested dictionaries that can be indexed into by matchup and date in either order.
    Matchup formatted with both teams' abbreviations in alphabetical order split by a hyphen.
    Date formatted as "yyyy-mm-dd"

    Returns a dictionary where both games[matchup][date] and games[date][matchup] hold a dataframe
    with one row per player who appeared in that game (the player's log row plus a "Player" column).
    The entries of `player_logs` themselves are not modified.
    """
    # First collect every player's row for each game, then build each game's table with a single
    # concat. Growing the tables one row at a time is quadratic and starts from empty frames.
    rows_by_game = {}
    for name, source_log in player_logs.items():
        log = source_log.copy()
        log["Player"] = name
        for pos, (team, opponent, date) in enumerate(zip(log["Tm"], log["Opp"], log["Date"])):
            matchup = "-".join(sorted([team, opponent]))
            rows_by_game.setdefault((matchup, date), []).append(log.iloc[pos:pos + 1])

    games = {}
    for (matchup, date), rows in rows_by_game.items():
        box_score = pd.concat(rows, ignore_index=True)
        games.setdefault(matchup, {})[date] = box_score
        # Independent copy so that editing one view of a game never changes the other
        games.setdefault(date, {})[matchup] = box_score.copy()
    return games
    
# Build the matchup/date lookup once from the downloaded player logs; later cells read from it
games = get_games()

In [6]:
def team_logs(team,opponent="ANY"):
    """
    Return the player stat tables for every game a team has played, oldest game first.

    team: abbreviation from 'teams'; any other value raises NameError.
    opponent: when this is also an abbreviation from 'teams', only games between the two
              teams are returned; any other value (the default) means every opponent.

    Each table joins the game's player rows with the player's Team/Pos/Ht/Wt from 'players'
    and is sorted by team, starter flag and points, all descending.
    """
    if team not in teams:
        raise NameError("You entered an invalid team abbrevation")

    restrict_to_opponent = opponent in teams

    # Collect (date, matchup) pairs; date keys of 'games' never contain a team abbreviation,
    # so only matchup keys survive the membership checks.
    dated_matchups = []
    for key in games:
        if team not in key:
            continue
        if restrict_to_opponent and opponent not in key:
            continue
        dated_matchups.extend((date, key) for date in list(games[key]))

    box_scores = [games[date][matchup] for date, matchup in sorted(dated_matchups)]

    cols = ["Player","Team","Pos","Ht","Wt","GS","MP","FG","FGA","3P",
            "3PA","FT","FTA","PTS","TRB","AST","BLK","STL","TOV","PF"]
    roster_info = players[cols[:5]]

    team_games = []
    for box_score in box_scores:
        merged = pd.merge(roster_info,box_score,how="inner")
        ordered = merged[cols].sort_values(["Team","GS","PTS"],ascending=False,ignore_index=True)
        team_games.append(ordered)
    return team_games

In [7]:
def get_player_offense(filter="totals",team="ALL",position="All",minutes=0):
    """
    Create a sortable leaderboard for player stats.
    Filter: Three options to view the player stats
            - 'totals' returns player totals for the season
            - 'means' returns player averages per game
            - 'per_36' returns player averages per 36 minutes played
    Team: Look up only the player stats for a certain team
            - Any team abbreviation from 'teams' is allowed; any other value keeps every team
    Position: Look up only the players stats for a certain position
            - Any value found in the 'Pos' column of 'players'; any other value keeps every position
    Minutes: Look up only the player stats for players whose total minutes played exceed this value

    Returns a dataframe sorted by game score (GmSc), highest first, with the columns
    Player, Team, Pos, Ht, Wt, Age, MP, GmSc, PTS, FG, FGA, 3P, 3PA, FT, FTA, TRB, AST, BLK, STL, TOV.
    Raises NameError when 'filter' is not one of the three supported keywords.
    """
    if filter not in ("totals", "means", "per_36"):
        raise NameError("Filter parameter only accepts three keywords: 'totals,' 'means,' or 'per_36'")

    # Columns are selected by name so the result does not depend on the column order of the inputs
    id_cols = ["Player","Team","Pos","Ht","Wt","Age"]
    counting_stats = ["PTS","FG","FGA","3P","3PA","FT","FTA","TRB","AST","BLK","STL","TOV"]
    cols = id_cols + ["MP","GmSc"] + counting_stats

    aggregates = {}
    for player, log in player_logs.items():
        total_minutes = log["MP"].sum()
        if not total_minutes > minutes:
            continue
        numeric = log.select_dtypes(include=["int64","float64"])
        if filter == "totals":
            summary = numeric.sum()
        elif filter == "means":
            summary = numeric.mean().round(1)
        else:
            summary = (numeric.sum() / total_minutes * 36).round(1)
        aggregates[player] = summary.to_dict()

    if not aggregates:
        # Nobody met the minutes threshold: return an empty leaderboard instead of failing in the merge
        return pd.DataFrame(columns=cols)

    # Players missing any numeric stat are dropped, then the player attributes are attached by name
    leaderboard = pd.DataFrame.from_dict(aggregates, orient="index").dropna()
    leaderboard = leaderboard.rename_axis("Player").reset_index()
    leaderboard = players[id_cols].merge(leaderboard, how="inner", on="Player")

    if filter=="totals":
        # Season totals of counting stats are whole numbers
        for stat in counting_stats:
            leaderboard[stat] = leaderboard[stat].astype("int64")
    if team in teams:
        leaderboard = leaderboard[leaderboard["Team"]==team]
    if position in players["Pos"].unique():
        leaderboard = leaderboard[leaderboard["Pos"]==position]

    return leaderboard[cols].sort_values(by="GmSc",ascending=False)

In [8]:
def get_team_offense(filter="means",position="ALL",minutes=0,min_ht=None,max_ht=None):
    """
    Create a sortable leaderboard for all team stats.
    Filter: Two options to view the team stats
            - 'totals' returns team totals for the season
            - 'means' returns team averages per game played
    Position: Include only the stats for a certain position
            - Any value found in the 'Pos' column of 'players'; any other value keeps every position
    Minutes: Include only the stats for players whose total minutes played exceed this value
    Min_Ht: Only include stats from players with a minimum height expressed in inches as an integer
            - Defaults to the shortest height in 'players' at the time of the call
    Max_Ht: Only include stats from players with a maximum height expressed in inches as an integer
            - Defaults to the tallest height in 'players' at the time of the call

    Returns a dataframe indexed by team abbreviation and sorted by points, highest first.
    In 'totals' mode every column (including Ht, Wt and Age) is a sum over the selected players.
    In 'means' mode Ht, Wt and Age are the averages of the team's full roster and the remaining
    columns are divided by the team's number of games played.
    Raises NameError for any other filter value.
    """
    if filter not in ("totals", "means"):
        raise NameError("The filter parameter must be either 'totals' or 'means")
    # Resolve the height bounds at call time so they always reflect the current 'players' table
    if min_ht is None:
        min_ht = players["Ht"].min()
    if max_ht is None:
        max_ht = players["Ht"].max()

    bio_cols = ["Ht","Wt","Age"]
    stat_cols = ["PTS","FG","FGA","3P","3PA","FT","FTA","TRB","AST","BLK","STL","TOV"]

    totals = get_player_offense("totals",minutes=minutes)
    if position in players["Pos"].unique():
        totals = totals[totals["Pos"]==position]
    totals = totals[(totals["Ht"]>=min_ht) & (totals["Ht"]<=max_ht)]

    # One row per team, in the order of 'teams'; teams with no selected players get zeroes
    df = (totals.groupby("Team")[bio_cols + stat_cols].sum()
                .reindex(teams, fill_value=0)
                .rename_axis(None))
    if filter=="totals":
        return df.sort_values(by="PTS",ascending=False)

    df = df.astype("float64")
    # Games played = number of dated entries under every matchup key that contains the team
    games_played = pd.Series(
        {tm: sum(len(games[key]) for key in games if tm in key) for tm in teams},
        dtype="float64",
    )
    df[stat_cols] = df[stat_cols].div(games_played, axis=0).round(1)
    roster_means = players.groupby("Team")[bio_cols].mean().round(1).reindex(df.index)
    df[bio_cols] = roster_means.to_numpy()
    return df.sort_values(by="PTS",ascending=False)

In [9]:
def get_team_defense(filter="totals",position="ALL",roster="FULL",min_ht=None,max_ht=None):
    """
    Create a sortable leaderboard for all stats against each team.
    Filter: Three options to view the team stats
            - 'totals' returns opponent totals for the season
            - 'means' returns opponent averages per game
            - 'per_36' returns opponent averages per 36 opponent minutes played
    Position: Include only the stats for a certain position
            - Any value found in the 'Pos' column of 'players'; any other value keeps every position
    Roster: Pass an integer to keep only opponents whose games-started flag (GS) equals it;
            any non-integer value (the default) keeps every opponent
    Min_Ht: Only include stats from players with a minimum height expressed in inches as an integer
            - Defaults to the shortest height in 'players' at the time of the call
    Max_Ht: Only include stats from players with a maximum height expressed in inches as an integer
            - Defaults to the tallest height in 'players' at the time of the call

    Returns a dataframe indexed by team abbreviation with a GP (games played) column followed by
    the opponent stats, sorted by points allowed, lowest first. 'means' and 'per_36' also add
    FG% and 3P% columns. Raises NameError for any other filter value.
    """
    if filter not in ("totals", "means", "per_36"):
        raise NameError("Filter parameter only accepts three keywords: 'totals,' 'means,' or 'per_36'")
    # Resolve the height bounds at call time so they always reflect the current 'players' table
    if min_ht is None:
        min_ht = players["Ht"].min()
    if max_ht is None:
        max_ht = players["Ht"].max()

    # The stat columns that follow the player attributes and GS in the tables returned by team_logs
    stat_cols = ["MP","FG","FGA","3P","3PA","FT","FTA","PTS","TRB","AST","BLK","STL","TOV","PF"]
    filter_position = position in players["Pos"].unique()

    team_totals = {}
    for team in teams:
        logs = team_logs(team)
        own_players = players.loc[players["Team"]==team,"Player"]
        opp_frames = []
        for log in logs:
            opp_stats = log[~log["Player"].isin(own_players)]
            if filter_position:
                opp_stats = opp_stats[opp_stats["Pos"]==position]
            if isinstance(roster, int):
                opp_stats = opp_stats[opp_stats["GS"]==roster]
            opp_stats = opp_stats[(opp_stats["Ht"]>=min_ht) & (opp_stats["Ht"]<=max_ht)]
            opp_frames.append(opp_stats)
        if opp_frames:
            sums = pd.concat(opp_frames)[stat_cols].sum().to_dict()
        else:
            # A team with no games still gets a complete row of zeroes
            sums = {stat: 0 for stat in stat_cols}
        team_totals[team] = {"GP": len(logs), **sums}

    df = pd.DataFrame.from_dict(team_totals, orient="index").reindex(columns=["GP"] + stat_cols)
    df["GP"] = df["GP"].astype("int64")
    if filter=="totals":
        df[stat_cols] = df[stat_cols].astype("int64")
    elif filter=="means":
        df[stat_cols] = df[stat_cols].div(df["GP"], axis=0).round(1)
    else:
        # Minutes are truncated to whole minutes before being used as the per-36 denominator
        df["MP"] = df["MP"].astype("int64")
        rate_cols = [stat for stat in stat_cols if stat != "MP"]
        df[rate_cols] = df[rate_cols].div(df["MP"], axis=0).mul(36).round(1)

    if filter=="means" or filter=="per_36":
        df["FG%"] = round(df["FG"]/df["FGA"],3)
        df["3P%"] = round(df["3P"]/df["3PA"],3)
    return df.sort_values(by="PTS")


# Pre-compute the defensive tables that get_dashboard looks up by category:
# per-game averages overall, and per-36 rates allowed to each position.
team_def_means = get_team_defense("means")
team_def_C, team_def_PF, team_def_SF, team_def_SG, team_def_PG = [
    get_team_defense("per_36", pos) for pos in ("C", "PF", "SF", "SG", "PG")
]

In [10]:
def _classify_designation(description):
    """Map an injury description to 'Out', 'Doubtful', 'Probable' or 'Questionable'."""
    if "OUT" in description:
        return "Out"
    if "DOUBTFUL" in description:
        return "Doubtful"
    if "PROBABLE" in description:
        return "Probable"
    # Match "out" only as a whole word, so words such as "without" or "about" do not count
    words = {word.strip(".,;:!?()\"'") for word in description.lower().split()}
    if "out" in words:
        return "Out"
    return "Questionable"


def get_injury_report(team="ALL"):
    """
    Get players with designated injuries from basketball-reference and determine their game status for the day

    Team: Any team abbreviation from 'teams' limits the report to that team; any other value returns all teams

    Returns a dataframe sorted by days since the last update, with the columns
    Player, Team, Designation, Update (days since the update), Type (the text inside the first
    parentheses of the description, empty if there are none), Surgery (True when the description
    mentions surgery) and Description.
    """
    df = pd.read_html("https://www.basketball-reference.com/friv/injuries.fcgi")[0]
    now = datetime.datetime.now()

    df["Designation"] = [_classify_designation(text) for text in df["Description"]]
    df["Update"] = [(now-datetime.datetime.strptime(day,"%a, %b %d, %Y")).days for day in df["Update"]]
    df["Team"] = [nicknames[name.split()[-1]] for name in df["Team"]]
    df["Type"] = [text.split("(")[1].split(")")[0] if "(" in text else "" for text in df["Description"]]
    df["Surgery"] = ["surgery" in text.lower() for text in df["Description"]]

    df = df[["Player","Team","Designation","Update","Type","Surgery","Description"]].sort_values(by="Update")
    if team in teams:
        return df[df["Team"]==team]
    return df

injury_report = get_injury_report()

# Rename this player so his injury-report name matches the name used in this notebook's player data
# (presumably "Mo Bamba" there; confirm against `players`).
# Note: `"name" in series` tests the index labels, not the values, so the rename is done with replace().
injury_report["Player"] = injury_report["Player"].replace({"Mohamed Bamba": "Mo Bamba"})


def injury_filter(team,filter="means",minutes=0):
    """
    Player leaderboard for one team, leaving out everyone listed on the injury report.
    Team: Team abbreviation passed on to get_player_offense
            - Any team abbreviation from 'teams' is allowed
    Filter: How the stats are aggregated, passed on to get_player_offense
            - 'totals' returns player totals for the season
            - 'means' returns player averages per game
            - 'per_36' returns player averages per 36 minutes played
    Minutes: Minimum-minutes threshold passed on to get_player_offense

    Players are matched to the injury report by name.
    """
    team_stats = get_player_offense(filter,team=team,minutes=minutes)
    listed_as_injured = team_stats["Player"].isin(injury_report["Player"])
    return team_stats[~listed_as_injured]

# Show injury-report rows whose player name does not appear in `players`.
# injury_filter matches players by name, so anyone listed here would not be filtered out.
injury_report[~injury_report["Player"].isin(players["Player"])]

Unnamed: 0,Player,Team,Designation,Update,Type,Surgery,Description
16,Justin Jackson,BOS,Out,6,Health and Safety Protocols,False,Out (Health and Safety Protocols) - The Celtic...
121,Daulton Hommes,NOP,Out,33,Fibula,False,Out (Fibula) - Hommes is dealing with a right ...


In [11]:
def get_matchups(date_=None):
    """
    Get game matchups and times from basketball-reference for a given date.
    Date: a datetime.date, a datetime.datetime, or a string formatted 'yyyy-mm-dd';
          uses the current date (evaluated at call time) as the default

    Returns a dataframe with one row per game on that date and the columns
    Date, Start (ET), Road, Home and MU (both abbreviations in alphabetical order, hyphen-separated).
    """
    # Resolve the default when the function is called; a default of date.today() in the signature
    # would be frozen at the moment the cell defining the function was run.
    if date_ is None:
        date_ = datetime.date.today()
    elif isinstance(date_, str):
        date_ = datetime.date.fromisoformat(date_)
    elif isinstance(date_, datetime.datetime):
        date_ = date_.date()

    month_ = date_.strftime("%B").lower()
    url = f"https://www.basketball-reference.com/leagues/NBA_2022_games-{month_}.html"
    df = pd.read_html(url)[0].rename(columns={"Visitor/Neutral":"Road","Home/Neutral":"Home"})
    df = df.iloc[:,:5].drop(columns="PTS")

    # Rows whose date cannot be parsed become NaT and never match the requested date
    parsed = pd.to_datetime(df["Date"], format="%a, %b %d, %Y", errors="coerce")
    df["Date"] = parsed.dt.date
    # Filter first, so team names are only translated for the games that are returned
    df = df[df["Date"]==date_].copy()

    df["Road"] = [nicknames[name.split()[-1]] for name in df["Road"]]
    df["Home"] = [nicknames[name.split()[-1]] for name in df["Home"]]
    df["MU"] = ["-".join(sorted(pair)) for pair in df[["Road","Home"]].values]
    return df

# Schedule for the default date of get_matchups; get_dashboard and the cells below read this table
games_today = get_matchups()

In [12]:
def get_dashboard(team,category):
    """
    An attempt to simplify and condense many of the previous functions for quicker matchup analysis.
    Team: Look up only the player stats for a certain team
            - Any team abbreviation from 'teams' is allowed
    Category: Look up certain aspects of a given matchup (case-insensitive)
            - History: The previous player logs for each matchup between two teams.
            - Injuries: Shows injury status of all current players from both teams in a matchup.
            - Rosters: Shows current full roster of both teams in a matchup.
            - Overall: Look up team defense stats for both teams in a matchup.
                - Can also look up team defenses against certain positions by passing in that position abbreviation
                    - ex("PG","SG","SF","PF","C")

    Defense categories return z-scores (each column standardized across all teams) for the two
    teams in the matchup; the stored defense tables themselves are left unchanged.
    Returns the string "<team> doesn't play today" when the team has no game in 'games_today'.
    Raises NameError for an unknown team or category.
    """
    positions = list(players["Pos"].unique())
    # Normalize the spelling: positions are upper case ("pg" -> "PG"), other categories are
    # capitalized ("injuries" -> "Injuries"). str.capitalize alone would turn "PG" into "Pg".
    category = category.upper() if category.upper() in positions else category.capitalize()
    if team not in teams:
        raise NameError("Invalid team selection")
    if category not in ["History","Injuries","Rosters","Overall"]+positions:
        raise NameError("Invalid category selection")

    if team not in list(games_today["Home"])+list(games_today["Road"]):
        return f"{team} doesn't play today"

    dfs = {"PG":team_def_PG,"SG":team_def_SG,"SF":team_def_SF,"PF":team_def_PF,"C":team_def_C,"Overall":team_def_means}
    for MU in games_today["MU"]:
        if team not in MU:
            continue
        tm1,tm2 = MU.split("-")[0], MU.split("-")[1]
        if category=="History":
            return team_logs(tm1,tm2)
        if category=="Injuries":
            return injury_report[(injury_report["Team"]==tm1) | (injury_report["Team"]==tm2)].sort_values("Team")
        if category=="Rosters":
            return pd.concat([injury_filter(tm1,minutes=200),injury_filter(tm2,minutes=200)])
        if category not in dfs:
            # A position that exists in 'players' but has no pre-computed defense table
            raise NameError("Invalid category selection")
        # Standardize a copy so repeated calls never overwrite the stored raw values
        df = dfs[category].copy()
        aggregates = df.agg(["mean","std"])
        for column in aggregates:
            df[column] = round((df[column]-aggregates[column]["mean"])/aggregates[column]["std"],2)
        return df[(df.index==tm1) | (df.index==tm2)]

    return f"{team} doesn't play today"

In [13]:
# Row position, within today's schedule, of the game to analyze in the cells below
matchup = 0
# Columns 2 and 3 of games_today are "Road" and "Home", so tm1 is the road team and tm2 the home team
tm1, tm2 = games_today.iloc[matchup,2], games_today.iloc[matchup,3]
games_today

Unnamed: 0,Date,Start (ET),Road,Home,MU
196,2021-12-30,7:00p,PHI,BRK,BRK-PHI
197,2021-12-30,7:00p,MIL,ORL,MIL-ORL
198,2021-12-30,7:00p,CLE,WAS,CLE-WAS
199,2021-12-30,9:30p,GSW,DEN,DEN-GSW


In [14]:
# Injury report rows for both teams in the selected matchup
get_dashboard(tm1,"Injuries")

Unnamed: 0,Player,Team,Designation,Update,Type,Surgery,Description
23,Kevin Durant,BRK,Questionable,1,Health protocols,False,Day To Day (Health protocols) - Durant is prob...
21,LaMarcus Aldridge,BRK,Questionable,1,Health protocols,False,Day To Day (Health protocols) - Aldridge is pr...
22,David Duke,BRK,Out,7,Health and safety protocols,False,Out (Health and safety protocols) - The Nets p...
24,Kessler Edwards,BRK,Out,7,Health and safety protocols,False,Out (Health and safety protocols) - The Nets p...
26,Day'Ron Sharpe,BRK,Out,12,Health and Safety Protocols,False,Out (Health and Safety Protocols) - The Nets p...
25,Joe Harris,BRK,Out,31,Ankle,True,Out (Ankle) - Nets coach Steve Nash said Harri...
149,Shake Milton,PHI,Questionable,1,Health and safety protocols,False,Day To Day (Health and safety protocols) - Mil...
148,Danny Green,PHI,Out,8,Health Protocols,False,Out (Health Protocols) - Green is listed as OU...
150,Ben Simmons,PHI,Out,70,Back,False,Out (Back) - Simmons is not expected to play f...


In [15]:
# Per-36-minute stats for the road team's players who are not on the injury report (more than 300 total minutes)
injury_filter(tm1,"per_36",minutes=300)

Unnamed: 0,Player,Team,Pos,Ht,Wt,Age,MP,GmSc,PTS,FG,FGA,3P,3PA,FT,FTA,TRB,AST,BLK,STL,TOV
242,Joel Embiid,PHI,C,84.0,280.0,27.0,36.0,23.3,27.9,8.7,18.6,1.5,3.7,9.1,11.1,11.8,4.4,1.5,1.2,2.8
239,Tobias Harris,PHI,PF,80.0,226.0,29.0,36.0,15.0,19.6,7.3,15.9,1.2,4.0,3.8,4.5,8.1,3.8,0.6,0.5,1.8
237,Andre Drummond,PHI,C,82.0,279.0,28.0,36.0,14.1,11.0,4.5,9.0,0.0,0.1,2.0,3.5,17.1,3.5,2.0,1.9,3.1
233,Tyrese Maxey,PHI,PG,74.0,200.0,21.0,36.0,13.3,17.2,6.5,13.9,1.2,3.4,3.0,3.4,3.5,4.7,0.6,0.7,1.3
234,Seth Curry,PHI,SG,74.0,185.0,31.0,36.0,12.3,16.6,6.2,12.2,2.2,5.5,1.9,2.2,3.1,3.5,0.2,0.8,1.8
235,Georges Niang,PHI,PF,79.0,230.0,28.0,36.0,10.7,16.2,5.9,13.0,3.1,7.9,1.4,1.6,4.0,2.2,0.4,0.9,1.2
236,Furkan Korkmaz,PHI,SG,79.0,202.0,24.0,36.0,10.1,14.2,5.2,13.6,2.3,7.8,1.5,1.8,4.7,3.6,0.2,1.0,1.4
238,Matisse Thybulle,PHI,SG,77.0,201.0,24.0,36.0,7.6,7.8,3.2,6.9,1.0,3.5,0.4,0.4,3.3,1.5,1.6,2.4,1.2


In [16]:
# Per-36-minute stats for the home team's players who are not on the injury report (more than 200 total minutes)
injury_filter(tm2,"per_36",minutes=200)

Unnamed: 0,Player,Team,Pos,Ht,Wt,Age,MP,GmSc,PTS,FG,FGA,3P,3PA,FT,FTA,TRB,AST,BLK,STL,TOV
26,James Harden,BRK,PG,77.0,220.0,32.0,36.0,18.8,21.8,6.3,15.2,2.5,7.3,6.6,7.5,7.9,9.7,0.7,1.4,4.8
36,Nic Claxton,BRK,C,83.0,215.0,22.0,36.0,13.2,14.3,6.3,9.6,0.0,0.0,1.6,3.0,8.2,1.3,2.3,0.5,1.2
24,Patty Mills,BRK,PG,73.0,180.0,33.0,36.0,12.0,17.0,6.0,13.3,4.0,9.1,1.0,1.1,2.4,3.1,0.3,0.7,1.3
27,DeAndre' Bembry,BRK,SF,77.0,210.0,27.0,36.0,10.0,11.0,4.8,8.0,0.8,1.6,0.6,1.3,5.9,2.2,1.0,1.6,1.4
29,Bruce Brown,BRK,SF,76.0,202.0,25.0,36.0,9.9,11.3,4.9,10.1,0.5,1.6,1.0,1.3,6.8,1.9,0.6,2.0,1.0
31,Blake Griffin,BRK,PF,81.0,250.0,32.0,36.0,9.7,11.2,4.1,11.1,1.1,5.3,2.0,2.9,8.5,3.6,0.8,1.0,0.9
32,Cam Thomas,BRK,SG,76.0,210.0,20.0,36.0,9.0,14.1,5.5,14.5,1.3,5.7,1.7,2.1,4.7,2.7,0.4,0.9,1.2
34,James Johnson,BRK,PF,79.0,240.0,34.0,36.0,8.7,9.5,4.2,8.9,0.5,1.8,0.6,1.5,7.9,3.7,0.9,1.0,1.2
33,Paul Millsap,BRK,PF,79.0,257.0,36.0,36.0,8.6,10.6,4.0,11.4,0.9,3.7,1.7,2.4,11.4,3.3,1.2,0.7,1.4
25,Jevon Carter,BRK,PG,73.0,200.0,26.0,36.0,4.8,7.5,2.5,9.2,2.1,7.2,0.3,0.4,4.8,3.2,0.8,1.1,1.4


In [17]:
# team_logs returns games sorted by date, so the last element is the most recent meeting of the two teams
team_logs(tm1,tm2)[-1]

Unnamed: 0,Player,Team,Pos,Ht,Wt,GS,MP,FG,FGA,3P,3PA,FT,FTA,PTS,TRB,AST,BLK,STL,TOV,PF
0,Joel Embiid,PHI,C,84.0,280.0,1,33.2,11,20,1,3,9,11,32,9,6,0,1,2,4
1,Seth Curry,PHI,SG,74.0,185.0,1,38.5,13,21,3,9,0,0,29,3,4,0,1,1,3
2,Shake Milton,PHI,PG,77.0,205.0,1,33.9,5,13,2,5,1,2,13,6,0,1,1,3,5
3,Tobias Harris,PHI,PF,80.0,226.0,1,36.8,3,17,0,5,5,5,11,7,5,1,0,1,5
4,Danny Green,PHI,SF,78.0,215.0,1,39.8,2,7,2,7,2,2,8,2,0,1,2,0,2
5,Matisse Thybulle,PHI,SG,77.0,201.0,0,18.2,3,5,2,4,0,0,8,2,1,0,2,0,1
6,Andre Drummond,PHI,C,82.0,279.0,0,14.8,2,2,0,0,0,0,4,6,2,0,3,2,2
7,Isaiah Joe,PHI,SG,76.0,165.0,0,24.8,0,4,0,1,0,0,0,3,1,1,0,0,0
8,Kevin Durant,BRK,PF,82.0,240.0,1,39.0,13,24,2,3,6,6,34,11,8,1,0,4,1
9,Blake Griffin,BRK,PF,81.0,250.0,1,31.8,7,16,1,5,2,2,17,9,2,0,0,0,3


In [18]:
# results = {}
# results = {team:[] for team in teams}
# day = datetime.date(2021,10,19)
# while day != datetime.date.today():
#     for team in teams:
#         try:
#             for MU in games[str(day)]:
#                 if team in MU:
#                     game = games[str(day)][MU][["Tm","W/L","Spread","Court","Opp"]]
#                     game = [(game["W/L"][i],game["Spread"][i],game["Court"][i],game["Opp"][i]) for i,v in enumerate(game["Tm"]) if v==team][0]
#                     results[team].append((day,game[0],game[1],game[2],game[3]))
#         except:
#             pass
#     day = day+datetime.timedelta(days=1)
# for team in teams:
#     for result in results[team]:
#         results[team] = pd.DataFrame(results[team],columns=["Date","W/L","Spread","Court","Opp"])
#         results[team]["Rest"] = [(results[team]["Date"][i]-results[team]["Date"][i-1]-datetime.timedelta(1)).days if i!=0 else 3 for i in results[team]["Date"].index]


In [19]:
### TODO: Combine all team result dataframes together and find home court advantage, W% by days rest, etc.
# results["CHI"]["W"] = [True if i=="W" else False for i in results["CHI"]["W/L"]]
# results["CHI"]["L"] = [True if i=="L" else False for i in results["CHI"]["W/L"]]
# results["CHI"]["Home"] = [True if i=="Home" else False for i in results["CHI"]["Court"]]
# results["CHI"].aggregate({"W":"sum","L":"sum","Spread":"sum"})

In [20]:
### TODO: Create individual defense leaderboard
# x = team_logs("CHI")[-1]
# x["Pos"] = [5 if i=="C" else i for i in[4 if i=="PF" else i for i in [3 if i=="SF" else i for i in [2 if i=="SG" else i for i in [1 if i=="PG" else i for i in x["Pos"]]]]]]
# x["Team"] = [1 if i=="CHI" else 0 for i in x["Team"]]
# x = x[x["GS"]==1].sort_values(["Team","Pos","Ht"]).reset_index(drop=True)
# x

What makes a good rebounder?
1) Height
2) Wingspan
3) Relative Weight
4) Positioning on the court when shots are taken
5) Hustle

How to check for good rebounders?
 - Compare each player's TRB to other players of the same size / wingspan
    - TRB-Ht: R = .459
    - TRB-Wt: R = .504
 - Check for a negative correlation for 3PA and TRB with centers (R = -.327)

In [21]:
# centers_per_36 = get_player_offense(filter="per_36",position="C")
# centers_per_36["PPI"] = round(centers_per_36["Wt"] / centers_per_36["Ht"],2)
# centers_per_36["zHT"] = round((centers_per_36["Ht"]-centers_per_36["Ht"].mean())/centers_per_36["Ht"].std(),2)
# centers_per_36["zPPI"]= round((centers_per_36["PPI"]-centers_per_36["PPI"].mean())/centers_per_36["PPI"].std(),2)
# centers_per_36["z3PA"]= -round((centers_per_36["3PA"]-centers_per_36["3PA"].mean())/centers_per_36["3PA"].std(),2)
# centers_per_36["Z"]= round(sum([centers_per_36["zHT"],centers_per_36["zPPI"],centers_per_36["z3PA"]])/3,2)

# centers_per_36.sort_values(by="Z",ascending=True)[:30].drop(columns=["Pos","MP","Wt","3P","3PA"])

### Compare two teams
# centers_per_36[(centers_per_36["Team"]=="MIA") | (centers_per_36["Team"]=="CHI")].drop(columns=["Pos","MP","Wt","3P","3PA"])

### Find stats that relate to each other
# centers_per_36.corr()

### Reveal normal rebounding range for position
# print(f'{centers_per_36["TRB"].mean()-centers_per_36["TRB"].std()} - {centers_per_36["TRB"].mean()+centers_per_36["TRB"].std()}')