# Estimate Today's NBA lineup

Create an estimate of today's NBA lineups with additional markers for sense-checking or manually overriding projected lineups:

- add markers for players on the injury report
- add markers for who played in previous games
- add markers for who started in previous games
- add markers for who recently changed starting status
- add links to player news (TODO)

## Player News

Candidate source for the player news links (not yet implemented): https://www.rotoworld.com/api/player_news

## Setup

In [1]:
import urllib3

import pymongo
import pandas as pd
from ohmysportsfeedspy import MySportsFeeds
from bs4 import BeautifulSoup
import textdistance

In [2]:
# Raise the pandas display limits so wide/long frames are shown in full
# in the notebook output instead of being truncated.
for _display_option in ("max_columns", "max_rows"):
    pd.set_option(f"display.{_display_option}", 999)

In [3]:
# Connect to the MongoDB cluster that backs this notebook.
#
# The password is read from the NBA_FANTASY_MONGO_PASSWORD environment
# variable instead of being committed with the notebook (a KeyError here
# means the variable is not set).
# NOTE(review): the password that used to be hard-coded in this cell is
# still in version-control history and should be rotated.
import os
from urllib.parse import quote_plus

mongo_password = os.environ["NBA_FANTASY_MONGO_PASSWORD"]
client = pymongo.MongoClient(
    # Credentials embedded in a MongoDB URI must be percent-escaped, otherwise
    # reserved characters such as "@", ":" or "/" in the password break parsing.
    f"mongodb+srv://nba-fantasy:{quote_plus(mongo_password)}@nba-fantasy-hu5fx.gcp.mongodb.net/test?retryWrites=true&w=majority"
)

In [4]:
# Create and authenticate the MySportsFeeds client (API version 2.0).
#
# The API key is read from the MYSPORTSFEEDS_API_KEY environment variable
# instead of being committed with the notebook (a KeyError here means the
# variable is not set). The second argument is passed through unchanged.
# NOTE(review): the key that used to be hard-coded in this cell is still in
# version-control history and should be regenerated.
import os

msf = MySportsFeeds(version="2.0")
msf.authenticate(os.environ["MYSPORTSFEEDS_API_KEY"], "MYSPORTSFEEDS")

## MSF Expected Lineup

In [5]:
# Build today's expected lineup: one row per (game, team, player) taken from
# the `expected` lineup positions stored in the `lineup` collection.
today = '20200116'
cursor = client["nbafantasy"]["lineup"].aggregate(
    [
        {
            # Derive a YYYYMMDD `gameDate` from the game start time shifted by
            # -18,000,000 ms (a fixed five hours).
            # NOTE(review): presumably this converts UTC to US Eastern; a fixed
            # offset does not follow daylight saving time — confirm.
            '$addFields': {
                'gameDate': {
                    '$dateToString': {
                        'date': {
                            '$add': [
                                {
                                    '$toDate': '$game.startTime'
                                }, -18000000
                            ]
                        },
                        'format': '%Y%m%d'
                    }
                }
            }
        }, {
            # Keep only games whose shifted date is `today`.
            '$match': {
                'gameDate': today
            }
        },
        # One document per team, then one per expected lineup slot.
        {'$unwind': {'path': '$teamLineups'}},
        {'$unwind': {'path': '$teamLineups.expected.lineupPositions'}},
        {
            # Drop lineup slots that have no player assigned.
            '$match': {
                'teamLineups.expected.lineupPositions.player': {'$ne': None}
            }
        },
        {
            '$project': {
                '_id': 0,
                'game_id': '$game.id',
                'team_id': '$teamLineups.team.id',
                'team': '$teamLineups.team.abbreviation',
                'player_id': '$teamLineups.expected.lineupPositions.player.id',
                'first_name': '$teamLineups.expected.lineupPositions.player.firstName',
                'last_name': '$teamLineups.expected.lineupPositions.player.lastName',
                'player_role': '$teamLineups.expected.lineupPositions.position'
            }
        },
        {
            '$sort': {
                'game_id': 1,
                'team_id': 1,
                'player_role': 1
            }
        }
    ]
)

msf_lineup = (
    pd.DataFrame(cursor)
    # Strip digits from the position label so only the role name remains.
    # `regex=True` must be explicit: since pandas 2.0 `str.replace` treats the
    # pattern as a literal string by default, which would leave the digits in.
    .assign(player_role=lambda x: x["player_role"].str.replace(r"[0-9]", "", regex=True))
)

In [6]:
# Preview the first rows of today's expected lineup.
msf_lineup.head()

Unnamed: 0,game_id,team_id,team,player_id,first_name,last_name,player_role
0,53652,95,ORL,9492,Terrence,Ross,Bench
1,53652,95,ORL,15524,BJ,Johnson,Bench
2,53652,95,ORL,15310,Gary,Clark,Bench
3,53652,95,ORL,15278,Mohamed,Bamba,Bench
4,53652,95,ORL,9406,Aaron,Gordon,Starter


In [7]:
# Distinct team ids that appear in today's expected lineup.
teams_playing = msf_lineup["team_id"].unique()

## Team Rosters

In [8]:
# Load every document of the `player` collection as a flat roster table,
# ordered by player id and then team id.
_roster_projection = {
    '_id': 0,
    'player_id': '$_id',
    'first_name': '$firstName',
    'last_name': '$lastName',
    'jersey_number': '$jerseyNumber',
    'contract_start': '$currentContractYear.seasonStartYear',
    'contract_duration': '$currentContractYear.overallContract.totalYears',
    'team_id': '$currentTeam.id',
    'team': '$currentTeam.abbreviation',
    'roster_status': '$currentRosterStatus',
}
_roster_pipeline = [
    {'$project': _roster_projection},
    {'$sort': {'player_id': 1, 'team_id': 1}},
]

cursor = client["nbafantasy"]["player"].aggregate(_roster_pipeline)
team_roster = pd.DataFrame(cursor)

In [9]:
# Preview the first rows of the roster table.
team_roster.head()

Unnamed: 0,player_id,first_name,last_name,jersey_number,contract_start,contract_duration,team_id,team,roster_status
0,9082,Al,Horford,42,2017.0,4.0,85.0,PHI,ROSTER
1,9083,Mike,Scott,30,2017.0,1.0,85.0,PHI,ROSTER
2,9084,Dennis,Schroder,17,2017.0,4.0,96.0,OKL,ROSTER
3,9085,Kirk,Hinrich,12,,,,,RETIRED
4,9086,Kent,Bazemore,24,2017.0,4.0,97.0,POR,ROSTER


## Rotogrinders lineups

In [10]:
# urllib3 connection pool used for the HTTP request to Rotogrinders.
http = urllib3.PoolManager()

In [11]:
# Download the Rotogrinders NBA lineups page.
url = "https://rotogrinders.com/lineups/nba"
response = http.request('GET', url)
# urllib3 returns error responses instead of raising, so fail here rather
# than letting an error page be parsed into an empty lineup further down.
if response.status != 200:
    raise RuntimeError(f"GET {url} returned HTTP {response.status}")



In [12]:
# Parse the downloaded page body with BeautifulSoup using the lxml parser.
soup = BeautifulSoup(response.data, 'lxml')

In [13]:
# Scrape the lineup cards: each card holds the team short names and one
# player list per team; the first five players of a list count as starters.
lineup = []
for card in soup.find_all("div", attrs={"class": "blk crd lineup"}):
    team_labels = card.find_all("span", attrs={"class": "shrt"})
    player_lists = card.find_all("ul", attrs={"class": "players"})
    for team_idx, player_list in enumerate(player_lists):
        entries = player_list.find_all("li", attrs={"class": "player"})
        for slot, entry in enumerate(entries):
            lineup.append(
                {
                    # The n-th player list is paired with the n-th team label.
                    "team": team_labels[team_idx].text,
                    "player": entry.find("a").get("title"),
                    "starter": int(slot < 5),
                }
            )

In [14]:
# Tabulate the scraped rows, keeping the first occurrence of each
# (team, player) pair.
_scraped_rows = pd.DataFrame(lineup)
rotogrinders_lineup = _scraped_rows.drop_duplicates(subset=["team", "player"])

In [15]:
# Team abbreviations that need translating before the scraped lineup can be
# matched against the roster data; abbreviations not listed are kept as-is
# by the lookup that consumes this map.
team_map = dict(
    BKN="BRO",
    OKC="OKL",
    PHO="PHX",
)

In [16]:
# Match every scraped Rotogrinders player to a roster player by fuzzy name
# matching, then attach team_id / game_id from today's MSF lineup.
#
# Both sides are keyed as "<team abbreviation><first> <last>" with lower-cased
# names. The roster keys do not depend on the scraped player being matched, so
# they are built once here instead of once (twice, in fact) per scraped player.
_roster_keys = (
    team_roster["team"].fillna("NA")
    + team_roster["first_name"].str.lower()
    + " "
    + team_roster["last_name"].str.lower()
)
_similarity = textdistance.damerau_levenshtein.normalized_similarity


def _closest_roster_player_id(rg_player):
    """Return the `player_id` of the roster row that best matches `rg_player`.

    The score is the normalized Damerau-Levenshtein similarity of the two
    keys, plus 1 when their first three characters (the team part) agree, so
    a same-team candidate always outranks a different-team one.
    """
    scores = _roster_keys.apply(
        lambda msf_player: int(msf_player[0:3] == rg_player[0:3]) + _similarity(msf_player, rg_player)
    )
    return team_roster.loc[scores.idxmax()]["player_id"]


def _best_similarity(rg_player):
    """Return the highest raw key similarity between `rg_player` and any roster row.

    Unlike the score used to pick `player_id`, this has no same-team bonus.
    """
    return _roster_keys.apply(lambda msf_player: _similarity(msf_player, rg_player)).max()


expected_lineup = (
    rotogrinders_lineup
    # Translate team abbreviations so they line up with the roster data.
    .assign(team=lambda x: x["team"].apply(lambda y: team_map.get(y, y)))
    .assign(player_id=lambda rg_df: (rg_df["team"] + rg_df["player"].str.lower()).apply(_closest_roster_player_id))
    .assign(match_quality=lambda rg_df: (rg_df["team"] + rg_df["player"].str.lower()).apply(_best_similarity))
    # When several scraped players map to one roster player, keep the one
    # with the highest match quality.
    .sort_values(by=['player_id', 'match_quality'], ascending=[True, False])
    .drop_duplicates(subset=["player_id"])
    .merge(msf_lineup[["team", "team_id", "game_id"]].drop_duplicates())
)

In [17]:
# Write the expected lineup twice: to the fixed "latest" file and to a
# snapshot file named after the date held in `today`.
for _csv_path in (
    "data/rotogrinders_expected_lineup.csv",
    f"data/rotogrinders_expected_lineup_{today}.csv",
):
    expected_lineup.to_csv(_csv_path, index=False)

In [18]:
# Sanity check: number of distinct teams in the expected lineup.
len(expected_lineup["team"].unique())

10

## Injury Report

In [None]:
# Request the NBA player-injuries feed from MySportsFeeds as JSON.
_injury_feed_request = {
    "league": "nba",
    "feed": "player_injuries",
    "format": "json",
}
payload = msf.msf_get_data(**_injury_feed_request)

In [None]:
# Flatten the injury feed into one row per player: identity columns plus the
# expanded `currentInjury` and `currentTeam` records, with snake_case names.
_injured_players = pd.DataFrame(payload["players"]).rename(columns={"id": "player_id"})

# The player's own `id` is renamed first so it cannot collide with the `id`
# that comes out of expanding `currentTeam`.
_injury_parts = [
    _injured_players[["player_id", "firstName", "lastName"]],
    _injured_players["currentInjury"].apply(pd.Series),
    _injured_players["currentTeam"].apply(pd.Series),
]

_injury_column_names = {
    "playingProbability": "injury_status",
    "description": "injury_description",
    "currentRosterStatus": "roster_status",
    "firstName": "first_name",
    "lastName": "last_name",
    "abbreviation": "team",
    "id": "team_id",
}

injury_report = pd.concat(_injury_parts, axis=1).rename(columns=_injury_column_names)

In [None]:
# Preview the first rows of the flattened injury report.
injury_report.head()

## Historical Lineups

In [None]:
# Build the historical lineups: one row per (game, team, player) taken from
# the `actual` lineup positions of every document in the `lineup` collection.
cursor = client["nbafantasy"]["lineup"].aggregate(
    [
        # One document per team, then one per actual lineup slot.
        {'$unwind': {'path': '$teamLineups'}},
        {'$unwind': {'path': '$teamLineups.actual.lineupPositions'}},
        {
            # Drop lineup slots that have no player assigned.
            '$match': {
                'teamLineups.actual.lineupPositions.player': {'$ne': None}
            }
        },
        {
            '$project': {
                '_id': 0,
                'game_id': '$game.id',
                'date': '$game.startTime',
                'team_id': '$teamLineups.team.id',
                'team': '$teamLineups.team.abbreviation',
                'player_id': '$teamLineups.actual.lineupPositions.player.id',
                'first_name': '$teamLineups.actual.lineupPositions.player.firstName',
                'last_name': '$teamLineups.actual.lineupPositions.player.lastName',
                'player_role': '$teamLineups.actual.lineupPositions.position',
                'position': '$teamLineups.actual.lineupPositions.player.position'
            }
        },
        {
            '$sort': {
                'game_id': 1,
                'team_id': 1,
                'player_role': 1
            }
        }
    ]
)

historical_lineups = (
    pd.DataFrame(cursor)
    # Strip digits from the position label so only the role name remains.
    # `regex=True` must be explicit: since pandas 2.0 `str.replace` treats the
    # pattern as a literal string by default, which would leave the digits in.
    .assign(player_role=lambda x: x["player_role"].str.replace(r"[0-9]", "", regex=True))
    .assign(date=lambda x: pd.to_datetime(x["date"]))
    # there are ~200 cases of duplicate player roles (assume starter is correct):
    # sorting by role puts "Starter" after "Bench", so keeping the last row of
    # each (game, team, player) group keeps the starter entry.
    .sort_values(by=["game_id", "team_id", "player_id", "player_role"])
    .loc[lambda x: ~x.duplicated(subset=["game_id", "team_id", "player_id"], keep="last")]
)

In [None]:
# Preview the first rows of the historical lineups.
historical_lineups.head()

## Historical performance

In [None]:
# Load the per-player box-score lines from the `gamelog` collection and
# express the game start time in US/Eastern.
_box_score_fields = {
    "fg3m": "$stats.fieldGoals.fg3PtMade",
    "reb": "$stats.rebounds.reb",
    "ast": "$stats.offense.ast",
    "pts": "$stats.offense.pts",
    "tov": "$stats.defense.tov",
    "stl": "$stats.defense.stl",
    "blk": "$stats.defense.blk",
    "fta": "$stats.freeThrows.ftAtt",
    "ftm": "$stats.freeThrows.ftMade",
    "fga": "$stats.fieldGoals.fgAtt",
    "fgm": "$stats.fieldGoals.fgMade",
    "minSeconds": "$stats.miscellaneous.minSeconds",
}
_gamelog_projection = {
    "_id": 0,
    "game_id": "$game.id",
    "team_id": "$team.id",
    "player_id": "$player.id",
    "date": "$game.startTime",
    **_box_score_fields,
}

game_stats_cursor = client["nbafantasy"]["gamelog"].aggregate(
    [{"$project": _gamelog_projection}]
)

_raw_game_stats = pd.DataFrame(game_stats_cursor)
# `tz_convert` requires timezone-aware timestamps.
_eastern_dates = pd.to_datetime(_raw_game_stats["date"]).dt.tz_convert('US/Eastern')
game_stats = _raw_game_stats.assign(date=_eastern_dates)

In [None]:
def _recent_minutes_and_points(player_games):
    """Average minutes and points over a player's ten most recent games."""
    ten_latest = player_games.nlargest(10, 'date')
    return ten_latest[["minutes", 'pts']].mean()


# Per-player averages; minutes are derived from the seconds played.
_stats_with_minutes = game_stats.assign(minutes=lambda x: x["minSeconds"] / 60)
avg_stats = (
    _stats_with_minutes
    .groupby(["player_id"])
    .apply(_recent_minutes_and_points)
    .reset_index()
)

## Most recent game played

In [None]:
def _latest_appearance(player_games):
    """Return the single most recent lineup row of one player."""
    return player_games.nlargest(1, "date")


# One row per player describing the last game they appeared in.
_latest_rows = (
    historical_lineups
    .groupby("player_id")
    .apply(_latest_appearance)
    .reset_index(drop=True)
)
# 1 when that last game is dated after 2019-09-01, otherwise 0.
_latest_rows = _latest_rows.assign(
    played_this_season=_latest_rows["date"].gt("2019-09-01").astype(int)
)

_last_game_column_names = {
    "date": "last_game",
    "player_role": "role_in_last_game",
    "team": "last_team",
    "team_id": "last_team_id",
}
last_game_played = (
    _latest_rows
    .rename(columns=_last_game_column_names)
    .drop(columns=["game_id", "position"])
)

## Last ten games played

In [None]:
def _ten_latest_appearances(player_games):
    """Return the ten most recent lineup rows of one player."""
    return player_games.nlargest(10, "date")


# Share of each player's last ten appearances in which they started.
_recent_appearances = (
    historical_lineups
    .assign(starter=lambda x: x["player_role"].eq("Starter").astype(int))
    .groupby("player_id")
    .apply(_ten_latest_appearances)
    .reset_index(drop=True)
)
_start_share = _recent_appearances.groupby("player_id")["starter"].mean()
last_10_games_played = _start_share.rename("percent_started").reset_index()

## Played in team's most recent game

In [None]:
def _team_game_identity(team_game_rows):
    """Collapse one (team, game) group to a single identifying row."""
    return team_game_rows.head(1)[["team_id", "game_id", "date", "team"]]


def _ten_latest_team_games(team_games):
    """Return the ten most recent games of one team."""
    return team_games.nlargest(10, "date")


# One row per (team, game), then only each team's ten most recent games.
_team_games = (
    historical_lineups
    .groupby(["team_id", "game_id"])
    .apply(_team_game_identity)
    .reset_index(drop=True)
)
last_10_team_games = (
    _team_games
    .groupby(["team_id"])
    .apply(_ten_latest_team_games)
    .reset_index(drop=True)
)

In [None]:
def _latest_team_game_row(player_rows):
    """Return the row for the most recent team game of one player."""
    return player_rows.nlargest(1, "date")


# Pair every roster player with their team's recent games; the left merge
# leaves `player_role` empty for games the player does not appear in.
_roster_team_games = (
    last_10_team_games
    .merge(team_roster)
    .merge(historical_lineups, how="left")
)
_most_recent_team_game = (
    _roster_team_games
    .groupby("player_id")
    .apply(_latest_team_game_row)
    .reset_index(drop=True)
)

_role_in_game = _most_recent_team_game["player_role"]
played_last_team_game = _most_recent_team_game.assign(
    played_in_last_team_game=_role_in_game.notna().astype(int),
    started_in_last_team_game=_role_in_game.eq("Starter").astype(int),
)[["player_id", "played_in_last_team_game", "started_in_last_team_game"]]

## Played in team's most recent ten games

In [None]:
# Pair every roster player with their team's recent games; the left merge
# leaves `player_role` empty for games the player does not appear in.
_roster_game_log = (
    last_10_team_games
    .merge(team_roster)
    .merge(historical_lineups, how="left")
)
_role_per_game = _roster_game_log["player_role"]
_roster_game_log = _roster_game_log.assign(
    perc_team_games_played=_role_per_game.notna().astype(int),
    perc_team_games_started=_role_per_game.eq("Starter").astype(int),
)

# Averaging the 0/1 indicators per player gives the share of those team
# games the player appeared in / started.
_share_columns = ["perc_team_games_started", "perc_team_games_played"]
played_last_10_team_games = (
    _roster_game_log
    .groupby("player_id")[_share_columns]
    .mean()
    .reset_index()
)

## Combine features

In [None]:
# Combine the roster of today's teams with the expected lineup, the injury
# report and the historical playing/starting features into one table.
lineup_projections = (
    team_roster.query("team_id in @teams_playing")
    # NOTE(review): this cell used to merge `expected_lineups`, a name that is
    # never defined in this notebook (NameError). `msf_lineup` is the only
    # frame here that carries the `player_role`, `game_id` and `team_id`
    # columns used below, so it is used instead — confirm this is the
    # intended source.
    .merge(msf_lineup, how="outer")
    .merge(injury_report.query("team_id in @teams_playing"), how="outer")
    .merge(last_game_played.query("last_team_id in @teams_playing"), how="outer")
    .merge(last_10_games_played, how="left")
    .merge(played_last_team_game, how="left")
    .merge(played_last_10_team_games, how="left")
    .merge(avg_stats, how="left")
    # `col == col` is False only for missing values, i.e. a not-null test.
    .query("roster_status == roster_status or played_this_season == 1")
    .query("roster_status == 'ROSTER' or player_role == player_role")
    .sort_values(by=["team_id", "player_id"])
    # Flag rows worth a manual look:
    #   * expected starters who did not start all of their last ten games;
    #   * players with an injury status other than PROBABLE. The previous
    #     condition (`isna() & eq("PROBABLE")`) could never be true, so no
    #     injured player was ever flagged.
    #     NOTE(review): confirm this is the intended rule.
    .assign(flag=lambda x:
        (x["player_role"].eq("Starter") & x["percent_started"].ne(1))
        | (x["injury_status"].notna() & x["injury_status"].ne("PROBABLE"))
    )
    .assign(starter=lambda x: x["player_role"].eq("Starter").astype(int))
    .assign(playing=lambda x: x["player_role"].notna().astype(int))
    # Rows that came only from the roster/injury side have no game id, so
    # re-derive it for every row from the team's game today.
    .drop(columns=["game_id"])
    .merge(msf_lineup[["team_id", "game_id"]].drop_duplicates())
    [[
        "player_id", "first_name", "last_name", "team_id", "team", "starter", "playing", "flag", "game_id", "player_role", "injury_description", "injury_status",
        "role_in_last_game", "played_this_season", "percent_started", "played_in_last_team_game", "started_in_last_team_game",
        "perc_team_games_started", "perc_team_games_played", "minutes", "pts"
    ]]
)

In [None]:
# Display the full projection table for manual sense-checking.
lineup_projections

In [None]:
# Write the projections to CSV without the DataFrame index.
lineup_projections.to_csv("data/lineup_projections.csv", index=False)