# NBA Fantasy Lineup Optimization

In [None]:
import pandas as pd
import numpy as np
from itertools import combinations

In [None]:
# Widen the notebook display limits so large frames are shown without truncation.
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999
# None disables cell-content truncation. The former -1 sentinel was deprecated
# in pandas 1.0 and is rejected with a ValueError by pandas 2.x.
pd.options.display.max_colwidth = None
pd.options.display.width = -1

## Load data

In [None]:
# The schedule CSV is read whole; the column whose header equals `current_week`
# is relabelled "num_games" so later cells can refer to it by a fixed name.
current_week = "23"
weekly_game_count = pd.read_csv("data/weekly_game_count.csv")
weekly_game_count = weekly_game_count.rename(columns={current_week: "num_games"})

In [None]:
# Preview the first five rows of the schedule table.
weekly_game_count.head(n=5)

In [None]:
# Per-game box score rows; later cells read the player, player_link, team,
# game_url, mp, tov and counting-stat columns from this frame.
boxscore_data = pd.read_parquet(path="data/basic_boxscore.parquet")

In [None]:
# Keep, for every player, the first 30 rows after sorting that player's games by
# game_url descending, considering only rows where minutes played is present.
# NOTE(review): this is "the last 30 games" only if game_url sorts
# chronologically -- confirm against the data.
filtered_boxscore_data = (
    boxscore_data[boxscore_data["mp"].notnull()]
    .sort_values(by=["player_link", "game_url"], ascending=[True, False])
    .assign(counter=1)
    # Running count within each player gives a 1-based position in the sort order.
    .assign(rank=lambda games: games.groupby("player_link")["counter"].cumsum())
    .loc[lambda games: games["rank"] <= 30]
    # Turnovers are negated so that "larger is better" holds for every category.
    .assign(neg_to=lambda games: games["tov"].mul(-1))
)

# The team attached to a player's rank-1 game is used as that player's team.
player_team = filtered_boxscore_data.loc[
    filtered_boxscore_data["rank"] == 1, ["player_link", "team"]
]

# The per-game team is preserved as "game_team"; "team" then becomes the
# rank-1 team, which is the key used to look up this week's game count.
filtered_boxscore_data = (
    filtered_boxscore_data
    .rename(columns={"team": "game_team"})
    .merge(player_team, how="left", on="player_link")
    .merge(weekly_game_count[["team", "num_games"]], how="left", on="team")
)

In [None]:
# Preview the first five rows of the filtered, schedule-annotated box scores.
filtered_boxscore_data.head(n=5)

## Simulate week's games

In [None]:
def resample_games(boxscore_data):
    """Randomly resample games for each player, with replacement.

    For every ``player_link`` group, ``n`` rows are drawn uniformly at random
    (the same row may be drawn more than once), where ``n`` is the maximum of
    that group's ``num_games`` column.

    ``num_games`` may be stored as floats, and may be missing altogether for a
    player whose team was not matched by a left merge. The count is therefore
    coerced to ``int``, and a missing count is treated as zero games so the
    player simply contributes no rows instead of raising.

    Args:
        boxscore_data: DataFrame with at least ``player_link`` and
            ``num_games`` columns, one row per game played.

    Returns:
        The concatenated per-player samples, as produced by
        ``DataFrameGroupBy.apply``.
    """
    # Re-seed the global NumPy RNG from fresh entropy on every call.
    # NOTE(review): presumably needed because forked pool workers would
    # otherwise inherit identical RNG state -- confirm before removing.
    np.random.seed()

    def _sample(games):
        n_games = games["num_games"].max()
        # NaN means no schedule entry was found; numpy also refuses float sizes.
        n_games = 0 if pd.isna(n_games) else int(n_games)
        return games.iloc[np.random.randint(0, len(games), n_games)]

    return boxscore_data.groupby(["player_link"]).apply(_sample)

In [None]:
def avg_player_stats(boxscore_data):
    """Total each player's counting stats over the supplied game rows.

    Despite the name, the stats are summed, not averaged. Rows are grouped by
    the ``player`` column, and the result has one row per player with
    ``player`` as a regular column followed by the stat totals.
    """
    stat_columns = [
        "ast", "blk", "fg", "fga", "fg3", "ft", "fta", "pts", "stl", "trb", "neg_to",
    ]
    per_player = boxscore_data.groupby(["player"])
    totals = per_player[stat_columns].sum()
    return totals.reset_index()

In [None]:
def avg_stats(player_stats):
    """Collapse per-player totals into one team-level stat line.

    Every column except ``player`` is summed across rows. Field-goal and
    free-throw percentages are then derived from the summed makes and attempts
    and appended as ``fg_pct`` and ``ft_pct``.

    Returns:
        A Series indexed by stat name.
    """
    stat_columns = list(filter(lambda name: name != "player", player_stats.columns))
    totals = player_stats[stat_columns].sum()
    # Percentages come from team-wide makes/attempts, not from averaging
    # individual players' percentages.
    for made, attempted, pct in (("fg", "fga", "fg_pct"), ("ft", "fta", "ft_pct")):
        totals[pct] = totals[made] / totals[attempted]
    return totals

In [None]:
def which_categories_won(score, opponent_score):
    """Score each of the nine categories as 1 (win), 0.5 (tie) or 0 (loss).

    Both arguments are stat lines indexed by category name. A category is won
    when ``score`` is strictly greater than ``opponent_score``.

    Returns:
        A Series indexed by category with the per-category result.
    """
    stats = ["ast", "blk", "fg_pct", "fg3", "ft_pct", "pts", "stl", "trb", "neg_to"]
    mine = score[stats]
    theirs = opponent_score[stats]
    outright_wins = (mine > theirs).astype(int)
    ties = (mine == theirs).astype(int)
    return outright_wins + ties / 2

In [None]:
def categories_won(score, opponent_score):
    """Count how many of the nine categories ``score`` wins.

    A strictly greater value counts as one category; an exact tie counts as
    half a category.
    """
    stats = ["ast", "blk", "fg_pct", "fg3", "ft_pct", "pts", "stl", "trb", "neg_to"]
    mine, theirs = score[stats], opponent_score[stats]
    n_outright = (mine > theirs).sum()
    n_tied = (mine == theirs).sum()
    return n_outright + n_tied / 2

In [None]:
def simulated_result(data):
    """Simulate one matchup and report whether the team wins or ties it.

    Args:
        data: a ``(team_games, opponent_games)`` pair of box score frames,
            packed into a single argument so the function can be used with
            ``Pool.map``.

    Returns:
        True when the team takes at least 4.5 of the 9 categories, so an
        overall tie counts as a success.
    """
    team_games, opponent_games = data
    # The team is simulated first, then the opponent.
    simulated_lines = [
        avg_stats(avg_player_stats(resample_games(games)))
        for games in (team_games, opponent_games)
    ]
    return categories_won(*simulated_lines) >= 4.5

In [None]:
def simulated_categories(data):
    """Simulate one matchup and return the per-category outcome.

    Args:
        data: a ``(team_games, opponent_games)`` pair of box score frames,
            packed into a single argument so the function can be used with
            ``Pool.map``.

    Returns:
        The result of ``which_categories_won`` for the simulated stat lines.
    """

    def _simulated_line(games):
        # Resample the week's games, total per player, then total per team.
        return avg_stats(avg_player_stats(resample_games(games)))

    team_games, opponent_games = data
    return which_categories_won(
        _simulated_line(team_games),
        _simulated_line(opponent_games),
    )

In [None]:
# Opponent's roster, matched against the "player" column of the box scores.
opponent_players = [
    "Bogdan Bogdanovic",
    "Tyreke Evans",
    "Damian Lillard",
    "Isaiah Thomas",
    "LaMarcus Aldridge",
    "Jarrett Allen",
    "Clint Capela",
    "Joel Embiid",
    "Enes Kanter",
    "Deandre Ayton",
]

In [None]:
# Candidate pool from which 10-player lineups are drawn, matched against the
# "player" column of the box scores.
available_players = [
    "Bradley Beal",
    "Joe Ingles",
    "Khris Middleton",
    "Lou Williams",
    "Giannis Antetokounmpo",
    "Harrison Barnes",
    "Bojan Bogdanovic",
    "Thaddeus Young",
    "Bobby Portis",
    "Dwight Powell",
    "Spencer Dinwiddie",
    "Andrew Wiggins",
    "Jerami Grant",
    "Robin Lopez",
]

In [None]:
import multiprocessing

# Worker pool sized to the CPU count; the simulation cells below share it.
# NOTE(review): the pool is never closed or joined in the visible cells.
pools = multiprocessing.Pool(multiprocessing.cpu_count())

In [None]:
%%time
# Estimate a win rate for every 10-player lineup drawn from the available pool:
# each lineup is simulated n_trials times against the opponent's roster, and
# the share of trials that simulated_result reports as successes is recorded.
n_trials = 100
results = {}
opponent_boxscore_data = filtered_boxscore_data[
    filtered_boxscore_data["player"].isin(opponent_players)
]
for i, players in enumerate(combinations(available_players, 10)):
    team_boxscore_data = filtered_boxscore_data[
        filtered_boxscore_data["player"].isin(players)
    ]
    # Every trial receives the same pair of frames; randomness comes from the
    # resampling done inside simulated_result.
    trial_inputs = [(team_boxscore_data, opponent_boxscore_data)] * n_trials
    result = sum(pools.map(simulated_result, trial_inputs)) / n_trials
    results[players] = result
    print(f"{i}, {players}: {result}")

In [None]:
# Show the ten lineups with the highest estimated win rate, best first.
for players in sorted(results, key=results.get, reverse=True)[:10]:
    win_pct = results[players]
    print(f"{players}: {win_pct}")

In [None]:
# The ten best lineups from the coarse search, kept for a higher-trial re-run.
top_combos = sorted(results, key=results.get, reverse=True)[:10]

In [None]:
%%time
# Re-simulate only the shortlisted lineups with more trials to get a less
# noisy win-rate estimate for each.
n_trials = 500
top_results = {}
for i, players in enumerate(top_combos):
    team_boxscore_data = filtered_boxscore_data[
        filtered_boxscore_data["player"].isin(players)
    ]
    trial_inputs = [(team_boxscore_data, opponent_boxscore_data)] * n_trials
    result = sum(pools.map(simulated_result, trial_inputs)) / n_trials
    top_results[players] = result
    print(f"{i}, {players}: {result}")

In [None]:
# Pick the lineup with the highest re-simulated win rate, then display it.
ranked_top_combos = sorted(top_results, key=top_results.get, reverse=True)
top_players = ranked_top_combos[0]
top_players

In [None]:
# For the chosen lineup, average the per-category outcomes (1 / 0.5 / 0) over
# n_trials simulations to show how often each category is won.
n_trials = 100
team_boxscore_data = filtered_boxscore_data[
    filtered_boxscore_data["player"].isin(top_players)
]
sum(
    pools.map(
        simulated_categories,
        [(team_boxscore_data, opponent_boxscore_data)] * n_trials,
    )
) / n_trials