In [12]:
import sleepy
import pandas as pd
import numpy as np
import math
import random
import itertools
import plotly.express as px
from tqdm import tqdm
import time
import multiprocessing as mp
from threading import Thread
from multiprocessing_functions import simulate_schedules, get_ranking
import queue

In [2]:
USERNAME = 'TheRealFergus'
YEAR = 2023

In [3]:
# Gets league and owner data
user_id = sleepy.get_user_data(USERNAME)["user_id"]
league_id = sleepy.get_league_ids(user_id, YEAR)[0]
league_raw = sleepy.get_leage(league_id)
owners_raw = sleepy.get_owners(league_id)
owners = owners_raw[["username", "owner_id", "roster_id"]]

In [4]:
# Stores the number of teams in the league
num_teams = owners.shape[0]

# Stores the number of the first week of the league playoffs
playoff_week1 = league_raw["settings"]["playoff_week_start"]

# Maps a unique matchup id corresponding to each combination of two roster ids
matchup_to_roster_id = {matchup[0] + 1: matchup[1] for matchup in 
                        enumerate(itertools.combinations(range(1,13), 2))}

roster_to_matchup_id = {val: key for (key, val) in matchup_to_roster_id.items()}

In [5]:
# Gets regular season matchup data
matchups = (sleepy.get_matchups(league_id, season=True)
            .query(f"starter == True & week < {playoff_week1}")
            .groupby(["week", "roster_id", "matchup_id"])
            [["team_points"]]
            .first()
            .reset_index())

# Merges matchups with owners to include usersernames
matchups = (
    matchups.merge(owners.reset_index(),
                    left_on="roster_id",
                      right_on = "roster_id")
            .assign(matchup_id = matchups["matchup_id"].astype(int))
            .drop(columns="index"))

# Reassigns the matchup id
matchups["matchup_id"] = (
    matchups
    .groupby(["week", "matchup_id"])
    ["roster_id"]
    .transform(lambda x: roster_to_matchup_id[tuple(x.unique())])
    )

# The first week's games
matchups.head(12)

Unnamed: 0,week,roster_id,matchup_id,team_points,username,owner_id
0,1,1,10,95.92,alecwilson,781258862778015744
1,1,2,15,102.31,namebrant,737201118836346880
2,1,3,27,87.1,therealfergus,871830995287085056
3,1,4,36,80.88,empireyikesback,340376049508429824
4,1,5,40,75.52,pacc,791907251894984704
5,1,6,15,104.94,tonygordzilla22,790423754491678720
6,1,7,40,116.85,mackjyers21,463115290251620352
7,1,8,60,105.44,burgertownthicnred,865421962913157120
8,1,9,27,138.1,thezirconisdragon,865438032692649984
9,1,10,36,87.78,black8yellownation,865844843182694400


In [6]:
# Formats the actual season schedule
season_schedule = tuple(
    tuple(week) for week in matchups.groupby("week")["matchup_id"].agg(set)
)

# Gets the total season points for each owner
owners_points = matchups.groupby("username")[["team_points"]].sum()

In [7]:
# Gets all possible weekly schedules, each tuple contains six matchp id's 
# corresponding to a single game. There are 10395 possible weekly schedules
all_weeks = []
for week in tqdm(itertools.combinations(matchup_to_roster_id.values(), 6), 
                 total = math.comb(66, 6)):
    s = set()
    for match in week:
        s.update(match)
    if len(s) == 12:
        all_weeks.append(tuple(roster_to_matchup_id[match] for match in week))
all_weeks = tuple(all_weeks)

100%|██████████| 90858768/90858768 [00:41<00:00, 2182846.39it/s]


In [8]:
# Maps each week schedule in all possible weeks to a set of weeks. Given that the
# key week appears in a season schedule, none of the weeks in the value set can 
# also appear
similar_weeks = {}
for key_week in tqdm(all_weeks, total = len(all_weeks)):
    similar_weeks[key_week] = (
        set([week for week in all_weeks if len(set(week + key_week)) != 12])
        )

100%|██████████| 10395/10395 [00:24<00:00, 425.85it/s]


In [13]:
manager = mp.Manager()
total_records = manager.dict()
ranking_counts = manager.dict()
progress_queue = mp.Queue()

for username in owners["username"]:
    total_records[username] = manager.list(np.zeros(12, dtype=int))

if __name__ == "__main__":

    num_sims_per_process = 100  # Number of simulations per process
    num_processes = 5  # Number of processes (cores) to use
    total_sims = num_sims_per_process * num_processes
    pbar = tqdm(total=total_sims)

    def update_progress_bar(progress_queue, total_sims):
        completed_sims = 0
        while completed_sims < total_sims:
            try:
                progress_queue.get_nowait()
                completed_sims += 1
                pbar.update(1)
            except queue.Empty:
                time.sleep(1)
        pbar.close()

    progress_thread = Thread(target = update_progress_bar,
                              args=(progress_queue, total_sims))
    progress_thread.start()

    processes = []

    for p in range(num_processes):

        while len(processes) >= mp.cpu_count():
            time.sleep(1)
            processes = [p for p in processes if p.is_alive()]

        process = mp.Process(
            target = simulate_schedules,
            args = (all_weeks, similar_weeks, owners, matchup_to_roster_id, matchups, owners_points, total_records, ranking_counts, num_sims_per_process, progress_queue)
        )
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

tot_rec = {username: pd.Series(list(lst), index = range(1,13)) 
           for username, lst in dict(total_records).items()}

records_df = (pd.DataFrame(tot_rec)
              .transpose()
              .sort_values(by = list(range(1,13)), ascending = False))

playoffs_df = (pd.DataFrame(dict(ranking_counts), index = ["Count"])
               .transpose()
               .reset_index()
               .query("Count > 0")
               .rename(columns={'level_0': 1, 'level_1': 2, 'level_2': 3,
                                'level_3': 4, 'level_4': 5, 'level_5': 6,
                                'level_6': 7, 'level_7': 8, 'level_8': 9,
                                'level_9': 10, 'level_10': 11, 'level_11': 12})
               .sort_values(by = 'Count', ascending = False))

records_df.to_csv(f'{YEAR}_{total_sims}_simulated_records.csv',
                   index_label = 'username')

playoffs_df.to_csv(f'{YEAR}_{total_sims}_simulated_playoffs.csv',
                    index = False)

  0%|          | 0/500 [01:27<?, ?it/s]
100%|██████████| 500/500 [00:21<00:00, 23.10it/s]


In [23]:
tot_rec = {username: pd.Series(list(lst), index = range(1,13)) 
           for username, lst in dict(total_records).items()}

records_df = (pd.DataFrame(tot_rec)
              .transpose()
              .sort_values(by = list(range(1,13)), ascending = False))

playoffs_df = (pd.DataFrame(dict(ranking_counts), index = ["Count"])
               .transpose()
               .reset_index()
               .query("Count > 0")
               .rename(columns={'level_0': 1, 'level_1': 2, 'level_2': 3,
                                'level_3': 4, 'level_4': 5, 'level_5': 6,
                                'level_6': 7, 'level_7': 8, 'level_8': 9,
                                'level_9': 10, 'level_10': 11, 'level_11': 12})
               .sort_values(by = 'Count', ascending = False))

records_df.to_csv(f'{YEAR}_{total_sims}_simulated_records.csv',
                   index_label = 'username')

playoffs_df.to_csv(f'{YEAR}_{total_sims}_simulated_playoffs.csv',
                    index = False)

In [14]:
# Reads previously stored simulated records
records_df = pd.read_csv(f'{YEAR}_{total_sims}_simulated_records.csv', 
                         index_col="username")
playoffs_df = pd.read_csv(f'{YEAR}_{total_sims}_simulated_playoffs.csv', 
                          index_col = False)

records_df_prop = records_df / (total_sims / 100)
records_df_count = records_df

In [16]:
# Stores the true season ranks
season_ranks = get_ranking(season_schedule, owners, matchup_to_roster_id, matchups, owners_points)
pd.DataFrame(season_ranks)

Unnamed: 0,rank
pacc,1
thezirconisdragon,2
herbietime,3
alecwilson,4
empireyikesback,5
burgertownthicnred,6
therealfergus,7
mackjyers21,8
shakylegs,9
tonygordzilla22,10


In [17]:
# Highlights each user's actual ranking from the season in the simulated dataframe
def highlight_cells(row):
    col_to_highlight = season_ranks[row.name]
    return ['color: red' if int(col) == int(col_to_highlight) 
                            else '' for col in row.index]

highlighted_df = records_df_prop.astype(str).style.apply(highlight_cells,
                                                          axis = 1)
highlighted_df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
herbietime,51.6,24.6,14.2,7.0,2.2,0.2,0.2,0.0,0.0,0.0,0.0,0.0
thezirconisdragon,18.4,21.8,22.4,19.6,10.6,3.6,1.8,0.8,0.4,0.6,0.0,0.0
pacc,14.6,20.4,27.8,21.4,7.6,5.0,2.6,0.2,0.4,0.0,0.0,0.0
alecwilson,13.0,26.4,21.8,19.2,10.2,6.2,2.0,1.0,0.2,0.0,0.0,0.0
empireyikesback,1.0,3.4,5.8,13.6,24.0,23.4,14.6,7.2,5.0,1.4,0.2,0.4
therealfergus,1.0,1.8,5.4,11.2,23.0,21.2,15.8,11.6,5.0,2.2,1.4,0.4
mackjyers21,0.2,1.2,1.4,5.0,10.2,16.0,17.6,25.4,12.6,6.2,4.2,0.0
burgertownthicnred,0.2,0.4,0.6,1.6,8.2,16.6,24.0,20.6,15.2,9.4,2.6,0.6
shakylegs,0.0,0.0,0.6,0.4,1.6,3.2,10.8,14.4,24.2,23.0,16.2,5.6
namebrant,0.0,0.0,0.0,0.8,2.0,2.8,6.2,10.6,20.4,26.8,23.6,6.8


In [19]:
def get_prob(username, rank, type = "equal"):
    """Gets the probability of a user ranking equal to, worse, or better than 
    they actually did. Probabilites are taken from the simulated rankings.

    Args:
        username (str): the username to get the probability ranking
        rank (int): the rank to determinge
        type (str, optional): Can be "equal", "worse", or "better". For each 
        option. Defaults to "equal".

    Returns:
        str: The function returns the probability of the given user ranking 
        equal/better/or worse than the passed rank. Probabilities are taken from
        the simulated records dataframe.
    """
    user_ix = records_df_prop.index.get_loc(username)
    if type == "equal":
        return records_df_prop.iloc[user_ix, rank-1]
    elif type == "worse":
        return records_df_prop.iloc[user_ix, rank:].sum()
    elif type == "better":
        return records_df_prop.iloc[user_ix, :rank-1].sum()
    elif type == "playoff":
        return records_df_prop.iloc[user_ix, :6].sum()

# Gets the probability dictionaries of worse/equal/better for each user
probs_worse = {username: get_prob(username, rank, type = "worse") for (username, rank) in season_ranks.items()}
probs_better = {username: get_prob(username, rank, type = "better") for (username, rank) in season_ranks.items()}
probs_equal = {username: get_prob(username, rank, type = "equal") for (username, rank) in season_ranks.items()}
probs_playoff = {username: get_prob(username, rank, type = "playoff") for (username, rank) in season_ranks.items()}

# Stores the probabilites of each user's ranking for the season
season_probs = (pd.DataFrame(season_ranks)
                .assign(Better = pd.Series(probs_better))
                .assign(Equal = pd.Series(probs_equal))
                .assign(Worse = pd.Series(probs_worse))
                .assign(Playoff = pd.Series(probs_playoff))
                .reset_index(names="username")
                .set_index("rank")
                .round(2))
season_probs

Unnamed: 0_level_0,username,Better,Equal,Worse,Playoff
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,pacc,0.0,14.6,85.4,96.8
2,thezirconisdragon,18.4,21.8,59.8,96.4
3,herbietime,76.2,14.2,9.6,99.8
4,alecwilson,61.2,19.2,19.6,96.8
5,empireyikesback,23.8,24.0,52.2,71.2
6,burgertownthicnred,11.0,16.6,72.4,27.6
7,therealfergus,63.6,15.8,20.6,63.6
8,mackjyers21,51.6,25.4,23.0,34.0
9,shakylegs,31.0,24.2,44.8,5.8
10,tonygordzilla22,27.8,24.6,47.6,2.2


In [20]:
# Plots the season probabilities as a stacked bar chart
season_probs_melt = pd.melt(season_probs, id_vars=['username', "Playoff"], var_name=' ', value_name='Chance %')
fig = px.bar(
        season_probs_melt, 
        x='username', 
        y='Chance %', 
        color = ' ', 
        title = f'Chance of Ranking: Worse Than / Equal To / Better Than Actual Ranking for {YEAR} Season',
        color_discrete_sequence = px.colors.qualitative.D3[:3][::-1],
        )

fig.update_layout(width=1200,
                  height=600,
                  xaxis_title = None,
                  title_x = 0.5,
                  font=dict(size=14),
                  margin=dict(t=70, b=75, l=100, r=50),
                  bargap=0.5,
                  legend = dict(title = None,
                                font = dict(size=20))
                 )

In [None]:
playoffs_df = (pd.DataFrame(playoff_seeds, index = ["Count"])
               .transpose()
               .reset_index()
               .rename(columns={'level_0': 1, 'level_1': 2, 'level_2': 3,
                                'level_3': 4, 'level_4': 5, 'level_5': 6}))

playoffs_df = playoffs_df[playoffs_df["Count"] > 0]

In [None]:
playoffs_df_prob = pd.DataFrame(playoffs_df)

def better_equal(username, rank):
    return get_prob(username, rank, type = "better") + get_prob(username, rank, type = "equal")

for i in range(1,7):
    playoffs_df_prob[i] = playoffs_df[i].apply(lambda x: better_equal(x,i))

In [None]:
idx_unlikely = playoffs_df_prob.iloc[:,:6].sum(axis = 1).idxmin()

In [None]:
unlikely_playoff_seed = (pd.concat([playoffs_df.loc[451170,:6].rename("Username"),
                                    playoffs_df_prob.loc[451170,:6].rename("Prob >=")],
                                      axis = 1)
                            .rename(columns={f'{idx_unlikely}':'as'}))
unlikely_playoff_seed

In [None]:
playoffs_df[playoffs_df[1] == 'tonygordzilla22']