In [None]:
import sleepy
import pandas as pd
import numpy as np
import math
import random
import itertools
import plotly.express as px
from collections import Counter
from tqdm import tqdm
import time
import sys

In [None]:
# Account whose league is analysed, the season to analyse, and the simulation size
USERNAME = 'TheRealFergus'
YEAR = 2023
N = 1_000_000  # The number of simulations to run

In [None]:
# Looks up the user, takes the first league returned for YEAR, then fetches
# that league's raw data and its owners through the sleepy helper module
user_id = sleepy.get_user_data(USERNAME)["user_id"]
league_id = sleepy.get_league_ids(user_id, YEAR)[0]
# NOTE(review): "get_leage" looks like a misspelling of "get_league" - confirm
# it matches the name the sleepy module actually exposes
league_raw = sleepy.get_leage(league_id)
owners_raw = sleepy.get_owners(league_id)

# Keeps only the identifying columns used by the rest of the notebook
owners = owners_raw.loc[:, ["username", "owner_id", "roster_id"]]

In [None]:
# Number of teams in the league, taken as the number of rows in owners
num_teams = len(owners)

# Week number in which the league playoffs start
playoff_week1 = league_raw["settings"]["playoff_week_start"]

# Assigns a unique matchup id (counting from 1) to every combination of two
# roster ids drawn from rosters 1-12; each pair is (lower id, higher id)
matchup_to_roster_id = dict(
    enumerate(itertools.combinations(range(1, 13), 2), start=1)
)

# Reverse lookup: roster-id pair -> matchup id
roster_to_matchup_id = {
    pair: matchup_id for matchup_id, pair in matchup_to_roster_id.items()
}

In [None]:
# Gets regular season matchup data (one row per week and roster, carrying that
# roster's team_points) and merges in the owners so every row also has the
# owner's username.
#
# The matchup id is cast with a callable so the cast reads the merged frame
# itself; referencing the pre-merge frame would rely on its index lining up
# with the merged result, which breaks if the merge drops or reorders rows.
matchups = (
    sleepy.get_matchups(league_id, season=True)
    .query(f"starter == True & week < {playoff_week1}")
    .groupby(["week", "roster_id", "matchup_id"])
    [["team_points"]]
    .first()
    .reset_index()
    .merge(owners, on="roster_id")
    .assign(matchup_id=lambda merged: merged["matchup_id"].astype(int))
)

# Reassigns the matchup id so that it identifies the pair of rosters playing
# each other. roster_to_matchup_id is keyed by (lower roster id, higher roster
# id), so the pair is sorted before the lookup instead of relying on the rows
# happening to be in ascending roster order.
matchups["matchup_id"] = (
    matchups
    .groupby(["week", "matchup_id"])
    ["roster_id"]
    .transform(lambda x: roster_to_matchup_id[tuple(sorted(x.unique()))])
    )

# The first week's games
matchups.head(12)

In [None]:
# Builds the actual season schedule: one tuple per week (in week order) holding
# the distinct matchup ids played that week
season_schedule = tuple(
    map(tuple, matchups.groupby("week")["matchup_id"].agg(set))
)

# Total season points for each owner, indexed by username
owners_points = matchups.groupby("username")[["team_points"]].sum()

In [None]:
# Gets all possible weekly schedules. Each schedule is a tuple of six matchup
# ids (one per game) in which every roster plays exactly once. There are 10395
# possible weekly schedules (11 * 9 * 7 * 5 * 3 * 1).
def _perfect_matchings(rosters):
    """Yields every way to split the given rosters into pairs.

    Args:
        rosters (tuple): Roster ids in ascending order; the length must be
        even.

    Yields:
        tuple: One complete pairing, as a tuple of (lower id, higher id)
        pairs ordered by their first element.
    """
    if not rosters:
        yield ()
        return
    # The lowest remaining roster has to play someone: try each opponent in
    # turn and pair up whoever is left.
    first, others = rosters[0], rosters[1:]
    for position, opponent in enumerate(others):
        still_unpaired = others[:position] + others[position + 1:]
        for remainder in _perfect_matchings(still_unpaired):
            yield ((first, opponent),) + remainder


# Pairing the rosters directly visits only the valid weeks instead of filtering
# all math.comb(66, 6) six-matchup combinations. The weeks come out in the same
# lexicographic order, with matchup ids ascending inside each week.
all_weeks = tuple(
    tuple(roster_to_matchup_id[pair] for pair in week)
    for week in _perfect_matchings(
        tuple(sorted({roster for pair in roster_to_matchup_id for roster in pair}))
    )
)

In [None]:
# Maps each weekly schedule to the set of weekly schedules that share at least
# one matchup with it (the schedule itself included). Once the key week is
# used in a season schedule, none of the weeks in its value set may be used.

# Inverted index: matchup id -> every weekly schedule containing that matchup.
# Looking conflicts up through it avoids comparing every week with every other.
weeks_by_matchup = {}
for week in all_weeks:
    for matchup_id in week:
        weeks_by_matchup.setdefault(matchup_id, set()).add(week)

similar_weeks = {
    key_week: set().union(
        *(weeks_by_matchup[matchup_id] for matchup_id in key_week)
    )
    for key_week in all_weeks
}

In [None]:
def generate_schedule():
    """Generates a random 14 week season schedule for a 12 team league.

    Eleven mutually compatible weeks are drawn at random from all_weeks, so
    every team plays every other team once in the first 11 weeks. The first 3
    weeks are then repeated as weeks 12-14.

    Returns:
        np.ndarray: 14 rows, one per week, each holding the matchup ids played
        that week.
    """
    while True:
        candidates = set(all_weeks)
        picked = []
        try:
            while len(picked) != 11:
                # random.choice raises IndexError when no compatible week is
                # left, i.e. the draw so far cannot be completed
                week = random.choice(tuple(candidates))
                picked.append(week)
                # Rule out every week that repeats a matchup already used
                candidates -= similar_weeks[week]
        except IndexError:
            # Dead end (the original author measured roughly a 70% success
            # rate per attempt): discard the attempt and start over
            continue
        break

    # Weeks 12-14 replay weeks 1-3
    picked += picked[:3]

    return np.array(picked)

In [None]:
def get_ranking(schedule):
    """Gets the regular season rankings of the teams given a season schedule.

    Every scheduled game is decided by the team_points the two rosters
    recorded in that week (read from matchups). Teams are then ordered by
    number of wins and, for equal wins, by total season points from
    owners_points. Draws are tallied but do not influence the ordering.

    Args:
        schedule (sequence): One entry per week, in week order starting at
        week 1. Each entry is an iterable of matchup ids (keys of
        matchup_to_roster_id).

    Returns:
        pd.Series: the rank of each team at the end of the season (1 is
        best), labelled by username and ordered from best to worst.

    Raises:
        KeyError: if matchups has no row for a scheduled week and roster.
    """
    # Positions of the tallies inside each user's standings list
    win, loss, draw = 0, 1, 2
    standings = {username: [0, 0, 0] for username in owners["username"]}

    # A single pass over the results gives constant-time lookups below,
    # instead of re-filtering the whole frame for every scheduled game
    results = {
        (week, roster_id): (points, username)
        for week, roster_id, points, username in matchups[
            ["week", "roster_id", "team_points", "username"]
        ].itertuples(index=False, name=None)
    }

    # Records the outcome of all matches in the standings
    for week_num, week_matchups in enumerate(schedule, start=1):
        for matchup_id in week_matchups:
            team1, team2 = matchup_to_roster_id[matchup_id]
            points1, username1 = results[(week_num, team1)]
            points2, username2 = results[(week_num, team2)]

            if points1 == points2:
                # Usernames are looked up by column name: positional column 1
                # of matchups is roster_id, which is not a key of standings
                standings[username1][draw] += 1
                standings[username2][draw] += 1
            elif points1 > points2:
                standings[username1][win] += 1
                standings[username2][loss] += 1
            else:
                standings[username2][win] += 1
                standings[username1][loss] += 1

    # Determines the team ranks of the season; ranks are numbered from the
    # number of teams actually present rather than a fixed 12
    ranked = (
        pd.DataFrame.from_dict(standings, orient='index',
                               columns=["win", "loss", "draw"])
        .merge(owners_points, left_index = True, right_index= True)
        .sort_values(by=["win", "team_points"], ascending = False)
        .assign(rank = lambda frame: np.arange(1, len(frame) + 1))
    )

    return ranked["rank"]


In [None]:
# Ranks the teams under the schedule that was actually played, then shows the
# result as a one-column table
season_ranks = get_ranking(season_schedule)
season_ranks.to_frame()

In [None]:
# Initializes a dictionary holding, for each user, a tally of how many
# simulated seasons ended at each rank 1-12
total_records = {
    username: pd.Series([0] * 12, index=range(1, 13))
    for username in owners["username"]
}

# Counts how often each complete finishing order (usernames from rank 1 to
# rank 12) is seen
ranking_counts = Counter()

for sim in tqdm(range(N), total=N):

    # Ranks the teams under a freshly generated random schedule
    ranks = get_ranking(generate_schedule())

    # Tallies the rank each user finished at
    for username, rank in ranks.items():
        total_records[username][rank] += 1

    # Tallies the finishing order as a whole
    ranking_counts[tuple(ranks.index)] += 1


# Stores the simulated records in a dataframe (one row per user, one column
# per rank) and writes it to csv
records_df = (
    pd.DataFrame(total_records)
    .T
    .sort_values(by=list(range(1, 13)), ascending=False)
)
records_df.to_csv(f'{YEAR}_{N}_simulated_records.csv', index_label = 'username')

# Stores the simulated finishing orders in a dataframe (columns 1-12 hold the
# usernames by rank, "Count" how often that order occurred) and writes it to csv
playoffs_df = (
    pd.DataFrame(ranking_counts, index=["Count"])
    .T
    .reset_index()
    .query("Count > 0")
    .rename(columns={f'level_{position}': position + 1
                     for position in range(12)})
    .sort_values(by='Count', ascending=False)
)

playoffs_df.to_csv(f'{YEAR}_{N}_simulated_playoffs.csv', index = False)

In [None]:
import multiprocessing 
from multiprocessing.shared_memory import SharedMemory
from multiprocessing import Manager
import sys
from multiprocessing_function import simulate_schedules

# Runs simulations in a separate process via simulate_schedules, which is
# imported from the multiprocessing_function module (not visible here).

# Manager-backed dictionaries for the results. These rebind the names
# total_records and ranking_counts used by the sequential simulation cell;
# each Manager() call starts its own manager.
total_records = Manager().dict()
ranking_counts = Manager().dict()

# One zeroed tally per user, indexed by rank 1-12.
# NOTE(review): a Series stored in a manager dict is presumably copied on
# access, so in-place increments inside the worker may not reach this dict
# unless the key is reassigned - confirm how simulate_schedules writes back.
for username in owners["username"]:
    total_records[username] = pd.Series([0 for i in range(12)], index=range(1,13))


if __name__ == "__main__":

    # Number of simulations handed to the worker
    num_sims = 100

    # Starts a single worker process and blocks until it has finished
    process = multiprocessing.Process(
        target=simulate_schedules,
        args=(all_weeks, similar_weeks, owners, matchup_to_roster_id, matchups, owners_points, total_records, ranking_counts, num_sims)
    )
    process.start()
    process.join()

In [None]:
# Reloads the simulation results written by an earlier run for this YEAR and N
records_df = pd.read_csv(
    f'{YEAR}_{N}_simulated_records.csv',
    index_col="username",
)
playoffs_df = pd.read_csv(
    f'{YEAR}_{N}_simulated_playoffs.csv',
    index_col=False,
)

# Raw counts (an alias of records_df) and the same table as a percentage of
# the N simulations
records_df_count = records_df
records_df_prop = records_df / (N / 100)

In [None]:
# Marks, in each user's row of the simulated table, the column matching the
# rank the user actually finished at
def highlight_cells(row):
    """Builds the per-cell styles for one row of the simulated records.

    Args:
        row (pd.Series): A row whose name is a username and whose labels are
        rank numbers (anything int() accepts).

    Returns:
        list: 'color: red' for the label equal to the user's rank in
        season_ranks and '' for every other label.
    """
    actual_rank = int(season_ranks[row.name])
    styles = []
    for column in row.index:
        styles.append('color: red' if int(column) == actual_rank else '')
    return styles

# Renders the percentage table as text, colouring each user's actual rank
highlighted_df = (
    records_df_prop
    .astype(str)
    .style
    .apply(highlight_cells, axis="columns")
)
highlighted_df

In [None]:
def get_prob(username, rank, type = "equal", records = None):
    """Looks up how often a user finished equal to, worse than, or better
    than a given rank in the simulated rankings.

    Args:
        username (str): the user to look up; must be a row label of the
        records table.
        rank (int): the 1-based rank to compare against.
        type (str, optional): Can be "equal", "worse", "better" or
        "playoff". "playoff" ignores rank and totals the first six rank
        columns. Defaults to "equal".
        records (pd.DataFrame, optional): Table with one row per username
        and one column per rank, in rank order. Defaults to records_df.

    Returns:
        number: The entry (for "equal") or the sum of entries (otherwise)
        taken from the user's row, in whatever unit the table holds.

    Raises:
        KeyError: if username is not a row label of the table.
        ValueError: if type is not one of the supported options.
    """
    if records is None:
        # NOTE(review): records_df appears to hold raw simulation counts,
        # while records_df_prop holds percentages - pass the latter if
        # percentages are wanted.
        records = records_df

    user_ix = records.index.get_loc(username)
    if type == "equal":
        return records.iloc[user_ix, rank-1]
    elif type == "worse":
        # Columns to the right of the given rank
        return records.iloc[user_ix, rank:].sum()
    elif type == "better":
        # Columns to the left of the given rank
        return records.iloc[user_ix, :rank-1].sum()
    elif type == "playoff":
        return records.iloc[user_ix, :6].sum()

    # Previously an unrecognised option fell through and returned None
    raise ValueError(
        f'type must be "equal", "worse", "better" or "playoff", got {type!r}'
    )

# Builds, for every user, the simulated results of finishing worse than, better
# than, or equal to their actual rank, plus of finishing in the top six.
# NOTE(review): get_prob reads records_df by default, which appears to hold raw
# counts rather than percentages - confirm this is the intended unit.
def _probs_by_user(kind):
    """Maps each username to get_prob evaluated at their actual season rank."""
    return {
        username: get_prob(username, rank, type = kind)
        for (username, rank) in season_ranks.items()
    }

probs_worse = _probs_by_user("worse")
probs_better = _probs_by_user("better")
probs_equal = _probs_by_user("equal")
probs_playoff = _probs_by_user("playoff")

# One row per actual rank: the username and the four values above
season_probs = (
    season_ranks
    .to_frame()
    .assign(
        Better = pd.Series(probs_better),
        Equal = pd.Series(probs_equal),
        Worse = pd.Series(probs_worse),
        Playoff = pd.Series(probs_playoff),
    )
    .reset_index(names="username")
    .set_index("rank")
    .round(2)
)
season_probs

In [None]:
# Reshapes season_probs to one row per user and outcome (Better / Equal /
# Worse), then draws the outcomes as one coloured bar per user
season_probs_melt = season_probs.melt(
    id_vars=['username', "Playoff"],
    var_name=' ',
    value_name='Chance %',
)
fig = px.bar(
    season_probs_melt,
    x='username',
    y='Chance %',
    color=' ',
    title=f'Chance of Ranking: Worse Than / Equal To / Better Than Actual Ranking for {YEAR} Season',
    # First three D3 colours, in reverse order
    color_discrete_sequence=px.colors.qualitative.D3[2::-1],
)

fig.update_layout(
    width=1200,
    height=600,
    xaxis_title=None,
    title_x=0.5,
    font={"size": 14},
    margin={"t": 70, "b": 75, "l": 100, "r": 50},
    bargap=0.5,
    legend={"title": None, "font": {"size": 20}},
)

In [None]:
# Rebuilds playoffs_df with one row per observed set of playoff seeds: columns
# 1-6 hold the seeded usernames and "Count" how often that seeding occurred.
# NOTE(review): playoff_seeds is not defined in any cell visible here, so this
# cell fails on a fresh kernel - confirm where it is meant to come from
# (presumably a tally keyed by the top six usernames of each ranking).
playoffs_df = (
    pd.DataFrame(playoff_seeds, index=["Count"])
    .T
    .reset_index()
    .rename(columns={f'level_{position}': position + 1
                     for position in range(6)})
    .loc[lambda seeds: seeds["Count"] > 0]
)

In [None]:
# Works on an independent copy: the seed columns are overwritten below, and
# playoffs_df must keep its usernames. pd.DataFrame(playoffs_df) does not
# guarantee a copy, so writes could reach the original frame.
playoffs_df_prob = playoffs_df.copy()

def better_equal(username, rank):
    """Gets the simulated result of a user finishing at the given rank or
    better.

    Args:
        username (str): the user to look up.
        rank (int): the 1-based rank to compare against.

    Returns:
        number: get_prob for "better" plus get_prob for "equal".
    """
    return get_prob(username, rank, type = "better") + get_prob(username, rank, type = "equal")

# Replaces the username in each seed column 1-6 with that user's value for
# finishing at that seed or better
for seed in range(1, 7):
    playoffs_df_prob[seed] = playoffs_df[seed].apply(better_equal, args=(seed,))

In [None]:
# Row label of the seeding whose first six columns have the smallest total
idx_unlikely = playoffs_df_prob.iloc[:, :6].sum(axis="columns").idxmin()

In [None]:
# Shows the least likely seeding side by side: the username at each seed and
# that user's value for finishing at that seed or better. The row is selected
# with idx_unlikely instead of a row label copied from one particular run, so
# the cell stays correct when the simulation is re-run. (The former rename of
# a column named after idx_unlikely matched no column and has been dropped.)
unlikely_playoff_seed = pd.concat(
    [
        playoffs_df.loc[idx_unlikely, :6].rename("Username"),
        playoffs_df_prob.loc[idx_unlikely, :6].rename("Prob >="),
    ],
    axis = 1,
)
unlikely_playoff_seed

In [None]:
# Shows every simulated seeding in which this user holds the first seed
playoffs_df.loc[playoffs_df[1] == 'tonygordzilla22']