# LOL player cost model

In [None]:
# Load the data
import os

import pandas as pd
import dateutil

# Locations of the archived salary CSVs and of the scored-history SQLite DB.
# Both are derived from environment variables; a KeyError is raised if unset.
LOL_COST_FILE_PATH = os.path.join(
    os.environ["FANTASY_ARCHIVE_BASE"], "lol", "cost_archive"
)
DB_FILEPATH = os.path.join(os.environ["FANTASY_HOME"], "lol_hist_2014-2021.scored.db")


# One frame per archived CSV, bucketed by the service named in the filename.
# Files that mention neither service are ignored.
dk_dfs: list[pd.DataFrame] = []
fd_dfs: list[pd.DataFrame] = []
for filename in os.listdir(LOL_COST_FILE_PATH):
    csv_path = os.path.join(LOL_COST_FILE_PATH, filename)
    if "draftkings" in filename:
        df = pd.read_csv(csv_path).assign(service="draftkings")
        dk_dfs.append(df)
    elif "fanduel" in filename:
        # The source filename is kept on fanduel rows because the slate date
        # is parsed out of it further down.
        df = pd.read_csv(csv_path).assign(service="fanduel", filename=filename)
        fd_dfs.append(df)

dk_df = pd.concat(dk_dfs)
fd_df = pd.concat(fd_dfs)

display(dk_df)
display(fd_df)


In [None]:
# transform draftkings data
# Rows whose roster position is "CPT" are discarded, then the DraftKings
# column names are mapped onto the shared schema used for both services.
dk_cleaned_df = dk_df.loc[dk_df["Roster Position"] != "CPT"].rename(
    columns={
        "Position": "pos",
        "Name": "name",
        "Salary": "cost",
        "TeamAbbrev": "team",
    }
)


def opp_func(x):
    """Return the opposing team's abbreviation for one DraftKings row.

    The first space-delimited token of ``x["Game Info"]`` is a matchup of the
    form ``"<team>@<team>"``. If the first side equals ``x["team"]`` the second
    side is returned; otherwise the first side is returned.
    """
    matchup = x["Game Info"].partition(" ")[0]
    sides = matchup.split("@")
    if sides[0] == x["team"]:
        return sides[1]
    return sides[0]


# Add the "opp" column by running opp_func over every row.
dk_cleaned_df = dk_cleaned_df.assign(
    opp=lambda frame: frame.apply(opp_func, axis=1)
)


def date_from_game_info(x):
    """Return the game date encoded in a DraftKings "Game Info" string.

    The second space-delimited token of ``x`` must be a ``MM/DD/YYYY`` date
    (e.g. ``"TL@C9 02/06/2022 05:30PM ET"``). The time-of-day part is ignored,
    so the result is a naive ``datetime`` at midnight.

    Raises:
        IndexError: if ``x`` has no second token.
        ValueError: if the token is not a ``MM/DD/YYYY`` date.
    """
    # Imported locally so the function does not rely on `dateutil.parser`
    # having been loaded as a side effect of some other import.
    from datetime import datetime

    date_token = x.split(" ", 2)[1]
    return datetime.strptime(date_token, "%m/%d/%Y")


# Parse the game date out of "Game Info", then keep only the shared columns.
dk_cleaned_df["date"] = dk_cleaned_df["Game Info"].map(date_from_game_info)
dk_cleaned_df = dk_cleaned_df.loc[
    :, ["pos", "name", "cost", "team", "service", "opp", "date"]
]

display(dk_cleaned_df)


In [None]:
# transform fanduel data
# Map the FanDuel column names onto the shared schema used for both services.
fd_cleaned_df = fd_df.rename(
    columns={
        "Position": "pos",
        "First Name": "name",
        "Salary": "cost",
        "Team": "team",
        "Opponent": "opp",
    }
)


def fd_date_from_filename(x):
    """Return the slate date embedded in a FanDuel cost-archive filename.

    The date is read from the third ``_``-separated piece of ``x``, taking the
    text before that piece's first ``-``. That text must be a ``YYYYMMDD``
    date (e.g. ``"fanduel_lol_20220206-players.csv"``). The result is a naive
    ``datetime`` at midnight.

    Raises:
        IndexError: if ``x`` contains fewer than two underscores.
        ValueError: if the extracted text is not a ``YYYYMMDD`` date.
    """
    # Imported locally so the function does not rely on `dateutil.parser`
    # having been loaded as a side effect of some other import.
    from datetime import datetime

    date_token = x.split("_", 2)[2].split("-", 1)[0]
    return datetime.strptime(date_token, "%Y%m%d")


# The slate date comes from the source filename; afterwards keep only the
# shared columns.
fd_cleaned_df["date"] = fd_cleaned_df["filename"].map(fd_date_from_filename)
fd_cleaned_df = fd_cleaned_df.loc[
    :, ["pos", "name", "cost", "team", "service", "opp", "date"]
]
display(fd_cleaned_df)


In [None]:
# Stack both services into one frame. Player and team identifiers are
# upper-cased and stripped so they compare consistently further down.
data_df = pd.concat([dk_cleaned_df, fd_cleaned_df])
for text_column in ("name", "team", "opp"):
    data_df[text_column] = data_df[text_column].map(
        lambda text: text.upper().strip()
    )
# Season is the calendar year of the game date.
data_df["season"] = data_df["date"].map(lambda game_date: game_date.year)
display(data_df)

In [None]:
# combine data and add points
from datetime import timedelta
import sqlite3

GAME_SQL = """
select game.id, home.abbr, home.id, away.abbr, away.id, season
from game
    join team as home on home.id = game.home_team_id
    join team as away on away.id = game.away_team_id
where 
    date between ? and ?
    and home.abbr in (?, ?)
    and away.abbr in (?, ?)
"""

MISSING_PLAYERS: set[str] = set()
MISSING_GAMES: set[str] = set()


def get_game_id(conn, team, opp, date_) -> tuple[int, int] | None:
    """returns game_id, season"""
    teams: dict[str, int] = {}
    for rows in conn.execute(
        GAME_SQL,
        (
            date_.strftime("%Y-%m-%d"),
            (date_ + timedelta(days=1)).strftime("%Y-%m-%d"),
            team,
            opp,
            team,
            opp,
        ),
    ):
        game_id = rows[0]
        teams[rows[1]] = rows[2]
        teams[rows[3]] = rows[4]
        return (rows[0], rows[5], teams)
    MISSING_GAMES.add((team, opp, date_))
    return None


def get_pts(
    conn, season, stat_id, team_id: None | int = None, player_name: None | str = None
) -> pd.DataFrame:
    if team_id is not None:
        # pull all points data for the team (and cache it)
        return pd.read_sql(
            """
            select date, value 
            from calculation_datum 
            join game on calculation_datum.game_id = game.id
            where 
                team_id = ? and 
                player_id is null and 
                statistic_id = ? and
                season = ?
            order by date desc
            """,
            conn,
            params=(team_id, stat_id, season),
            parse_dates=["date"],
        )

    # get player id
    # pull all points data for the player (and cache it)
    return pd.read_sql(
        """
        select date, value 
        from calculation_datum 
            join player on player.id = calculation_datum.player_id
            join game on calculation_datum.game_id = game.id
        where statistic_id = ? and player.first_name = ? and season = ?
        order by date desc
        """,
        conn,
        params=(stat_id, player_name, season),
        parse_dates=["date"],
    )

# Result used for every row that has no usable scoring history.
NO_PTS = {"pts-1": None, "pts-mean-5": None, "pts-mean-std": None}


def _remap_team_abbr(abbr, service, team_abbr_remap):
    """Translate a cost-file team abbreviation to the one used for DB lookup."""
    remapped = team_abbr_remap.get(abbr, abbr)
    if isinstance(remapped, dict):
        # Per-service override; services without an entry keep the original.
        remapped = remapped.get(service, abbr)
    return remapped


def points_func(x, conn, stats, team_abbr_remap):
    """Compute prior-performance features for one cost row.

    Args:
        x: a row of the cost dataframe; reads ``team``, ``opp``, ``service``,
            ``pos``, ``date`` and ``name``.
        conn: open sqlite3 connection to the scored-history database.
        stats: mapping of ``(service, "T" | "P")`` to statistic id.
        team_abbr_remap: abbreviation remap, either ``abbr -> new_abbr`` or
            ``abbr -> {service: new_abbr}``.

    Returns:
        A dict with keys ``pts-1`` (value from the most recent earlier game),
        ``pts-mean-5`` (mean of up to the five most recent earlier games) and
        ``pts-mean-std`` (mean over all earlier games of the season; despite
        the key name this is a mean, not a standard deviation). ``NO_PTS`` is
        returned when the game is unknown, the player is unknown, or there is
        no game strictly before ``x.date``.

    Raises:
        AssertionError: if a TEAM row has no points data at all for the season.
    """
    x_team = _remap_team_abbr(x.team, x.service, team_abbr_remap)
    x_opp = _remap_team_abbr(x.opp, x.service, team_abbr_remap)
    # Item access, because `x.name` on a pandas row is the row label.
    x_name = x["name"]
    is_team_row = x.pos == "TEAM"

    game_result = get_game_id(conn, x_team, x_opp, x.date)
    if game_result is None:
        # Return the same placeholder as every other no-data path so each row
        # yields a dict with identical keys (get_game_id already recorded it).
        return NO_PTS
    _game_id, season, teams = game_result

    stat_id = stats[(x.service, "T" if is_team_row else "P")]
    # Skip the query for players already known to be absent from the DB.
    if not is_team_row and x_name in MISSING_PLAYERS:
        return NO_PTS

    pts_df = get_pts(
        conn,
        season,
        stat_id,
        teams[x_team] if is_team_row else None,
        None if is_team_row else x_name,
    )
    if pts_df.empty:
        if is_team_row:
            # Raised explicitly (same exception type as before) so the check
            # is not stripped when Python runs with -O.
            raise AssertionError(
                f"No PTS data found for {stat_id=} {teams[x_team]=} {season=} {x_name=}\n{x}"
            )
        known_player_count = conn.execute(
            "select count(*) from player where first_name = ?", (x_name,)
        ).fetchone()[0]
        # Bare name: the player is not in the DB at all.
        # (name, season): the player exists but has no data for this season.
        MISSING_PLAYERS.add(x_name if known_player_count == 0 else (x_name, season))
        return NO_PTS

    # Only games strictly before this slate may feed the features. get_pts
    # returns rows newest first, so position 0 is the latest earlier game.
    prior_df = pts_df[pts_df["date"] < x.date]
    if prior_df.empty:
        return NO_PTS

    prior_values = prior_df["value"]
    return {
        "pts-1": prior_values.iloc[0],
        "pts-mean-5": prior_values.head(5).mean(),
        "pts-mean-std": prior_values.mean(),
    }


def get_stats(conn) -> dict[tuple[str, str], int]:
    """Return the performance-score statistic ids for each service.

    Reads every row of ``statistic`` whose name matches
    ``%_performance_score``. Names starting with ``dk_`` belong to
    draftkings, names starting with ``fd_`` to fanduel.

    Returns:
        dict[(service, player_or_team), stat_id], where ``player_or_team`` is
        the value stored in the ``player_or_team`` column ("T" for team and
        "P" for player).

    Raises:
        AssertionError: if a matching statistic name has neither prefix.
    """
    # Single quotes: in SQL a double-quoted token is an identifier, and SQLite
    # only treats it as a string as a legacy fallback.
    # NOTE: `_` in a LIKE pattern matches any single character.
    sql = """
        select id, name, player_or_team
        from statistic
        where name like '%_performance_score'
    """
    stats: dict[tuple[str, str], int] = {}
    for stat_id, stat_name, player_or_team in conn.execute(sql):
        if stat_name.startswith("dk_"):
            service = "draftkings"
        elif stat_name.startswith("fd_"):
            service = "fanduel"
        else:
            # Raised explicitly so an unknown prefix is never silently
            # treated as fanduel when asserts are disabled (-O).
            raise AssertionError(
                f"unexpected performance score statistic name: {stat_name!r}"
            )
        stats[(service, player_or_team)] = stat_id
    return stats


# Open the history DB read-only just long enough to resolve the stat ids.
conn = sqlite3.connect(f"file:{DB_FILEPATH}?mode=ro", uri=True)
try:
    stats = get_stats(conn)
    display(stats)
    # Manual spot check of a few rows (needs the connection to be open):
    # for row_n in range(79, 85):
    #     row = data_df.iloc[row_n]
    #     display(row_n, row)
    #     display(points_func(row, conn, stats, TEAM_ABBR_REMAP))
finally:
    # Release the handle even when the statistic lookup fails.
    conn.close()


In [None]:
from datetime import date

from tqdm import tqdm


# players that were searched for during processing and were not found
# Both sets are re-created here so this cell starts from a clean slate.
# MISSING_PLAYERS receives either a bare player name or a (name, season)
# tuple from points_func; MISSING_GAMES receives (team, opp, date) tuples
# from get_game_id.
MISSING_PLAYERS = set()
MISSING_GAMES = set()
# (team, opp, date) rows removed from the input data before scoring. The date
# is a "YYYY-MM-DD" string compared against the dataframe's `date` column.
# Each game appears once per team/opp orientation.
DROP_GAMES = [
    ("FLA", "RNS", "2022-02-06"),
    ("RNS", "FLA", "2022-02-06"),
]
# drop all data from the input dataframe for these players (all seasons)
# Names are matched against the upper-cased, stripped `name` column.
# NOTE: two lines used to lack a comma between adjacent string literals,
# which Python silently concatenated into the bogus names "XUNYUNATURAL" and
# "NATURALWIGGILY". The intended names (XUNYU, NATURAL, WIGGILY) were already
# listed individually, so the faulty lines are dropped.
PLAYERS_TO_DROP: set[str] = {
    "XDSMILEY",
    "HUSTLINBEAST",
    "MIKUYA",
    "MINGJING",
    "CROWNSHOT",
    "MINGG",
    "BOAL",
    "TF BLADE",
    "BECCA",
    "DSN",
    "BEICHAUN",
    "RANGJUN",
    "INVI",
    "YE",
    "HOYOUNG",
    "SWORDD",
    "BONNIE",
    "BLUEKNIGHT",
    "CULT",
    "BROKENBLADE",
    "VV",
    "INSULATOR",
    "CRESCENT",
    "KYOUMA",
    "HUSLINBEAST",
    "PARISGRE",
    "POCKET RHINO",
    "NATURAL",
    "XUNYU",
    "STEPZ",
    "ST1NG",
    "MASZKA",
    "WIGGILY",
    "YEONGJAE",
    "MESTRE",
    "OWO",
    "WUMING",
    "SS",
    "BBD",
    "GUMAYUSHI",
    "HONG",
    "JESH",
    "JIUMENG",
    "LIKA1",
    "LWANDY",
    "M1SSION",
    "QUAD",
    "DEUS",
    "MATT",
    "ZIONSPARTAN",
    "MATSU",
    "HANABII",
    "KAMITO",
    "GRIG",
    "BERTHO",
    "NUKESALOT",
    "CODYSUN",
    "RYOMA",
}
# (player name, season) pairs whose rows are removed from the input data.
# Unlike PLAYERS_TO_DROP, only the listed season is dropped for each player.
# Names are compared with the upper-cased, stripped `name` column and the
# season with the `season` column (the calendar year of the game date).
# The list order is the order in which the per-entry progress lines print.
PLAYER_SEASON_TO_DROP: list[tuple[str, int]] = [
    ("LEO", 2022),
    ("SVENSKEREN", 2022),
    ("IGNAR", 2022),
    ("ASPER", 2022),
    ("NISQY", 2022),
    ("LUCAS", 2022),
    ("BROKEN BLADE", 2022),
    ("YIMENG", 2022),
    ("CLEARLOVE", 2022),
    ("FY", 2022),
    ("JUNJIA", 2022),
    ("PUDDING", 2022),
    ("BUNNY", 2022),
    ("CHELIZI", 2022),
    ("NOWAY", 2020),
    ("KHYNM", 2020),
    ("S1XU", 2020),
    ("PROFIT", 2020),
    ("SWIFFER", 2020),
    ("P1NOY", 2021),
    ("PING", 2020),
    ("ALPHAMONG", 2020),
    ("HOON", 2020),
    ("KAAS", 2020),
    ("BANKAI", 2020),
    ("WAYNE", 2021),
    ("SARCASM", 2021),
    ("RAVENZIN", 2020),
    ("CURSE", 2020),
    ("MYSTIC", 2021),
    ("RICKY", 2020),
    ("RUBY", 2020),
    ("KAMI", 2020),
    ("POSS", 2020),
    ("X1LAN", 2021),
    ("FLAME", 2020),
    ("CATCH", 2021),
    ("MOYU", 2020),
    ("NING", 2021),
    ("PLEX", 2020),
    ("KARTIS", 2021),
    ("MOUSTY", 2020),
    ("SNOW", 2021),
    ("ATLANTA", 2020),
    ("TEACHERMA", 2021),
]
# dict[abbr, newabbr] or dict[abbr, dict[service, newabbr]]
# Translates team abbreviations found in the cost files into the
# abbreviations used when looking games up in the history DB.
# A plain string value applies to every service. A nested dict applies only
# to the services it lists; any other service keeps the original abbreviation.
# Abbreviations that are not listed are used unchanged. The mapping is applied
# once per lookup, so a remapped value is not itself remapped again.
TEAM_ABBR_REMAP: dict[str, str | dict[str, str]] = {
    "SHO": "SP",
    "RNS": "RSG",
    "ML": "MAD",
    "NR": "NS",
    "DK": "DWG",
    "FCS": "S04",
    "KDF": "KF",
    "NMG": "NSM",
    "KN": "K1CK",
    "LBR": "LIB",
    "CRZ": "CRU",
    "SUPM": "SM",
    "IW": "WILD",
    "FTC": "FNC",
    "AGO": "RGO",
    "FB": "BRO",
    "VFG": "GIA",
    "EST": "ES",
    "MG": "MSF",
    "ROG": "RGE",
    "FKL": "FAL",
    "FL": "FSH",
    "FQ": "FLY",
    "VG": "RA",
    "SK": "SKG",
    "DMO": "TT",
    "AF": "KF",
    "SB": "LSB",
    "DYN": "NS",
    "ITZ": "INTZ",
    "IFG": "IF",
    "FNC.R": "FR",
    "MSF.P": "MP",
    "VGIA": "GIA",
    "PT7": "77PT",
    "SUP": "SM",
    "PSG": "PSGT",
    "UOL": "UOLC",
    "AST": "ATS",
    "DFM": "DNFM",
    "OHT": "100",
    "RED": "REDC",
    "LLL": "LOUD",
    "IM": "IMT",
    "APK": "SP",
    "TD": "NS",
    # fanduel-only override: draftkings rows keep "ES".
    "ES": {"fanduel": "XL"},
}
# Remove rows that cannot be scored, reporting the remaining row count after
# every step so the effect of each drop rule is visible.
print(f"data_df len = {len(data_df)}")
filtered_df = data_df[~data_df["name"].isin(PLAYERS_TO_DROP)]
for name, season in PLAYER_SEASON_TO_DROP:
    is_dropped_player_season = (filtered_df["name"] == name) & (
        filtered_df["season"] == season
    )
    filtered_df = filtered_df[~is_dropped_player_season]
    print(
        f"filtered data post player-season drop for {name}-{season} : n={len(filtered_df)}"
    )
# The loop variable is called `game_date` rather than `date` so it does not
# clobber the `date` class imported from `datetime` at the top of this cell.
for team, opp, game_date in DROP_GAMES:
    is_dropped_game = (
        (filtered_df["team"] == team)
        & (filtered_df["opp"] == opp)
        & (filtered_df["date"] == game_date)
    )
    filtered_df = filtered_df[~is_dropped_game]
    print(
        f"filtered data post game drop for {game_date} : {team} vs {opp} : n={len(filtered_df)}"
    )

print(f"n={len(filtered_df)}")

# Score every remaining row against the history DB (opened read-only), with a
# tqdm progress bar over the row-wise apply.
conn = sqlite3.connect(f"file:{DB_FILEPATH}?mode=ro", uri=True)

tqdm.pandas()
try:
    pts_df = filtered_df.progress_apply(
        points_func,
        axis=1,
        result_type="expand",
        args=(conn, stats, TEAM_ABBR_REMAP),
    )
finally:
    conn.close()
    # Reported even when scoring fails, so the drop lists can be updated
    # and the cell re-run.
    new_players = [mp for mp in MISSING_PLAYERS if isinstance(mp, str)]
    new_player_seasons = [mp for mp in MISSING_PLAYERS if isinstance(mp, tuple)]
    print(f"New players to drop: {new_players}")
    print(f"New player-seasons to drop: {new_player_seasons}")
    display("Missing Games", MISSING_GAMES)

# Attach the points features next to the cost columns and export the result.
final_df = pd.concat([filtered_df, pts_df], axis=1)
final_df.to_csv("lol-cost.csv", index=False)
display(final_df)


In [None]:
# df = filtered_df
# display(data_df.query("name == 'HANABII'"))
# display(df[(df.team == "RNS") & (df.opp == "FLA") & (df.date == "2022-02-06")])
# print(f"New players to drop: {[mp for mp in MISSING_PLAYERS if isinstance(mp, str)]}")
# print(f"New player-seasons to drop: {[mp for mp in MISSING_PLAYERS if isinstance(mp, tuple)]}")
# display(pts_df)
# display("Missing games",MISSING_GAMES)