# Cost Data Analysis
Data review used to inform LOL slate generation.

In [None]:
import os

import pandas as pd

# Location of the gzip-compressed cost CSV under $FANTASY_ARCHIVE_BASE.
_cost_csv_path = os.path.join(
    os.environ["FANTASY_ARCHIVE_BASE"], "lol", "lol-cost.csv.gz"
)

# Load the cost data, parsing the "date" column into datetimes.
cost_df = pd.read_csv(_cost_csv_path, parse_dates=["date"])
display(cost_df)


In [None]:
from datetime import timedelta
import sqlite3
import cachetools

from tqdm import tqdm


# Path of the SQLite database queried by `_get_game_id`, resolved under $FANTASY_HOME.
DB_FILEPATH = os.path.join(os.environ["FANTASY_HOME"], "lol_hist_2014-2021.scored.db")

# Bounded LRU cache (at most 128 entries) that memoizes `_get_game_id` lookups.
_GAME_ID_CACHE = cachetools.LRUCache(maxsize=128)


def _game_id_cache_key(row, *args):
    return (row["date"].strftime("%Y-%m-%d"),) + tuple(
        sorted([row["team"], row["opp"]])
    )


@cachetools.cached(cache=_GAME_ID_CACHE, key=_game_id_cache_key)
def _get_game_id(row, conn: sqlite3.Connection):
    """Look up the game between ``row["team"]`` and ``row["opp"]`` in the database.

    The search window runs from ``row["date"]`` through the following day
    (both formatted as ``YYYY-MM-DD`` and compared with SQL ``BETWEEN``).
    Either team may be home or away. If several games match, only the first
    row returned by the query is used.

    Results, including ``None`` for "not found", are memoized in
    ``_GAME_ID_CACHE`` using the key produced by ``_game_id_cache_key``.

    Args:
        row: Mapping-like object with ``"date"``, ``"team"`` and ``"opp"``.
        conn: Open SQLite connection to the game history database.

    Returns:
        ``None`` when no game matches, otherwise a dict with keys
        ``"game_id"``, ``"date"``, ``"league"``, ``"home"`` and ``"away"``.
        ``"league"`` is the shared league when both teams agree, else
        ``"<home league>-<away league>"`` with ``"?"`` standing in for a
        missing league.
    """
    start_date = row["date"].strftime("%Y-%m-%d")
    end_date = (row["date"] + timedelta(days=1)).strftime("%Y-%m-%d")
    teams = (row["team"], row["opp"])

    query = """
        SELECT game.id, game.date, home.league, away.league, home.abbr, away.abbr
        FROM game
        join team as home on game.home_team_id = home.id
        join team as away on game.away_team_id = away.id
        WHERE
            date between ? and ?
            and home.abbr in (?, ?)
            and away.abbr in (?, ?)
        """

    cursor = conn.cursor()
    try:
        # The team pair is bound twice: once for the home side, once for away.
        cursor.execute(query, (start_date, end_date, *teams, *teams))
        result = cursor.fetchone()
    finally:
        # Release the cursor promptly; this runs once per uncached row.
        cursor.close()

    if result is None:
        return None

    game_id, game_date, home_league, away_league, home_abbr, away_abbr = result
    if home_league == away_league:
        league = home_league
    else:
        league = f"{home_league or '?'}-{away_league or '?'}"

    return {
        "game_id": game_id,
        "date": game_date,
        "league": league,
        "home": home_abbr,
        "away": away_abbr,
    }


# Register tqdm's pandas integration so `progress_apply` is available below.
tqdm.pandas()

# Discard lookups memoized by an earlier run of this cell.
_GAME_ID_CACHE.clear()
# `mode=ro` in the URI opens the database read-only.
conn = sqlite3.connect(f"file:{DB_FILEPATH}?mode=ro", uri=True)
try:
    # One lookup per cost row; the dict returned by `_get_game_id` is
    # expanded into columns of the resulting frame.
    game_info_df: pd.DataFrame = cost_df.progress_apply(
        _get_game_id, axis=1, result_type="expand", args=(conn,)
    )
finally:
    # Close the connection even if a lookup raises.
    conn.close()

# Carry the slate identifier over from the cost data.
game_info_df["slate-id"] = cost_df["slate-id"]

In [None]:
# Collapse the per-row lookups to one row per distinct game: drop rows with
# missing values, remove duplicates, then order by slate and date.
unique_games_df = (
    game_info_df.dropna()
    .drop_duplicates()
    .sort_values(by=["slate-id", "date"])
)

# Lift the row limit so the whole table is rendered.
with pd.option_context("display.max_rows", None):
    display(unique_games_df)
