In [59]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt

import psycopg2
import pymysql
import sqlalchemy_redshift
from configparser import ConfigParser
from sqlalchemy import create_engine

from numpy import linalg as la
from scipy.stats import norm, poisson, gamma, rv_discrete

# Database credentials are read from ../notebook.cfg rather than written in the notebook.
parser = ConfigParser()
_ = parser.read("../notebook.cfg")

za_user, za_pwd = (parser.get("nffddev_2", option) for option in ("user", "password"))
red_user, red_pwd = (parser.get("redshift", option) for option in ("user", "password"))

# zack_attack (MySQL via pymysql)
za_engine = create_engine(
    f"mysql+pymysql://{za_user}:{za_pwd}@nffddev.numberfire.com/zack_attack",
    connect_args={"host": "nffddev.numberfire.com", "port": 3306},
)
za_conn = za_engine.connect()

# Redshift (postgres wire protocol via psycopg2)
red_engine = create_engine(
    f"postgresql+psycopg2://{red_user}:{red_pwd}@rs1.usdfs.fdbox.net/fanduel",
    connect_args={"port": 5439},
)
red_conn = red_engine.connect()

# JSON dump of the simulation run that the later cells parse
sim_file = "../data/CorrelationId_44a07c75-5d46-48c9-99d6-bd7003a688f7_Timestamp_2023-02-02_21_38_12_771.json"

Exception during reset or similar
Traceback (most recent call last):
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 763, in _finalize_fairy
    fairy._reset(pool, transaction_was_reset)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 1038, in _reset
    pool._dialect.do_rollback(self)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 683, in do_rollback
    dbapi_connection.rollback()
psycopg2.errors.AdminShutdown: terminating connection due to session timeout
SSL connection has been closed unexpectedly



## Data

### Game Data

In [60]:
# Games joined to their home and away team rows (ids plus abbreviations).
game_qry = '''
select 
g.id as game_id,
g.date as gamedate, 
g.away_team_id,
a.abbrev as away_abbrev,
g.home_team_id,
h.abbrev as home_abbrev
from nfl_game g
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
'''
# Parse gamedate to datetime so it can be used as a merge key against the prop data.
nfl_games = pd.read_sql(game_qry, za_conn).assign(
    gamedate=lambda games: pd.to_datetime(games["gamedate"])
)

### Prop Data

In [61]:
# Current and archived prop lines for one game date, restricted to the four
# passing/receiving props used below. `line` is the first non-null book price.
prop_qry = '''
with prop as (
    select
    gamedate, season, week, prop_name,
    position, name, id as player_id, team, opp,
    coalesce(fd, pinny, dk, czr, mgm) as line
    from analyst_dev.nfl_prop_data
    union
    select
    cast(left(gamedate,10) as date) gamedate, season, week, prop_name,
    position, name, id as player_id, team, opp,
    coalesce(fd, pinny) as line
    from analyst_dev.nfl_prop_data_archive
)
select * from prop
where gamedate = '2023-02-12'
and prop_name in ('pass_yards', 'pass_attempts', 'pass_completions', 'rec')
order by line desc
'''
prop = pd.read_sql(prop_qry, red_conn).rename(columns={"name":"player_name"})
prop["gamedate"] = pd.to_datetime(prop["gamedate"])

# nfl_games is stacked twice -- once from the home side and once from the away
# side -- so a prop row matches its game whichever side the player is on.
# `team_id` is the id of the side named in `team`. The second merge restores the
# home/away ids and abbreviations for the matched game.
prop = (
    prop
    .merge(
        pd.concat([
            nfl_games.rename(columns={"home_abbrev":"team","away_abbrev":"opp", "home_team_id":"team_id"}),
            nfl_games.rename(columns={"home_abbrev":"opp","away_abbrev":"team", "away_team_id":"team_id"}),
        ]),
        on=["gamedate", "team", "opp"],
    )
    .drop(["home_team_id", "away_team_id"], axis=1)
    .merge(nfl_games.drop("gamedate", axis=1), on="game_id")
)

# "a" marks the home side, "b" the away side. Vectorised instead of a row-wise
# apply: it is faster, and it also works when the query returns no rows
# (apply(axis=1) on an empty frame does not yield a Series to assign).
prop["tm"] = np.where(prop["team_id"] == prop["home_team_id"], "a", "b")

# Rank players within team/game/prop by line, highest first; ties are broken by row order.
prop['rank'] = (
    prop
    .groupby(["team_id", "game_id", "prop_name"])["line"]
    .rank(method="first", ascending=False)
)

# Keep the top 10 per group and build the key <prop>_<position>_<rank>_<side>.
prop = prop.query("rank <= 10")\
    .assign(
        var_name = lambda x: x["prop_name"].astype(str)+\
            "_"+x["position"].astype(str)+\
            "_"+x["rank"].astype(int).astype(str)+\
            "_"+x["tm"].astype(str)
    )

### Prop Charts

In [62]:
prop_charts_qry = '''
select *
from analyst_dev.nfl_prop_charts
where prop_name in ('pass_yards', 'pass_attempts', 'pass_completions', 'rec')
'''
prop_charts = pd.read_sql(prop_charts_qry, red_conn)

# Columns named "u_<number>" carry the chart values; everything in id_cols identifies a chart.
id_cols = ["chart_type", "prop_name", "position", "mean"]
line_cols = [col for col in prop_charts.columns if col.startswith("u_")]

# Wide -> long: one row per (chart, x) with its cdf value; rows containing nulls are dropped.
prop_piv = (
    prop_charts
    .melt(id_vars=id_cols, value_vars=line_cols, var_name='x', value_name='cdf')
    .dropna()
)
# Strip the "u_" marker so x becomes the integer threshold itself.
prop_piv["x"] = prop_piv["x"].str.replace("u_", "", regex=False).astype(int)

### NF Projections

In [63]:
## NF proj
# Projected mean/sd per player and game (current + archive tables), joined to the
# recorded statline values, the player's name/position and a home ('a') / away ('b') flag.
nf_qry = '''
select
h.name as home_name,
a.name as away_name,
skill.*,
st.pass_yards,
st.pass_completions,
st.pass_attempts,
st.rec,
p.name as player_name,
p.position,
case
    when skill.team_id = g.home_team_id then 'a'
    when skill.team_id = g.away_team_id then 'b'
end as tm
from (
    select player_id, team_id, game_id,
    pass_yards as mean_pass_yards, sd_pass_yards,
    pass_completions as mean_pass_completions, sd_pass_completions,
    pass_attempts as mean_pass_attempts, sd_pass_attempts,
    rec as mean_rec, sd_rec
    from nfl_projection_skill
    union
    select player_id, team_id, game_id,
    pass_yards as mean_pass_yards, sd_pass_yards,
    pass_completions as mean_pass_completions, sd_pass_completions,
    pass_attempts as mean_pass_attempts, sd_pass_attempts,
    rec as mean_rec, sd_rec
    from nfl_projection_archive_skill
) skill
left join nfl_player p on skill.player_id = p.id
left join nfl_statline_skill st on st.player_id = skill.player_id and st.game_id = skill.game_id
left join nfl_game g on skill.game_id = g.id
left join nfl_team h on h.id = g.home_team_id
left join nfl_team a on a.id = g.away_team_id
where (skill.team_id = g.home_team_id or skill.team_id = g.away_team_id);
'''
proj = pd.read_sql(nf_qry, za_conn)

# Reshape to long format: one row per player/game/prop with
# line (projected mean), sd and value (statline figure).
prop_names = ["pass_yards", "pass_attempts", "pass_completions", "rec"]
key_cols = ["player_id", "player_name", "game_id", "position", "tm"]
long_parts = []
for prop_name in prop_names:
    part = (
        proj[key_cols + [f"sd_{prop_name}", f"mean_{prop_name}", prop_name]]
        .rename(columns={f"sd_{prop_name}": "sd", f"mean_{prop_name}": "line", prop_name: "value"})
        .query("line > 0")
        .assign(prop_name=prop_name)
    )
    long_parts.append(part[key_cols + ["prop_name", "line", "sd", "value"]])
proj_long = pd.concat(long_parts, axis=0).fillna(0)

# Rank within side/game/prop by projected mean, highest first; ties broken by row order.
proj_long["rank"] = (
    proj_long
    .groupby(["tm", "game_id", "prop_name"])["line"]
    .rank(method="first", ascending=False)
)

# Keep the top 10 per group, build the <prop>_<position>_<rank>_<side> key, and let
# the long frame take over the name `proj` (the temporary name is removed).
proj = (
    proj_long
    .query("rank <= 10")
    .assign(
        var_name=lambda d: d["prop_name"].astype(str)
        + "_" + d["position"].astype(str)
        + "_" + d["rank"].astype(int).astype(str)
        + "_" + d["tm"].astype(str)
    )
)
del proj_long

### Sim Data

In [65]:
# sim_file is whatever your simulation JSON file is called.
# The context manager closes the handle once the payload is parsed
# (the bare open() previously left it open for the life of the kernel).
with open(sim_file, encoding="utf-8") as f:
    sims = json.load(f)

print(sims.keys())
# SimulationResults has the goods in it

def to_df(x, name=None):
    """
    Best-effort conversion of an arbitrary object to a pandas DataFrame.

    Three constructions are tried in order:

    1. ``pd.DataFrame(x)`` -- works for lists of records and similar.
    2. ``pd.DataFrame(x, index=[0])`` -- works for a dict of scalars; when
       ``name`` is given, every column is prefixed with ``"<name>_"``.
    3. ``pd.DataFrame({name: x}, index=[0])`` -- a single cell holding ``x``.

    Parameters
    ----------
    x : object
        Value to convert.
    name : str, optional
        Column prefix (case 2) or column name (case 3).

    Returns
    -------
    pandas.DataFrame
    """
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are no longer swallowed while a large payload is being parsed.
    try:
        return pd.DataFrame(x)
    except Exception:
        pass

    try:
        out = pd.DataFrame(x, index=[0])
        if name is not None:
            out.columns = [f"{name}_{c}" for c in out.columns]
        return out
    except Exception:
        return pd.DataFrame({name: x}, index=[0])

def parse_sim_results(sims, SimIndex):
    """
    Split one simulation's result into DataFrames.

    Every entry of ``sims["SimulationResults"][SimIndex]`` is converted with
    ``to_df``. Entries that come back as a single row are appended as extra
    columns to the ``"SimResults"`` frame (which starts with just ``SimIndex``).
    Every other entry is stored under its own key, tagged with a ``SimIndex``
    column if it does not already have one.

    Returns a dict mapping key -> DataFrame.
    """
    sim_result = sims["SimulationResults"][SimIndex]

    result_dict = {"SimResults": pd.DataFrame({"SimIndex": SimIndex}, index=[0])}
    for key, payload in sim_result.items():
        element_df = to_df(payload, key)

        if len(element_df) == 1:
            # single-row element: widen the per-simulation summary row
            result_dict["SimResults"] = pd.concat(
                [result_dict["SimResults"], element_df], axis=1
            )
            continue

        # multi-row (or empty) element: keep it as its own table
        if "SimIndex" not in element_df.columns:
            element_df["SimIndex"] = SimIndex
        result_dict[key] = element_df

    return result_dict

def process_simulation_results(sims):
    """
    Parse every simulation and stack the per-simulation tables.

    Parameters
    ----------
    sims : dict
        Parsed simulation payload; ``sims["NumberOfSimulations"]`` gives the
        number of entries of ``sims["SimulationResults"]`` to parse.

    Returns
    -------
    dict
        Maps each key produced by ``parse_sim_results`` to one DataFrame holding
        the rows of all simulations, stacked in simulation order (original row
        indices are kept).
    """
    n_sims = sims["NumberOfSimulations"]
    print(f"Parsing results for {n_sims} of simulations")

    # Collect the pieces first and concatenate once per key. Concatenating onto a
    # growing frame inside the loop copies all earlier rows on every iteration,
    # which is quadratic in the number of simulations.
    frames_by_key = {}
    for i in tqdm(range(n_sims)):
        for key, frame in parse_sim_results(sims, i).items():
            frames_by_key.setdefault(key, []).append(frame)

    return {key: pd.concat(frames, axis=0) for key, frames in frames_by_key.items()}


# Process the Simulation Results: parse every simulation into a dict of
# DataFrames keyed by result section (used by the yards and touchdown models below).
SimulationResults = process_simulation_results(sims)

dict_keys(['GameId', 'Solution', 'MarketIdentifiers', 'BatchingEnabled', 'HomePlayers', 'AwayPlayers', 'SimulationResults', 'NextPlayProbabilities', 'EventId', 'NumberOfSimulations', 'HomeTeamStats', 'AwayTeamStats', 'ModelInputs'])
Parsing results for 10000 of simulations


100%|██████████| 10000/10000 [01:40<00:00, 99.97it/s]


## Model

### Cholesky

In [66]:
# One row per game, one column per var_name, holding `value`; the pairwise
# correlation across games is then taken, with undefined correlations set to 0.
corr_mat = (
    proj
    .pivot(index='game_id', columns='var_name', values='value')
    .reset_index(drop=True)
    .corr()
    .fillna(0)
)
# corr_mat.columns = ["_".join(a) for a in corr_mat.columns.to_flat_index()]
# corr_mat.index = ["_".join(a) for a in corr_mat.index.to_flat_index()]

In [67]:
def isPD(B):
    """Return True if a Cholesky factorisation of ``B`` succeeds (positive-definite), else False."""
    try:
        la.cholesky(B)
    except la.LinAlgError:
        return False
    return True

def nearestPD(A):
    """Return the nearest positive-definite matrix to ``A``.

    Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2].

    The input is symmetrised, averaged with the factor rebuilt from its SVD and
    symmetrised again. If that candidate is still not positive-definite, a
    growing multiple of the identity (driven by the most negative eigenvalue)
    is added until a Cholesky factorisation succeeds.

    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6
    """
    sym = (A + A.T) / 2
    _, sing_vals, Vh = la.svd(sym)
    polar = np.dot(Vh.T, np.dot(np.diag(sing_vals), Vh))

    candidate = (sym + polar) / 2
    candidate = (candidate + candidate.T) / 2
    if isPD(candidate):
        return candidate

    # Floating-point spacing at the scale of A: guarantees each bump is non-zero.
    eps = np.spacing(la.norm(A))
    identity = np.eye(A.shape[0])
    attempt = 1
    while not isPD(candidate):
        smallest = np.min(np.real(la.eigvals(candidate)))
        candidate += identity * (-smallest * attempt**2 + eps)
        attempt += 1
    return candidate

In [68]:
def process_sim_df(sim_df, game_id, names, prop_name):
    """
    Turn paired simulation outcomes for two players into a one-row market table.

    Parameters
    ----------
    sim_df : pandas.DataFrame
        One row per simulation with numeric columns ``plr1`` and ``plr2``.
        The frame is only read; no columns are added to it.
    game_id : scalar
        Copied into the ``game_id`` column of the output.
    names : sequence of two str
        Display names for player 1 and player 2.
    prop_name : str
        Copied into the ``prop_name`` column of the output.

    Returns
    -------
    pandas.DataFrame or None
        One row holding the matchup labels, the spread and total (both moved
        onto a half point) and the reciprocal-probability odds for moneyline,
        spread and total. ``None`` when the total is NaN or when, among
        non-tied simulations, player 1 beats player 2 never or always -- a
        one-sided matchup would otherwise produce an infinite price.
    """
    plr1 = sim_df["plr1"]
    plr2 = sim_df["plr2"]
    combined = plr1 + plr2
    margin = plr1 - plr2

    # TOTAL: median rounded to the nearest half point. A whole number would allow
    # pushes, so it is shifted half a point towards the heavier side.
    total = np.round(np.median(combined) * 2) / 2
    if total.is_integer():
        if np.mean(combined > total) < np.mean(combined < total):
            total -= 0.5
        else:
            total += 0.5

    # SPREAD: negated median margin from player 1's side, same half-point treatment.
    spread = np.round(-1 * np.median(margin) * 2) / 2
    if spread.is_integer():
        if np.mean(margin > -1 * spread) < np.mean(margin < -1 * spread):
            spread += 0.5
        else:
            spread -= 0.5

    # MONEYLINE: ties are excluded, so player 2's probability is the complement.
    decided = plr1 != plr2
    p_plr1_more = (plr1[decided] > plr2[decided]).mean()

    # Probabilities at the final (half-point) spread and total.
    p_plr1_cover = np.mean(plr1 + spread > plr2)
    p_over = np.mean(combined > total)
    p_under = np.mean(combined < total)

    # The original guard listed p_plr1_more twice and never bounded it above, so a
    # player who always wins slipped through and priced the other side at 1/0.
    if np.isnan(total) or not (0 < p_plr1_more < 1):
        return None

    return pd.DataFrame({
        "game_id": game_id,
        "Matchup": f"{names[0]} vs {names[1]}",
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/(1-p_plr1_more),
        "Spread": spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/(1-p_plr1_cover),
        "Total": total,
        "Odds Over": 1/p_over,
        "Odds Under": 1/p_under,
    }, index=[0])

In [69]:
def _largest_x_below(chart, quantiles):
    """For each quantile q, return the largest chart ``x`` whose ``cdf`` is strictly below q (0 if none)."""
    usable = chart[["x", "cdf"]].dropna().sort_values("cdf")
    cdf_sorted = usable["cdf"].to_numpy(dtype=float)
    out = np.zeros(len(quantiles), dtype=float)
    if cdf_sorted.size == 0:
        return out

    # best_x[k] is the largest x among the k+1 smallest cdf values, so the lookup
    # stays correct even if x is not monotone in cdf.
    best_x = np.maximum.accumulate(usable["x"].to_numpy(dtype=float))
    # side="left" counts the chart entries with cdf strictly below each quantile.
    n_below = np.searchsorted(cdf_sorted, quantiles, side="left")
    hit = (n_below > 0) & ~np.isnan(quantiles)
    out[hit] = best_x[n_below[hit] - 1]
    return out


def build_prop_line(player_ids, game_id, prop_name, proj, n_samples=10_000, seed=None):
    """
    Simulate a head-to-head prop between two players and price it.

    Correlated standard normals are drawn using the Cholesky factor of the two
    players' entry in the module-level ``corr_mat``. Each draw is converted to
    a quantile and mapped to an outcome through the ``prop_piv`` chart whose
    ``mean`` is closest to the player's line. The paired outcomes are handed to
    ``process_sim_df``.

    Parameters
    ----------
    player_ids : sequence of two ids
        Players to compare; matched against ``proj["player_id"]``.
    game_id : scalar
        Game to select from ``proj["game_id"]``.
    prop_name : str
        Prop to select from ``proj["prop_name"]`` and ``prop_piv["prop_name"]``.
    proj : pandas.DataFrame
        Needs columns ``game_id``, ``prop_name``, ``player_id``, ``var_name``,
        ``line`` and ``player_name``.
    n_samples : int, default 10_000
        Number of simulated outcomes per player.
    seed : int, optional
        When given, draws come from ``np.random.default_rng(seed)`` so the
        result is reproducible. When omitted, the global NumPy random state is
        used, as before.

    Returns
    -------
    pandas.DataFrame or None
        Whatever ``process_sim_df`` returns for the simulated pair.

    Raises
    ------
    IndexError
        If either player has no row for this game and prop.

    Notes
    -----
    Simulated outcomes are always returned as floats.
    """
    game_proj = proj[(proj["game_id"] == game_id) & (proj["prop_name"] == prop_name)]

    def _first(player_id, column):
        # first matching row's value; IndexError if the player is absent
        return game_proj.loc[game_proj["player_id"] == player_id, column].values[0]

    pair = [player_ids[0], player_ids[1]]
    var = np.array([_first(pid, "var_name") for pid in pair])
    mu = np.array([_first(pid, "line") for pid in pair])
    names = np.array([_first(pid, "player_name") for pid in pair])

    # sort so player 1 is always the favorite (higher line)
    if np.max(mu) == mu[1]:
        var, mu, names = var[::-1], mu[::-1], names[::-1]

    c = nearestPD(corr_mat.loc[var, var])
    L = np.linalg.cholesky(c)

    n_vars = len(var)
    if seed is None:
        uncorrelated = np.random.standard_normal((n_vars, n_samples))
    else:
        uncorrelated = np.random.default_rng(seed).standard_normal((n_vars, n_samples))
    correlated = np.dot(L, uncorrelated)

    # Begin pseudo sim. The chart subset for this prop does not depend on the
    # player, so it is selected once.
    charts = prop_piv[prop_piv["prop_name"] == prop_name]
    plr_sims = []
    for i in range(n_vars):
        variable = correlated[i]
        z = (variable - np.mean(variable)) / np.std(variable)
        quantiles = norm.cdf(z)

        # chart(s) whose mean is closest to this player's line
        gap = np.abs(charts["mean"] - mu[i])
        chart = charts[gap == gap.min()]

        # One vectorised lookup replaces filtering the chart once per sample.
        plr_sims.append(_largest_x_below(chart, quantiles))

    # format sims data for processing
    sim_df = pd.DataFrame({"plr1": plr_sims[0], "plr2": plr_sims[1]})

    # Process sims and return results
    return process_sim_df(sim_df, game_id, names, prop_name)

In [70]:
prop_names = ["pass_attempts", "pass_completions", "rec"]

# Pair every player with every other player on the same game and prop, keeping
# each pair once with the higher line first.
slate = (
    prop
    .merge(prop, on=['game_id', 'prop_name'], suffixes=["_1", "_2"])
    .query(f"player_id_1 != player_id_2 and line_1 >= line_2 and prop_name in {prop_names}")
    [["game_id", "prop_name", "player_id_1", "position_1", "player_id_2", "position_2"]]
)

# Only this one game is priced.
slate = slate.query("game_id == 8297")

chol_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    matchup_ids = [row["player_id_1"], row["player_id_2"]]
    chol_lines.append(
        build_prop_line(matchup_ids, row["game_id"], row["prop_name"], prop)
    )
chol_lines = pd.concat(chol_lines, axis=0)

100%|██████████| 30/30 [01:49<00:00,  3.65s/it]


### Yards

In [71]:
def build_yards_line(player_ids, prop, sims, SimulationResults):
    """
    Price a head-to-head yards market between two players from the simulations.

    Parameters
    ----------
    player_ids : list of two ids
        Matched against ``PlayerId`` in the roster and yards tables.
    prop : str
        One of ``'receiving'``, ``'rushing'``, ``'passing'`` or
        ``'rushing and receiving'``.
    sims : dict
        Parsed simulation payload; ``HomePlayers``, ``AwayPlayers``,
        ``NumberOfSimulations`` and ``GameId`` are read.
    SimulationResults : dict
        Output of ``process_simulation_results``; ``"PlayerYards"`` is read.

    Returns
    -------
    pandas.DataFrame or None
        The row built by ``process_sim_df``, labelled with ``prop``. ``None``
        when either player's mean simulated yards is not positive.
    """
    # prop label -> PropositionType code; 13 is the synthetic rushing + receiving sum built below
    prop_dict = {
        "receiving": 8,
        "rushing": 5,
        "passing": 1,
        "rushing and receiving": 13
    }

    assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing', 'rushing and receiving']"

    player_ids = list(player_ids)

    # pull out id tables
    id_cols = ["PlayerIndex", "PlayerId", "PlayerName", "Position"]
    roster = pd.concat([
        pd.DataFrame(sims["HomePlayers"])[id_cols],
        pd.DataFrame(sims["AwayPlayers"])[id_cols],
    ], axis=0)
    # isin() instead of interpolating the list into a query string: the ids may be
    # NumPy scalars, whose repr is not valid query syntax on NumPy 2.
    players_df = roster[roster["PlayerId"].isin(player_ids)].copy()

    # pull out yards for the two players only (avoids copying the full table per call)
    yards_df = SimulationResults["PlayerYards"]
    yards_df = yards_df[yards_df["PlayerId"].isin(player_ids)]

    # aggregate rushing and receiving into a combined proposition (code 13)
    yard_df_rush_rec = (
        yards_df[yards_df["PropositionType"].isin([5, 8])]
        .groupby(["SimIndex", "PlayerId"])
        .agg("sum").reset_index()
        .assign(PropositionType=13)
    )

    # union the combined rows back onto the per-type rows, then keep the requested type
    yards_df = pd.concat([yards_df, yard_df_rush_rec], axis=0)
    yards_df = yards_df[yards_df["PropositionType"] == prop_dict[prop]]
    yards_df = yards_df.merge(players_df, on="PlayerId").copy()

    # average yards per player, over the simulations in which the player has a row
    mu = [
        yards_df.loc[yards_df["PlayerId"] == player_ids[0], "YardsGained"].mean(),
        yards_df.loc[yards_df["PlayerId"] == player_ids[1], "YardsGained"].mean(),
    ]

    if not all([mu[0] > 0, mu[1] > 0]):
        return None

    names = [
        players_df.loc[players_df["PlayerId"] == player_ids[0], "PlayerName"].values[0],
        players_df.loc[players_df["PlayerId"] == player_ids[1], "PlayerName"].values[0],
    ]

    # sort player ids so the favorite (higher mean) is player 1
    if np.max(mu) == mu[1]:
        mu = mu[::-1]
        player_ids = player_ids[::-1]
        names = names[::-1]

    # join yards df to itself to stack the players side by side ...
    yards_df_both = pd.merge(
        yards_df.loc[yards_df["PlayerId"] == player_ids[0], ["SimIndex", "YardsGained"]],
        yards_df.loc[yards_df["PlayerId"] == player_ids[1], ["SimIndex", "YardsGained"]],
        on="SimIndex", suffixes=["_plr1", "_plr2"], how='outer'
    )

    # ... and left-join onto every simulation index so simulations without a row count as 0 yards
    yards_df_index = pd.DataFrame({
        "SimIndex": range(sims["NumberOfSimulations"])
    })
    sim_df = (
        yards_df_index
        .merge(yards_df_both, on="SimIndex", how="left")
        .fillna(0)
        .rename(columns={"YardsGained_plr1": "plr1", "YardsGained_plr2": "plr2"})
    )

    game_id = sims["GameId"]
    # Pass this function's own `prop`. The previous code passed `prop_name`, which
    # is not defined here and resolved to whatever global of that name happened to exist.
    return process_sim_df(sim_df, game_id, names, prop)

#build_yards_line([1321, 222], "receiving", sims, SimulationResults)

In [72]:
# Roster of both teams, with a constant key so it can be cross-joined to itself.
slate = pd.concat(
    [
        pd.DataFrame(sims[side])[["PlayerId", "PlayerName", "Position"]]
        for side in ("AwayPlayers", "HomePlayers")
    ],
    axis=0,
).assign(join_ix=1)

# Every unordered pair of distinct players (each pair once).
slate = (
    slate
    .merge(slate, on="join_ix", suffixes=["_1", "_2"])
    .query("PlayerId_1 < PlayerId_2")
)

# For each prop, keep only the pairs where both players' Position codes are listed.
prop_filters = [
    ("rushing and receiving", "Position_1 in [2,4,5,7] and Position_2 in [2,4,5,7]"),
    ("receiving", "Position_1 in [2,4,5,7] and Position_2 in [2,4,5,7]"),
    ("rushing", "Position_1 in [1,2,4,7] and Position_2 in [1,2,4,7]"),
    ("passing", "Position_1 in [1] and Position_2 in [1]"),
]
slate = pd.concat(
    [slate.assign(prop=label).query(position_rule) for label, position_rule in prop_filters],
    axis=0,
)

yards_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    pair_ids = [row["PlayerId_1"], row["PlayerId_2"]]
    yards_lines.append(build_yards_line(pair_ids, row["prop"], sims, SimulationResults))
yards_lines = pd.concat(yards_lines, axis=0)

100%|██████████| 832/832 [00:26<00:00, 31.87it/s]


### Touchdowns

In [73]:
def build_td_line(player_ids, prop_name, sims, SimulationResults):
    """
    Price a head-to-head touchdown market between two players from the simulations.

    Parameters
    ----------
    player_ids : list of two ids
        Matched against ``PlayerId`` in the roster.
    prop_name : str
        One of ``'receiving'``, ``'rushing'``, ``'passing'`` or
        ``'rushing and receiving'``.
    sims : dict
        Parsed simulation payload; ``HomePlayers``, ``AwayPlayers``,
        ``NumberOfSimulations`` and ``GameId`` are read.
    SimulationResults : dict
        Output of ``process_simulation_results``; ``"ScoringPlays"`` is read.

    Returns
    -------
    pandas.DataFrame or None
        The row built by ``process_sim_df``. ``None`` when either player has no
        qualifying score in any simulation.
    """
    # prop -> (required is_pass value, or None for "any play";
    #          column holding the index of the player credited with the score)
    td_sources = {
        "receiving": (1, "ScoringPlayerIndex"),
        "rushing": (0, "ScoringPlayerIndex"),
        "passing": (1, "ScoredByPassingPlayerIndex"),
        "rushing and receiving": (None, "ScoringPlayerIndex"),
    }

    assert prop_name in td_sources, "prop_name must be in ['receiving', 'rushing', 'passing', 'rushing and receiving']"

    player_ids = list(player_ids)

    # pull out id tables; ScoringSide is 1 for the home roster and 2 for the away roster
    id_cols = ["PlayerIndex", "PlayerId", "PlayerName", "Position"]
    roster = pd.concat([
        pd.DataFrame(sims["HomePlayers"])[id_cols].assign(ScoringSide=1),
        pd.DataFrame(sims["AwayPlayers"])[id_cols].assign(ScoringSide=2),
    ], axis=0)
    # isin() instead of interpolating the list into a query string: the ids may be
    # NumPy scalars, whose repr is not valid query syntax on NumPy 2.
    players_df = roster[roster["PlayerId"].isin(player_ids)].copy()

    # pull out scoring plays with ScoreType 0 (presumably touchdowns -- TODO confirm
    # the code) and flag the ones that have a passing player recorded
    scoring = SimulationResults["ScoringPlays"]
    td_df = scoring[scoring["ScoreType"] == 0]
    td_df = td_df.assign(is_pass=td_df["ScoredByPassingPlayerIndex"].notna().astype(int))

    pass_flag, index_col = td_sources[prop_name]
    if pass_flag is not None:
        td_df = td_df[td_df["is_pass"] == pass_flag]

    # count scores per player and simulation, crediting the player in `index_col`
    td_df = (
        td_df
        .rename(columns={index_col: "PlayerIndex"})
        .merge(players_df, on=["PlayerIndex", "ScoringSide"])
        .assign(td=1)
        .groupby(["PlayerId", "SimIndex"])
        .agg({"td": "sum"})
        .reset_index()
    )

    # mean scores per simulation, over all simulations
    n_sims = sims["NumberOfSimulations"]
    mu = [
        td_df.loc[td_df["PlayerId"] == player_ids[0], "td"].sum() / n_sims,
        td_df.loc[td_df["PlayerId"] == player_ids[1], "td"].sum() / n_sims,
    ]

    if not all([mu[0] > 0, mu[1] > 0]):
        return None

    names = [
        players_df.loc[players_df["PlayerId"] == player_ids[0], "PlayerName"].values[0],
        players_df.loc[players_df["PlayerId"] == player_ids[1], "PlayerName"].values[0],
    ]

    # sort player ids so the favorite (higher mean) is player 1
    if np.max(mu) == mu[1]:
        mu = mu[::-1]
        player_ids = player_ids[::-1]
        names = names[::-1]

    # Join on self and join on all sims to ensure full representation:
    # simulations in which a player did not score count as 0.
    td_df_both = pd.merge(
        td_df.loc[td_df["PlayerId"] == player_ids[0], ["SimIndex", "td"]],
        td_df.loc[td_df["PlayerId"] == player_ids[1], ["SimIndex", "td"]],
        on="SimIndex", suffixes=["_plr1", "_plr2"], how='outer'
    )
    td_df_index = pd.DataFrame({"SimIndex": range(n_sims)})

    # format sim_df for processing
    sim_df = (
        td_df_index
        .merge(td_df_both, on="SimIndex", how="left")
        .fillna(0)
        .rename(columns={"td_plr1": "plr1", "td_plr2": "plr2"})
    )

    game_id = sims["GameId"]
    return process_sim_df(sim_df, game_id, names, prop_name)

In [74]:
# Roster of both teams, with a constant key so it can be cross-joined to itself.
roster_cols = ["PlayerId", "PlayerName", "Position"]
slate = pd.concat(
    [pd.DataFrame(sims["AwayPlayers"])[roster_cols], pd.DataFrame(sims["HomePlayers"])[roster_cols]],
    axis=0,
).assign(join_ix=1)

# Every unordered pair of distinct players (each pair once).
slate = (
    slate
    .merge(slate, on="join_ix", suffixes=["_1", "_2"])
    .query("PlayerId_1 < PlayerId_2")
)

# For each prop, keep only the pairs where both players' Position codes are listed.
td_prop_filters = {
    "rushing and receiving": "Position_1 in [2,4,5,7] and Position_2 in [2,4,5,7]",
    "receiving": "Position_1 in [2,4,5,7] and Position_2 in [2,4,5,7]",
    "rushing": "Position_1 in [1,2,4,7] and Position_2 in [1,2,4,7]",
    "passing": "Position_1 in [1] and Position_2 in [1]",
}
slate = pd.concat(
    [slate.assign(prop=label).query(rule) for label, rule in td_prop_filters.items()],
    axis=0,
)

td_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    pair_ids = [row["PlayerId_1"], row["PlayerId_2"]]
    td_lines.append(build_td_line(pair_ids, row["prop"], sims, SimulationResults))
td_lines = pd.concat(td_lines, axis=0)

100%|██████████| 832/832 [00:19<00:00, 41.78it/s]


In [75]:
# Stack the three models' lines and export them (without game_id) to a CSV named after today's date.
lines = (
    pd.concat([chol_lines, yards_lines, td_lines], axis=0)
    .rename(columns={"prop_name": "Prop Type"})
)
stamp = datetime.today().date().isoformat()
lines.drop("game_id", axis=1).to_csv(f"../data/lines_{stamp}.csv", index=False)