In [67]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt

import psycopg2
import pymysql
import sqlalchemy_redshift
from configparser import ConfigParser
from sqlalchemy import create_engine

# Database credentials are read from ../notebook.cfg rather than written into a cell.
parser = ConfigParser()
_ = parser.read("../notebook.cfg")

# Connect to zack attack
# NOTE(review): user and password are placed into the URL verbatim; a password containing
# characters such as "@" or "/" would presumably need URL-escaping first -- confirm.
za_user, za_pwd = (parser.get("nffddev_2", option) for option in ("user", "password"))

za_engine = create_engine(
    f"mysql+pymysql://{za_user}:{za_pwd}@nffddev.numberfire.com/zack_attack",
    connect_args={"host": "nffddev.numberfire.com", "port": 3306},
)
za_conn = za_engine.connect()

# connect to redshift
# NOTE(review): this connection stays open for the whole session; the traceback captured
# under this cell shows the server terminating it on session timeout, so re-run this cell
# if a later Redshift query fails.
red_user, red_pwd = (parser.get("redshift", option) for option in ("user", "password"))

red_engine = create_engine(
    f"postgresql+psycopg2://{red_user}:{red_pwd}@rs1.usdfs.fdbox.net/fanduel",
    connect_args={"port": 5439},
)
red_conn = red_engine.connect()

# Simulation data file
sim_file = "../data/CorrelationId_81c262fb-8bc1-464e-9f86-f2e87fbfa6b9_Timestamp_2023-02-01_21_56_37_569.json"

Exception during reset or similar
Traceback (most recent call last):
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 763, in _finalize_fairy
    fairy._reset(pool, transaction_was_reset)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 1038, in _reset
    pool._dialect.do_rollback(self)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 683, in do_rollback
    dbapi_connection.rollback()
psycopg2.errors.AdminShutdown: terminating connection due to session timeout
SSL connection has been closed unexpectedly



## Data

### NFL Game and Stat Data

In [68]:
# Game: every nfl_game column plus the home/away team abbreviations.
game_qry = """
select 
g.*, 
a.abbrev as away_team_abbrev,
h.abbrev as home_team_abbrev
from nfl_game g
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
"""
game = pd.read_sql(game_qry, za_conn)
# The table's generic `id` is renamed so it lines up with the `game_id` key used elsewhere.
game = game.rename(columns={"id": "game_id"})

# Receptions: one `rec` value per (game, team, player) stat line.
rec_qry = """
select 
game_id, team_id,
player_id, rec 
from nfl_statline_skill
"""
rec = pd.read_sql(rec_qry, za_conn)

### NF Projections

In [69]:
## NF Projections
# Current and archived skill projections are unioned, then tagged with player/team info.
# `tm` is 'a' when the player's team is the home team and 'b' when it is the away team.
proj_qry = """
select
h.name as home_name,
a.name as away_name,
skill.*,
p.name as player_name,
p.position,
case
    when skill.team_id = g.home_team_id then 'a'
    when skill.team_id = g.away_team_id then 'b'
end as tm
from (
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_skill
    union
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_archive_skill
) skill
inner join nfl_player p on skill.player_id = p.id
inner join nfl_game g on skill.game_id = g.id
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
where p.position in ('TE', 'WR', 'RB')
"""
proj = pd.read_sql(proj_qry, za_conn)

# Rank players within each team-game by projected receptions (1 = highest projection).
proj["rank"] = (
    proj.groupby(["team_id", "game_id"])["mean_rec"]
    .rank(method="first", ascending=False)
)

# Keep the top ten per team-game and attach the actual receptions where a stat line exists.
proj = proj[proj["rank"] <= 10].merge(
    rec,
    on=["game_id", "team_id", "player_id"],
    how="left",
)

# Slot label, e.g. "WR_1_a": position, rank within the team, home/away tag.
rank_label = proj["rank"].astype(int).astype(str)
proj["var_name"] = proj["position"] + "_" + rank_label + "_" + proj["tm"]

### Prop Lines

In [70]:
## Prop Lines
# Current + archived prop data; the line used is the first non-null book in the order
# fd, pinny, dk, czr, mgm, restricted to the 'rec' prop for TE/WR/RB.
# NOTE(review): `rank` here is ordered by line ascending, whereas the projection rank is
# descending by mean_rec -- confirm the two var_name schemes are meant to differ.
prop_qry = """
with nfl_prop_full as (
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, dk, czr, mgm
    from analyst_dev.nfl_prop_data
    union
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, NULL as dk, NULL as czr, mgm
    from analyst_dev.nfl_prop_data_archive
),
prop_filt as (
    select
    gamedate, season, week,
    player_id, name, position, team, opp,
    coalesce(fd, pinny, dk, czr, mgm) as line
    from nfl_prop_full
    where prop_name in ('rec')
),
prop_rank as (
    select *,
    row_number() over (partition by season, week, team order by line) rank
    from prop_filt
    where line is not null
    and position in ('TE', 'WR', 'RB')
)
select * from prop_rank
"""

prop = pd.read_sql(prop_qry, red_conn)

# get game ids for prop
# Each game is listed twice: once from the home team's point of view (tm='a') and once
# from the away team's (tm='b'), so props can be matched on (season, week, team, opp).
nfl_prop_game = pd.concat(
    [
        game.rename(
            columns={
                f"{side}_team_abbrev": "team",
                f"{other}_team_abbrev": "opp",
                f"{side}_team_id": "team_id",
            }
        ).assign(tm=tag)
        for side, other, tag in (("home", "away", "a"), ("away", "home", "b"))
    ],
    axis=0,
)[["season", "week", "game_id", "team_id", "team", "opp", "tm"]]

prop = prop.merge(nfl_prop_game, on=["season", "week", "team", "opp"])

# Same "<position>_<rank>_<tm>" slot label as the projections table.
prop["var_name"] = prop["position"] + "_" + prop["rank"].astype(str) + "_" + prop["tm"]

# Attach actual receptions; fillna(0) replaces every missing value in the merged frame.
prop = prop.merge(
    rec,
    on=["game_id", "player_id", "team_id"],
    how="left",
).fillna(0)

### Super Bowl Simulation Data

In [71]:

# Load the simulation payload. The context manager closes the file handle as soon as the
# JSON has been parsed (the handle was previously left open for the life of the kernel).
with open(sim_file) as f: # whatever the name of your sim json file is
    sims = json.load(f)

print(sims.keys())
# SimulationResults has the goods in it

def to_df(x, name=None):
    """
    Convert an arbitrary JSON-like value into a pandas DataFrame.

    Three constructions are tried in order:

    1. ``pd.DataFrame(x)`` -- used as-is when it works (e.g. a list of records).
    2. ``pd.DataFrame(x, index=[0])`` -- a single row (e.g. a dict of scalars); when
       ``name`` is given, every column is prefixed with ``"<name>_"``.
    3. ``pd.DataFrame({name: x}, index=[0])`` -- a single cell holding ``x``.

    Parameters
    ----------
    x : object
        Value to convert.
    name : str, optional
        Column prefix for case 2 and column name for case 3.

    Returns
    -------
    pandas.DataFrame
    """
    # `except Exception` (rather than a bare `except:`) keeps the fall-through behaviour
    # for constructor errors while letting KeyboardInterrupt / SystemExit propagate, so a
    # long parsing loop can still be interrupted.
    try:
        out = pd.DataFrame(x)
    except Exception:
        try:
            out = pd.DataFrame(x, index = [0])
            if name is not None:
                out.columns = [f"{name}_{c}" for c in out.columns]
        except Exception:
            out = pd.DataFrame({name:x},index=[0])
    return out

def parse_sim_results(sims, SimIndex):
    """
    Split the result of one simulation into DataFrames.

    Every entry of ``sims["SimulationResults"][SimIndex]`` is converted with ``to_df``.
    Entries that come back as a single row are appended, column-wise, to a summary row
    stored under the key ``"SimResults"``; all other entries are stored under their own
    key with a ``SimIndex`` column added when they do not already have one.

    Returns
    -------
    dict of str -> pandas.DataFrame
    """
    sim_result = sims["SimulationResults"][SimIndex]

    # The summary row starts out holding just the simulation index.
    tables = {"SimResults": pd.DataFrame({"SimIndex": SimIndex}, index=[0])}

    for field, payload in sim_result.items():
        frame = to_df(payload, field)

        # Single-row frames are widened into the summary row.
        if frame.shape[0] == 1:
            tables["SimResults"] = pd.concat([tables["SimResults"], frame], axis=1)
            continue

        # Anything else is kept as its own table, tagged with the simulation it came from.
        if "SimIndex" not in frame.columns:
            frame["SimIndex"] = SimIndex
        tables[field] = frame

    return tables

def process_simulation_results(sims):
    """
    Parse every simulation in ``sims`` and stack the per-simulation tables.

    ``parse_sim_results`` is called for each index in
    ``range(sims["NumberOfSimulations"])``; the frames it returns are grouped by key
    and concatenated row-wise.

    Returns
    -------
    dict of str -> pandas.DataFrame
        One stacked frame per key, in the order the keys were first seen.
    """
    n_sims = sims["NumberOfSimulations"]
    print(f"Parsing results for {n_sims} of simulations")

    # Collect the frames for each key and concatenate once at the end. Concatenating
    # inside the loop re-copies the accumulated frame on every iteration, which makes
    # the total cost grow quadratically with the number of simulations.
    frames_by_key = {}
    for i in tqdm(range(n_sims)):
        for key, frame in parse_sim_results(sims, i).items():
            frames_by_key.setdefault(key, []).append(frame)

    return {key: pd.concat(frames, axis=0) for key, frames in frames_by_key.items()}


# Process the Simulation Results
# The result is a dict of DataFrames: "SimResults" holds the single-row summary fields of
# each simulation, and every multi-row entry of a simulation result is stacked under its
# own key with a SimIndex column.
SimulationResults = process_simulation_results(sims)

dict_keys(['GameId', 'Solution', 'MarketIdentifiers', 'BatchingEnabled', 'HomePlayers', 'AwayPlayers', 'SimulationResults', 'NextPlayProbabilities', 'EventId', 'NumberOfSimulations', 'HomeTeamStats', 'AwayTeamStats', 'ModelInputs'])
Parsing results for 10000 of simulations


100%|██████████| 10000/10000 [01:36<00:00, 104.17it/s]


## Lines

### Receptions

In [72]:
# using NF for projections
# Rows with any missing value are dropped and game 8297 is excluded; the projected
# receptions are then pivoted to one row per game and one column per var_name slot, empty
# slots are filled with 0, and the slot-by-slot correlation matrix is taken.
corr_mat = (
    proj.dropna()
    .query("game_id != 8297")
    .pivot(index="game_id", columns="var_name", values="mean_rec")
    .fillna(0)
    .corr()
)

In [73]:
def isPD(B):
    """Return True when ``B`` is positive-definite, judged by whether Cholesky succeeds."""
    # np.linalg is referenced directly instead of the `la` alias: that alias is only
    # imported in a later cell of this notebook, so relying on it made these helpers
    # unusable until that cell had been run.
    try:
        np.linalg.cholesky(B)
    except np.linalg.LinAlgError:
        return False
    return True

def nearestPD(A):
    """Find the nearest positive-definite matrix to input
    A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2].
    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6

    ``A`` must be square. The result is returned as soon as a Cholesky
    factorisation of the candidate succeeds.
    """
    # Symmetrise the input, then average it with the factor H built from its SVD.
    B = (A + A.T) / 2
    _, s, V = np.linalg.svd(B)
    H = np.dot(V.T, np.dot(np.diag(s), V))
    A2 = (B + H) / 2
    # Symmetrise once more to remove round-off asymmetry.
    A3 = (A2 + A2.T) / 2
    if isPD(A3):
        return A3
    # The candidate is still not positive-definite: shift the diagonal by a growing
    # multiple of the most negative eigenvalue (plus one float spacing) until it is.
    spacing = np.spacing(np.linalg.norm(A))
    I = np.eye(A.shape[0])
    k = 1
    while not isPD(A3):
        mineig = np.min(np.real(np.linalg.eigvals(A3)))
        A3 += I * (-mineig * k**2 + spacing)
        k += 1
    return A3

In [78]:
from numpy import linalg as la
from scipy.stats import norm, poisson, gamma, rv_discrete

def build_reception_line(player_ids, game_id, proj, n_samples=10_000, seed=None):
    """
    Price a head-to-head receptions matchup between two players in one game.

    Each player's ``var_name``, ``mean_rec`` and ``player_name`` are read from ``proj``.
    Correlated standard normals are drawn using the Cholesky factor of the relevant
    2x2 block of the module-level ``corr_mat`` (passed through ``nearestPD``), mapped
    to uniforms with the normal CDF, and then to reception counts with the Poisson
    quantile function, using each player's ``mean_rec`` as the Poisson mean. Total,
    spread and moneyline figures are derived from those simulated counts.

    Parameters
    ----------
    player_ids : sequence
        The two player ids of the matchup; only the first two entries are used.
    game_id : scalar
        Game whose projections are used.
    proj : pandas.DataFrame
        Needs the columns ``game_id``, ``player_id``, ``var_name``, ``mean_rec`` and
        ``player_name``.
    n_samples : int, default 10_000
        Number of simulated draws per player.
    seed : int, optional
        When given, draws come from ``np.random.RandomState(seed)`` so the result is
        reproducible. When omitted, the global NumPy random state is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row of decimal odds (1 / probability) for moneyline, spread and total.
        Player 1 is the player with the larger ``mean_rec``.

    Raises
    ------
    IndexError
        If either player has no row in ``proj`` for ``game_id``.
    """
    game_proj = proj.query(f"game_id=={game_id}")

    def _lookup(column):
        # First matching value of `column` for each of the two players, in input order.
        return np.array([
            game_proj.query(f"player_id=={pid}")[column].values[0]
            for pid in player_ids[:2]
        ])

    var = _lookup("var_name")
    mu = _lookup("mean_rec")
    names = _lookup("player_name")

    # sort the list so player 1 is always the favorite
    # (equal means also flip the pair, exactly as the comparison below dictates)
    if np.max(mu) == mu[1]:
        var = var[::-1]
        mu = mu[::-1]
        names = names[::-1]

    c = nearestPD(corr_mat.loc[var, var])
    L = np.linalg.cholesky(c)

    n_vars = len(var)
    rng = np.random if seed is None else np.random.RandomState(seed)
    uncorrelated = rng.standard_normal((n_vars, n_samples))
    correlated = np.dot(L, uncorrelated)

    plr_rec_sims = []
    for i in range(n_vars):
        draws = correlated[i]
        # Standardise the draws, map them to uniforms, then to Poisson counts.
        z = (draws - np.mean(draws)) / np.std(draws)
        plr_rec_sims.append(poisson(mu[i]).ppf(norm.cdf(z)))

    sim_df = pd.DataFrame(plr_rec_sims).T.rename(columns={0: "rec_plr1", 1: "rec_plr2"})
    sim_df["total"] = sim_df["rec_plr1"] + sim_df["rec_plr2"]
    sim_df["plr1_diff"] = sim_df["rec_plr1"] - sim_df["rec_plr2"]
    sim_df["plr1_more"] = (sim_df["rec_plr1"] > sim_df["rec_plr2"]).astype(int)

    # TOTAL
    # Start from the median rounded to the nearest half; a whole number is moved half a
    # point towards the side that currently has the smaller probability.
    total = np.round(np.median(sim_df["total"]) * 2) / 2
    p_over = np.mean(sim_df["total"] > total)
    p_under = np.mean(sim_df["total"] < total)
    if (total).is_integer():
        if p_over < p_under:
            total -= 0.5
        else:
            total += 0.5

    # SPREAD
    # Same half-point treatment, applied to player 1's margin.
    spread = np.round(-1 * np.median(sim_df["plr1_diff"]) * 2) / 2
    p_cover = np.mean(sim_df["plr1_diff"] > -1 * spread)
    p_not_cover = np.mean(sim_df["plr1_diff"] < -1 * spread)
    if (spread).is_integer():
        if p_cover < p_not_cover:
            spread += 0.5
        else:
            spread -= 0.5

    # MONEYLINE
    # Ties are excluded, so player 2's probability is the complement of player 1's.
    decided = sim_df.query("rec_plr1 != rec_plr2")
    p_plr1_more = decided["plr1_more"].mean()

    # Re-evaluate the probabilities at the final lines. No margin is applied here.
    p_plr1_cover = np.mean(sim_df["rec_plr1"] + spread > sim_df["rec_plr2"])
    p_over = np.mean(sim_df["total"] > total)
    # Fixed: this used `>` as well, which made "Odds Under" a copy of "Odds Over".
    p_under = np.mean(sim_df["total"] < total)

    # format output
    matchup = f"{names[0]} vs {names[1]}"
    prop_name = 'receptions'
    out = pd.DataFrame({
        "game_id": game_id,
        "Matchup": matchup,
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/(1-p_plr1_more),
        "Spread": spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/(1-p_plr1_cover),
        "Total": total,
        "Odds Over": 1/p_over,
        "Odds Under": 1/p_under,
    },index=[0])

    return out

In [79]:
# Alternative source for the slate, kept for reference:
#slate = proj[["game_id", "player_id", "player_name", "mean_rec"]].copy()
slate = prop[["game_id", "player_id", "name", "line"]].rename(
    columns={"name": "player_name", "line": "mean_rec"}
)

# Pair every player with every other player of the same game, keeping only the ordering
# in which the first player's line is at least as high as the second's.
slate = slate.merge(slate, on="game_id", suffixes=["_1", "_2"])
slate = slate.query("player_id_1 != player_id_2 and mean_rec_1 >= mean_rec_2")
slate = slate[["game_id", "player_id_1", "player_id_2"]]

# Only game 8297 is priced.
slate = slate.query("game_id == 8297")

rec_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    rec_lines.append(
        build_reception_line(
            [row["player_id_1"], row["player_id_2"]],
            row["game_id"],
            proj,
        )
    )
rec_lines = pd.concat(rec_lines, axis=0)


100%|██████████| 28/28 [00:04<00:00,  6.30it/s]


### Yards

In [80]:
def build_yards_line(player_ids, prop, sims, SimulationResults):
    """
    Price a head-to-head yards matchup between two players from simulated yardage.

    Parameters
    ----------
    player_ids : sequence
        The two ``PlayerId`` values of the matchup. Plain ints and NumPy integer
        scalars are both accepted.
    prop : str
        One of 'receiving', 'rushing', 'passing', 'rushing and receiving'.
    sims : dict
        Simulation payload; ``HomePlayers``, ``AwayPlayers``, ``NumberOfSimulations``
        and ``GameId`` are read.
    SimulationResults : dict
        Must hold a ``"PlayerYards"`` DataFrame with the columns ``SimIndex``,
        ``PlayerId``, ``PropositionType`` and ``YardsGained``.

    Returns
    -------
    pandas.DataFrame or None
        One row of decimal odds (1 / probability) for moneyline, spread and total, with
        player 1 being the player with the larger mean yardage. ``None`` when either
        player's mean yardage for the prop is missing or not positive.
    """
    # PropositionType codes found in the PlayerYards table. 13 is not a source code: it
    # is assigned below to the per-simulation sum of rushing (5) and receiving (8).
    prop_dict = {
        "receiving": 8,
        "rushing": 5,
        "passing": 1,
        "rushing and receiving": 13
    }

    assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing', 'rushing and receiving']"

    # Rows are selected with boolean masks rather than ids formatted into query()
    # strings: a list of NumPy scalars formats as "[np.int64(1), ...]" on NumPy >= 2,
    # which is not a valid query expression.
    player_ids = list(player_ids)

    # pull out id tables
    roster_cols = ["PlayerIndex", "PlayerId", "PlayerName", "Position"]
    players_df = pd.concat([
        pd.DataFrame(sims["HomePlayers"])[roster_cols],
        pd.DataFrame(sims["AwayPlayers"])[roster_cols],
    ], axis=0)
    players_df = players_df[players_df["PlayerId"].isin(player_ids)].copy()

    # pull out yards -- only the two players of interest, so the aggregation below does
    # not have to scan every player on every call
    yards_df = SimulationResults["PlayerYards"]
    yards_df = yards_df[yards_df["PlayerId"].isin(player_ids)]

    if prop == "rushing and receiving":
        # aggregate rushing and receiving per simulation and player, then union the
        # sums back onto the original rows under the synthetic code 13
        yard_df_rush_rec = (
            yards_df[yards_df["PropositionType"].isin([5, 8])]
            .groupby(["SimIndex", "PlayerId"])
            .agg("sum")
            .reset_index()
            .assign(PropositionType=13)
        )
        yards_df = pd.concat([yards_df, yard_df_rush_rec], axis=0)

    yards_df = yards_df[yards_df["PropositionType"] == prop_dict[prop]]
    yards_df = yards_df.merge(players_df, on="PlayerId").copy()

    def _player_yards(pid):
        # SimIndex / YardsGained rows of one player for the selected prop.
        return yards_df.loc[yards_df["PlayerId"] == pid, ["SimIndex", "YardsGained"]]

    # find average yards per player
    mu = [
        _player_yards(player_ids[0])["YardsGained"].mean(),
        _player_yards(player_ids[1])["YardsGained"].mean(),
    ]

    # A NaN mean (no rows for this prop) fails the comparison too, so such pairs are skipped.
    if not all([mu[0] > 0, mu[1] > 0]):
        return None

    names = [
        players_df.loc[players_df["PlayerId"] == player_ids[0], "PlayerName"].values[0],
        players_df.loc[players_df["PlayerId"] == player_ids[1], "PlayerName"].values[0],
    ]

    # sort player ids so the favorite comes first
    if np.max(mu) == mu[1]:
        mu = mu[::-1]
        player_ids = player_ids[::-1]
        names = names[::-1]

    # join yards df to itself to stack players side by side, then onto the full list of
    # simulations so that simulations with no row for a player count as 0 yards
    yards_df_index = pd.DataFrame({
        "SimIndex": range(sims["NumberOfSimulations"])
    })

    yards_df_both = pd.merge(
        _player_yards(player_ids[0]),
        _player_yards(player_ids[1]),
        on="SimIndex", suffixes=["_plr1", "_plr2"], how="outer"
    )

    yards_df = yards_df_index.merge(
        yards_df_both,
        on="SimIndex", how="left"
    ).fillna(0)

    yards_df["total"] = yards_df["YardsGained_plr1"] + yards_df["YardsGained_plr2"]
    yards_df["plr1_diff"] = yards_df["YardsGained_plr1"] - yards_df["YardsGained_plr2"]
    yards_df["plr1_more"] = (yards_df["YardsGained_plr1"] > yards_df["YardsGained_plr2"]).astype(int)
    yards_df["plr2_more"] = (yards_df["YardsGained_plr2"] > yards_df["YardsGained_plr1"]).astype(int)

    # TOTAL
    # Median rounded to the nearest half; a whole number is moved half a point towards
    # the side that currently has the smaller probability.
    total = np.round(np.median(yards_df["total"]) * 2)/2
    p_over = np.mean(yards_df["total"] > total)
    p_under = np.mean(yards_df["total"] < total)
    if (total).is_integer():
        if p_over < p_under:
            total -= 0.5
        else:
            total += 0.5

    # SPREAD
    spread = np.round( -1*np.median(yards_df["plr1_diff"]) * 2)/2
    p_cover = np.mean(yards_df["plr1_diff"] > -1*spread)
    p_not_cover = np.mean(yards_df["plr1_diff"] < -1*spread)
    if (spread).is_integer():
        if p_cover < p_not_cover:
            spread += 0.5
        else:
            spread -= 0.5

    # MONEYLINE (simulations where both players tie are excluded)
    decided = yards_df[yards_df["YardsGained_plr1"] != yards_df["YardsGained_plr2"]]
    p_plr1_more = decided["plr1_more"].mean()
    p_plr2_more = decided["plr2_more"].mean()

    # Re-evaluate the probabilities at the final lines. No margin is applied here.
    p_plr1_cover = np.mean(yards_df["YardsGained_plr1"] + spread > yards_df["YardsGained_plr2"])
    p_over = np.mean(yards_df["total"] > total)
    p_under = np.mean(yards_df["total"] < total)

    # format output
    matchup = f"{names[0]} vs {names[1]}"
    prop_name = f"{prop} yards"
    out = pd.DataFrame({
        "game_id": sims["GameId"],
        "Matchup": matchup,
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/p_plr2_more,
        "Spread": spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/(1-p_plr1_cover),
        "Total": total,
        "Odds Over": 1/p_over,
        "Odds Under": 1/p_under
    },index=[0])

    return out

#build_yards_line([1321, 222], "receiving", sims, SimulationResults)

In [81]:
# Roster of both teams (away first, then home) with a constant key for a later cross join.
slate = pd.concat(
    [
        pd.DataFrame(sims[side])[["PlayerId", "PlayerName", "Position"]]
        for side in ("AwayPlayers", "HomePlayers")
    ],
    axis=0,
).assign(join_ix=1)

# Spot check: every simulated yardage row recorded for player 1382.
SimulationResults["PlayerYards"].query("PlayerId == 1382")

Unnamed: 0,SimIndex,PlayerId,PropositionType,YardsGained,ExpectedYards
10,0,1382,1,250.0,292.5
11,0,1382,5,29.0,18.5
10,1,1382,1,277.0,292.5
11,1,1382,5,14.0,18.5
10,2,1382,1,214.0,292.5
...,...,...,...,...,...
11,9997,1382,5,59.0,18.5
10,9998,1382,1,242.0,292.5
11,9998,1382,5,25.0,18.5
10,9999,1382,1,319.0,292.5


In [82]:
# Roster of both teams (away first, then home) with a constant key used for the cross join.
slate = pd.concat(
    [
        pd.DataFrame(sims[side])[["PlayerId", "PlayerName", "Position"]]
        for side in ("AwayPlayers", "HomePlayers")
    ],
    axis=0,
).assign(join_ix=1)

# Every unordered pair of distinct players (the lower PlayerId is always player 1).
slate = slate.merge(slate, on="join_ix", suffixes=["_1", "_2"])
slate = slate.query("PlayerId_1 < PlayerId_2")

# One copy of the pairs per prop, restricted to the Position codes allowed for that prop.
slate = pd.concat(
    [
        slate.assign(prop=label).query(
            f"Position_1 in {positions} and Position_2 in {positions}"
        )
        for label, positions in (
            ("rushing and receiving", "[2,4,5,7]"),
            ("receiving", "[2,4,5,7]"),
            ("rushing", "[1,2,4,7]"),
            ("passing", "[1]"),
        )
    ],
    axis=0,
)

# build_yards_line returns None for pairs it cannot price.
yards_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    yards_lines.append(
        build_yards_line(
            [row["PlayerId_1"], row["PlayerId_2"]],
            row["prop"],
            sims,
            SimulationResults,
        )
    )
yards_lines = pd.concat(yards_lines, axis=0)

100%|██████████| 832/832 [00:42<00:00, 19.58it/s]


### Touchdowns

In [83]:
# NOTE(review): these look like leftover example arguments for build_td_line. The second
# assignment rebinds the module-level name `prop` -- which the "Prop Lines" cell set to a
# DataFrame -- to a string, so any cell that reads that DataFrame cannot be re-run after
# this one without re-running the "Prop Lines" cell first. Consider removing or renaming.
player_ids = [1382, 3450]
prop = "passing"

def build_td_line(player_ids, prop, sims, SimulationResults):
    """
    Price a head-to-head touchdown matchup between two players from simulated scores.

    Parameters
    ----------
    player_ids : sequence
        The two ``PlayerId`` values of the matchup. Plain ints and NumPy integer
        scalars are both accepted.
    prop : str
        One of 'receiving', 'rushing', 'passing', 'rushing and receiving'.
    sims : dict
        Simulation payload; ``HomePlayers``, ``AwayPlayers``, ``NumberOfSimulations``
        and ``GameId`` are read.
    SimulationResults : dict
        Must hold a ``"ScoringPlays"`` DataFrame with the columns ``SimIndex``,
        ``ScoreType``, ``ScoringSide``, ``ScoringPlayerIndex`` and
        ``ScoredByPassingPlayerIndex``.

    Returns
    -------
    pandas.DataFrame or None
        One row of decimal odds (1 / probability) for moneyline, spread and total, with
        player 1 being the player with more simulated touchdowns on average. ``None``
        when either player has no touchdowns of this kind in any simulation, or when
        either moneyline probability is not positive.
    """
    # For each prop: the value the is_pass flag must have (None = any scoring play) and
    # the column that holds the index of the player credited with the touchdown.
    prop_dict = {
        "receiving": (1, "ScoringPlayerIndex"),
        "rushing": (0, "ScoringPlayerIndex"),
        "passing": (1, "ScoredByPassingPlayerIndex"),
        "rushing and receiving": (None, "ScoringPlayerIndex"),
    }

    # The message now lists every accepted value ('rushing and receiving' was missing).
    assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing', 'rushing and receiving']"
    required_pass_flag, index_col = prop_dict[prop]

    # Rows are selected with boolean masks rather than ids formatted into query()
    # strings: a list of NumPy scalars formats as "[np.int64(1), ...]" on NumPy >= 2,
    # which is not a valid query expression.
    player_ids = list(player_ids)
    n_sims = sims["NumberOfSimulations"]

    # pull out id tables; ScoringSide is 1 for the home roster and 2 for the away roster
    roster_cols = ["PlayerIndex", "PlayerId", "PlayerName", "Position"]
    players_df = pd.concat([
        pd.DataFrame(sims["HomePlayers"])[roster_cols].assign(ScoringSide=1),
        pd.DataFrame(sims["AwayPlayers"])[roster_cols].assign(ScoringSide=2),
    ], axis=0)
    players_df = players_df[players_df["PlayerId"].isin(player_ids)].copy()

    # pull out scoring plays with ScoreType 0 (presumably touchdowns -- confirm) and flag
    # those that have a passing player recorded
    plays = SimulationResults["ScoringPlays"]
    plays = plays[plays["ScoreType"] == 0]
    plays = plays.assign(is_pass=plays["ScoredByPassingPlayerIndex"].notna().astype(int))
    if required_pass_flag is not None:
        plays = plays[plays["is_pass"] == required_pass_flag]

    # Count touchdowns per player and simulation. The credited-player column is renamed
    # to PlayerIndex so it can be matched against the roster of the scoring side.
    td_df = (
        plays
        .rename(columns={index_col: "PlayerIndex"})
        .merge(players_df, on=["PlayerIndex", "ScoringSide"])
        .assign(td=1)
        .groupby(["PlayerId", "SimIndex"])
        .agg({"td": "sum"})
        .reset_index()
    )

    def _player_tds(pid):
        # SimIndex / td rows of one player (only simulations with at least one touchdown).
        return td_df.loc[td_df["PlayerId"] == pid, ["SimIndex", "td"]]

    # average touchdowns per simulation for each player
    mu = [
        _player_tds(player_ids[0])["td"].sum() / n_sims,
        _player_tds(player_ids[1])["td"].sum() / n_sims,
    ]

    if not all([mu[0] > 0, mu[1] > 0]):
        return None

    names = [
        players_df.loc[players_df["PlayerId"] == player_ids[0], "PlayerName"].values[0],
        players_df.loc[players_df["PlayerId"] == player_ids[1], "PlayerName"].values[0],
    ]

    # sort player ids so the favorite comes first
    if np.max(mu) == mu[1]:
        mu = mu[::-1]
        player_ids = player_ids[::-1]
        names = names[::-1]

    # Join on self and join on all sims to ensure full representation
    td_df_index = pd.DataFrame({
        "SimIndex": range(n_sims)
    })

    td_df_both = pd.merge(
        _player_tds(player_ids[0]),
        _player_tds(player_ids[1]),
        on="SimIndex", suffixes=["_plr1", "_plr2"], how="outer"
    )

    td_df = td_df_index.merge(
        td_df_both,
        on="SimIndex", how="left"
    ).fillna(0)

    td_df["total"] = td_df["td_plr1"] + td_df["td_plr2"]
    td_df["plr1_diff"] = td_df["td_plr1"] - td_df["td_plr2"]
    td_df["plr1_more"] = (td_df["td_plr1"] > td_df["td_plr2"]).astype(int)
    td_df["plr2_more"] = (td_df["td_plr2"] > td_df["td_plr1"]).astype(int)

    # TOTAL
    # Median rounded to the nearest half; a whole number is moved half a point towards
    # the side that currently has the smaller probability.
    total = np.round(np.median(td_df["total"]) * 2)/2
    p_over = np.mean(td_df["total"] > total)
    p_under = np.mean(td_df["total"] < total)
    if (total).is_integer():
        if p_over < p_under:
            total -= 0.5
        else:
            total += 0.5

    # SPREAD
    spread = np.round( -1*np.median(td_df["plr1_diff"]) * 2)/2
    p_cover = np.mean(td_df["plr1_diff"] > -1*spread)
    p_not_cover = np.mean(td_df["plr1_diff"] < -1*spread)
    if (spread).is_integer():
        if p_cover < p_not_cover:
            spread += 0.5
        else:
            spread -= 0.5

    # MONEYLINE (simulations where both players tie are excluded)
    decided = td_df[td_df["td_plr1"] != td_df["td_plr2"]]
    p_plr1_more = decided["plr1_more"].mean()
    p_plr2_more = decided["plr2_more"].mean()

    # Re-evaluate the probabilities at the final lines. No margin is applied here.
    p_plr1_cover = np.mean(td_df["td_plr1"] + spread > td_df["td_plr2"])
    p_plr2_cover = np.mean(td_df["td_plr1"] + spread < td_df["td_plr2"])
    p_over = np.mean(td_df["total"] > total)
    p_under = np.mean(td_df["total"] < total)

    # Fixed: the guard used to test p_plr1_more twice, so a pair in which player 2 never
    # out-scored player 1 slipped through with infinite moneyline odds for player 2.
    if not all([not np.isnan(total), p_plr1_more > 0, p_plr2_more > 0]):
        return None

    # format output
    matchup = f"{names[0]} vs {names[1]}"
    prop_name = f"{prop} touchdowns"
    out = pd.DataFrame({
        "game_id": sims["GameId"],
        "Matchup": matchup,
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/p_plr2_more,
        "Spread": spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/p_plr2_cover,
        "Total": total,
        "Odds Over": 1/p_over,
        "Odds Under": 1/p_under
    },index=[0])

    return out

In [84]:
# Roster of both teams (away first, then home) with a constant key used for the cross join.
slate = pd.concat(
    [
        pd.DataFrame(sims[side])[["PlayerId", "PlayerName", "Position"]]
        for side in ("AwayPlayers", "HomePlayers")
    ],
    axis=0,
).assign(join_ix=1)

# Every unordered pair of distinct players (the lower PlayerId is always player 1).
slate = slate.merge(slate, on="join_ix", suffixes=["_1", "_2"])
slate = slate.query("PlayerId_1 < PlayerId_2")

# One copy of the pairs per prop, restricted to the Position codes allowed for that prop.
slate = pd.concat(
    [
        slate.assign(prop=label).query(
            f"Position_1 in {positions} and Position_2 in {positions}"
        )
        for label, positions in (
            ("rushing and receiving", "[2,4,5,7]"),
            ("receiving", "[2,4,5,7]"),
            ("rushing", "[1,2,4,7]"),
            ("passing", "[1]"),
        )
    ],
    axis=0,
)

# build_td_line returns None for pairs it cannot price.
td_lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i]
    td_lines.append(
        build_td_line(
            [row["PlayerId_1"], row["PlayerId_2"]],
            row["prop"],
            sims,
            SimulationResults,
        )
    )
td_lines = pd.concat(td_lines, axis=0)

100%|██████████| 832/832 [01:39<00:00,  8.40it/s]


### Combine

In [85]:
# Stack the reception, yardage and touchdown lines and give the prop label its display name.
lines = pd.concat([rec_lines, yards_lines, td_lines], axis=0)
lines = lines.rename(columns={"prop_name": "Prop Type"})

# Write everything except game_id to a CSV whose name carries today's date (YYYY-MM-DD).
lines.drop(columns="game_id").to_csv(
    f"../data/lines_{str(datetime.today())[0:10]}.csv",
    index=False,
)

In [87]:
# Show only the rushing-yards matchups.
lines.loc[lines["Prop Type"].eq("rushing yards")]

Unnamed: 0,game_id,Matchup,Prop Type,Player1,Player2,Odds Player 1 Moneyline,Odds Player 2 Moneyline,Spread,Odds Player 1 Cover,Odds Player 2 Cover,Total,Odds Over,Odds Under
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Jerick McKinnon vs Patrick Mahomes,rushing yards,Jerick McKinnon,Patrick Mahomes,1.845663,2.182504,-2.5,2.008435,1.991635,44.5,2.02061,1.979806
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Isiah Pacheco vs Patrick Mahomes,rushing yards,Isiah Pacheco,Patrick Mahomes,1.237229,5.215336,-27.5,2.005616,1.994416,70.5,1.986097,2.014099
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Patrick Mahomes,rushing yards,Miles Sanders,Patrick Mahomes,1.155669,7.423905,-36.5,2.013693,1.986492,80.5,2.010454,1.989654
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Jalen Hurts vs Patrick Mahomes,rushing yards,Jalen Hurts,Patrick Mahomes,1.227959,5.386746,-28.5,2.022245,1.978239,71.5,2.023882,1.976675
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Patrick Mahomes vs Kenneth Gainwell,rushing yards,Patrick Mahomes,Kenneth Gainwell,1.867166,2.153182,-1.5,1.973554,2.027164,39.5,2.024291,1.976285
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Isiah Pacheco vs Jerick McKinnon,rushing yards,Isiah Pacheco,Jerick McKinnon,1.267058,4.744508,-24.5,2.001601,1.998401,73.5,2.01005,1.99005
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Jerick McKinnon vs Kenneth Gainwell,rushing yards,Jerick McKinnon,Kenneth Gainwell,1.748383,2.336215,-4.5,2.028809,1.971998,42.5,2.027575,1.973165
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Jerick McKinnon,rushing yards,Miles Sanders,Jerick McKinnon,1.209336,5.777008,-34.5,1.995211,2.004812,82.5,1.983733,2.016536
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Isiah Pacheco,rushing yards,Miles Sanders,Isiah Pacheco,1.704449,2.41955,-9.5,1.9992,2.0008,109.5,1.993223,2.006823
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Jalen Hurts,rushing yards,Miles Sanders,Jalen Hurts,1.699813,2.428953,-9.5,2.023063,1.977457,109.5,2.001201,1.998801
