In [60]:
import pandas as pd
import numpy as np
import json 
from tqdm import tqdm

# Path to the simulation JSON export -- change this to the name of your own sim file.
# The context manager closes the handle as soon as the JSON has been parsed.
with open("../data/CorrelationId_3b7de686-652a-4ba9-8ce3-f586a0119315_Timestamp_2023-01-30_21_38_15_817.json", encoding="utf-8") as f:
    sims = json.load(f)

print(sims.keys())
# The per-simulation outputs live under the "SimulationResults" key

def to_df(x, name=None):
    """
    Convert an arbitrary object to a pandas DataFrame.

    Three constructions are attempted in order, and the first one that
    succeeds is returned:

    1. ``pd.DataFrame(x)`` -- e.g. a list of records.
    2. ``pd.DataFrame(x, index=[0])`` -- e.g. a dict of scalars, giving a
       single row. Only in this case are the columns prefixed with
       ``"{name}_"`` (when ``name`` is not None).
    3. ``pd.DataFrame({name: x}, index=[0])`` -- e.g. a bare scalar or
       string, giving a single cell in a column called ``name``.

    Parameters
    ----------
    x : object
        Value to convert.
    name : str, optional
        Column prefix (case 2) or column name (case 3).

    Returns
    -------
    pandas.DataFrame
    """
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate while every pandas construction error falls
    # through to the next attempt.
    try:
        return pd.DataFrame(x)
    except Exception:
        pass

    try:
        out = pd.DataFrame(x, index=[0])
    except Exception:
        return pd.DataFrame({name: x}, index=[0])

    if name is not None:
        out.columns = [f"{name}_{c}" for c in out.columns]
    return out

def parse_sim_results(sims, SimIndex):
    """
    Process a single simulation into a dict of DataFrames.

    Every entry of ``sims["SimulationResults"][SimIndex]`` is converted with
    ``to_df``. Entries that convert to exactly one row are placed side by
    side in a single one-row frame stored under ``"SimResults"`` (whose first
    column is ``SimIndex``). Every other entry is stored under its own key,
    with a ``SimIndex`` column added when it does not already have one.

    Parameters
    ----------
    sims : dict
        Parsed simulation JSON; must contain ``"SimulationResults"``.
    SimIndex : int
        Position of the simulation inside ``sims["SimulationResults"]``.

    Returns
    -------
    dict[str, pandas.DataFrame]
        ``"SimResults"`` is always the first key.
    """
    # pull out simulation result
    result = sims["SimulationResults"][SimIndex]

    # Collect the one-row pieces and concatenate once at the end instead of
    # re-concatenating the growing frame for every key.
    single_row_frames = [pd.DataFrame({"SimIndex": SimIndex}, index=[0])]

    # Reserve the first slot so the key order of the returned dict is stable.
    result_dict = {"SimResults": None}
    for key, value in result.items():
        element_df = to_df(value, key)
        if element_df.shape[0] == 1:
            single_row_frames.append(element_df)
        else:
            if "SimIndex" not in element_df.columns:
                element_df["SimIndex"] = SimIndex
            result_dict[key] = element_df

    result_dict["SimResults"] = pd.concat(single_row_frames, axis=1)
    return result_dict

def process_simulation_results(sims):
    """
    Process every simulation and stack the per-simulation tables.

    ``parse_sim_results`` is called for each index in
    ``range(sims["NumberOfSimulations"])``. The frames it returns are grouped
    by key and row-concatenated, in simulation order, into one DataFrame per
    key.

    Parameters
    ----------
    sims : dict
        Parsed simulation JSON; must contain ``"NumberOfSimulations"`` and
        whatever ``parse_sim_results`` reads.

    Returns
    -------
    dict[str, pandas.DataFrame]
        One stacked DataFrame per result key (empty dict when there are no
        simulations).
    """
    n_sims = sims["NumberOfSimulations"]
    print(f"Parsing results for {n_sims} of simulations")

    # Gather the pieces first and concatenate once per key: calling pd.concat
    # inside the loop copies the accumulated frame on every iteration, which
    # is quadratic in the number of simulations.
    frames_by_key = {}
    for i in tqdm(range(n_sims)):
        for key, frame in parse_sim_results(sims, i).items():
            frames_by_key.setdefault(key, []).append(frame)

    return {
        key: pd.concat(frames, axis=0)
        for key, frames in frames_by_key.items()
    }


# Process the Simulation Results: builds a dict of DataFrames keyed by result
# table name, with the rows from every simulation stacked together
SimulationResults = process_simulation_results(sims)


dict_keys(['GameId', 'Solution', 'MarketIdentifiers', 'BatchingEnabled', 'HomePlayers', 'AwayPlayers', 'SimulationResults', 'NextPlayProbabilities', 'EventId', 'NumberOfSimulations', 'HomeTeamStats', 'AwayTeamStats', 'ModelInputs'])
Parsing results for 10000 of simulations


100%|██████████| 10000/10000 [03:32<00:00, 47.06it/s]


Proposition Types:
- 1 = passing yards
- 5 = rushing yards
- 6 = rushing attempts (not certain)
- 8 = receiving yards
- 9 = Receptions
- 13 = Total Yards

## Any Player to Rush for 100 Yards

In [62]:
def any_player_to_rush_for_x_yards(SimulationResults, yards):
    """
    Share of simulations in which at least one player gains strictly more
    than ``yards`` on a PropositionType 5 row of ``PlayerYards``.

    Only simulations that have at least one PropositionType 5 row take part
    in the average, because the grouping is done on the filtered rows.

    Parameters
    ----------
    SimulationResults : dict
        Must contain a ``"PlayerYards"`` DataFrame with ``PropositionType``,
        ``YardsGained`` and ``SimIndex`` columns.
    yards : number
        Threshold; a row counts only when ``YardsGained > yards``.

    Returns
    -------
    float
        Fraction of simulations with at least one qualifying row.
    """
    rushing_rows = SimulationResults["PlayerYards"].query("PropositionType==5")

    # True for every player-row that clears the threshold
    cleared = rushing_rows["YardsGained"] > yards

    # Collapse to one flag per simulation: did anybody clear it?
    hit_per_sim = cleared.groupby(rushing_rows["SimIndex"]).any()

    return hit_per_sim.mean()

# Share of simulations in which some player has more than 100 rushing yards
any_player_to_rush_for_x_yards(SimulationResults, 100)

0.259

## Will Team X Rush for a TD?

- Home = Philly
- Away = KC

In [63]:
def teams_to_rush_for_td(SimulationResults):
    """
    Share of simulations in which each team has at least one rushing
    touchdown.

    Rushing touchdowns are derived per simulation as the sum of the team's
    touchdowns over the four quarters and overtime, minus its passing
    touchdowns; a simulation counts when that difference is positive.

    Parameters
    ----------
    SimulationResults : dict
        Must contain a ``"SimResults"`` DataFrame with the
        ``{Home,Away}Touchdowns_<period>`` and ``{Home,Away}PassingTouchdowns``
        columns.

    Returns
    -------
    pandas.Series
        Indexed by ``HomeRushingTouchdown`` and ``AwayRushingTouchdown``.
    """
    sim_table = SimulationResults["SimResults"]
    periods = ("FirstQuarter", "SecondQuarter", "ThirdQuarter", "FourthQuarter", "Overtime")

    shares = {}
    for side in ("Home", "Away"):
        all_tds = sum(sim_table[f"{side}Touchdowns_{period}"] for period in periods)
        non_passing_tds = all_tds - sim_table[f"{side}PassingTouchdowns"]
        shares[f"{side}RushingTouchdown"] = (non_passing_tds > 0).mean()

    return pd.Series(shares)

# Reciprocal of each probability (1/p), the same form the notebook labels "Odds" elsewhere
1/teams_to_rush_for_td(SimulationResults)

HomeRushingTouchdown    1.278282
AwayRushingTouchdown    1.732202
dtype: float64

## Yards Head to Head

In [136]:
def _half_point_line(values):
    """
    Return a line near the median of ``values`` that always ends in .5.

    The median is rounded to the nearest half. If that lands on a whole
    number it is moved half a point: up when fewer than half of the values
    fall strictly below it, down otherwise.
    """
    line = np.round(np.median(values) * 2) / 2
    if line.is_integer():
        if np.mean(values < line) < 0.5:
            line += 0.5
        else:
            line -= 0.5
    return line


def build_yards_line(player_ids, prop, sims, SimulationResults):
    """
    Build a one-row head-to-head yards market for two players.

    The player with the higher (or equal, second-listed) mean simulated
    yards becomes "Player 1". For the simulations in which both players have
    a row, the function derives:

    * moneyline -- share of simulations where Player 1 has strictly more yards;
    * spread    -- half-point handicap on Player 1 around the median difference;
    * total     -- half-point line around the median of the combined yards.

    Every "Odds" column is the plain reciprocal ``1/p`` of the simulated
    probability; no margin ("juice") is applied. A probability of 0 yields
    ``inf``.

    Parameters
    ----------
    player_ids : sequence of two ints
        Player ids to compare (plain or NumPy integers).
    prop : {"receiving", "rushing", "passing"}
        Which yards proposition to use.
    sims : dict
        Parsed simulation JSON; ``"HomePlayers"``, ``"AwayPlayers"`` and
        ``"GameId"`` are read.
    SimulationResults : dict
        Must contain a ``"PlayerYards"`` DataFrame with ``PlayerId``,
        ``PropositionType``, ``SimIndex`` and ``YardsGained`` columns.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame, or ``None`` when either player's mean yards for
        the prop is missing or not positive.

    Raises
    ------
    AssertionError
        If ``prop`` is not one of the supported names.
    """
    prop_dict = {
        "receiving": 8,
        "rushing": 5,
        "passing": 1
    }

    assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing']"

    # Work on a private list: it is reordered below and the caller's sequence
    # must not change.
    ids = list(player_ids)

    # pull out id tables
    id_cols = ["PlayerId", "PlayerName", "Position"]
    roster = pd.concat(
        [
            pd.DataFrame(sims["HomePlayers"])[id_cols],
            pd.DataFrame(sims["AwayPlayers"])[id_cols],
        ],
        axis=0,
    )
    # Boolean masks instead of f-string queries: interpolating a list of NumPy
    # scalars renders as "np.int64(...)" under NumPy >= 2, which
    # DataFrame.query cannot resolve.
    players_df = roster[roster["PlayerId"].isin(ids)]

    # pull out yards for the requested players and proposition
    all_yards = SimulationResults["PlayerYards"]
    wanted = all_yards["PlayerId"].isin(ids) & (all_yards["PropositionType"] == prop_dict[prop])
    # The inner merge also drops yards rows for ids missing from both rosters.
    yards_df = all_yards[wanted].merge(players_df, on="PlayerId")

    def yards_for(pid):
        return yards_df.loc[yards_df["PlayerId"] == pid, ["SimIndex", "YardsGained"]]

    # find average yards per player
    mu = [yards_for(ids[0])["YardsGained"].mean(), yards_for(ids[1])["YardsGained"].mean()]

    # NaN (no rows) compares False here, so missing players return None too.
    if not (mu[0] > 0 and mu[1] > 0):
        return None

    names = [
        players_df.loc[players_df["PlayerId"] == pid, "PlayerName"].values[0]
        for pid in ids
    ]

    # sort player ids so the favorite (higher mean yards) comes first
    if mu[1] >= mu[0]:
        ids.reverse()
        names.reverse()

    # join yards df to itself to stack players side by side
    paired = pd.merge(
        yards_for(ids[0]),
        yards_for(ids[1]),
        on="SimIndex", suffixes=["_plr1", "_plr2"]
    )

    total_yards = paired["YardsGained_plr1"] + paired["YardsGained_plr2"]
    plr1_diff = paired["YardsGained_plr1"] - paired["YardsGained_plr2"]

    # TOTAL
    total = _half_point_line(total_yards)

    # SPREAD: the handicap is the negated line on (player 1 - player 2)
    plr_1_spread = -_half_point_line(plr1_diff)

    # MONEYLINE
    p_plr1_more = np.mean(paired["YardsGained_plr1"] > paired["YardsGained_plr2"])

    p_plr1_cover = np.mean(plr1_diff + plr_1_spread > 0)
    p_over = np.mean(total_yards > total)

    # format output
    matchup = f"{names[0]} vs {names[1]}"
    prop_name = f"{prop} yards"
    out = pd.DataFrame({
        "game_id": sims["GameId"],
        "Matchup": matchup,
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/(1-p_plr1_more),
        "Spread": plr_1_spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/(1-p_plr1_cover),
        "Total": total,
        "Odds Over": 1/p_over,
    },index=[0])

    return out

# Example: head-to-head receiving-yards line for player ids 1321 and 222
build_yards_line([1321, 222], "receiving", sims, SimulationResults)

Unnamed: 0,game_id,Matchup,prop_name,Player1,Player2,Odds Player 1 Moneyline,Odds Player 2 Moneyline,Spread,Odds Player 1 Cover,Odds Player 2 Cover,Total,Odds Over
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Travis Kelce vs Dallas Goedert,receiving yards,Travis Kelce,Dallas Goedert,1.363512,3.750938,-31.5,2.014099,1.986097,129.5,2.006421


In [137]:
# Stack both rosters and add a constant key so a self-merge yields every ordered pair
roster_cols = ["PlayerId", "PlayerName", "Position"]
slate = pd.concat(
    [pd.DataFrame(sims[side])[roster_cols] for side in ("AwayPlayers", "HomePlayers")],
    axis=0,
).assign(join_ix=1)

# Pair every player with every other player, keeping each unordered pair once
slate = slate.merge(slate, on="join_ix", suffixes=["_1", "_2"])
slate = slate.query("PlayerId_1 < PlayerId_2")

# Position codes eligible for each prop; both players of a pair must match
prop_positions = {
    "receiving": [2, 5, 7],
    "rushing": [1, 2, 7],
    "passing": [1],
}
slate = pd.concat(
    [
        slate.assign(prop=prop_name).query(f"Position_1 in {codes} and Position_2 in {codes}")
        for prop_name, codes in prop_positions.items()
    ],
    axis=0,
)

# Build one line per (pair, prop). build_yards_line can return None, and
# pd.concat silently drops None entries.
lines = []
for i in tqdm(range(len(slate))):
    row = slate.iloc[i, :].copy()
    pair_ids = [row["PlayerId_1"], row["PlayerId_2"]]
    lines.append(build_yards_line(pair_ids, row["prop"], sims, SimulationResults))
lines = pd.concat(lines, axis=0)

100%|██████████| 259/259 [00:03<00:00, 80.32it/s] 


In [139]:
# Show every head-to-head line that was built (one row per matchup and prop)
lines

Unnamed: 0,game_id,Matchup,prop_name,Player1,Player2,Odds Player 1 Moneyline,Odds Player 2 Moneyline,Spread,Odds Player 1 Cover,Odds Player 2 Cover,Total,Odds Over
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Travis Kelce vs Jerick McKinnon,receiving yards,Travis Kelce,Jerick McKinnon,1.13688,8.305648,-52.5,1.998801,2.001201,107.5,2.006421
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Travis Kelce vs Dallas Goedert,receiving yards,Travis Kelce,Dallas Goedert,1.363512,3.750938,-31.5,2.014099,1.986097,129.5,2.006421
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Dallas Goedert vs Jerick McKinnon,receiving yards,Dallas Goedert,Jerick McKinnon,1.383509,3.607504,-21.5,2.0008,1.9992,74.5,1.995211
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Isiah Pacheco vs Jerick McKinnon,rushing yards,Isiah Pacheco,Jerick McKinnon,1.2054,5.868545,-30.5,2.033347,1.967729,78.5,2.03666
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Jerick McKinnon,rushing yards,Miles Sanders,Jerick McKinnon,1.199904,6.002401,-35.5,2.010454,1.989654,82.5,2.011263
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Isiah Pacheco,rushing yards,Miles Sanders,Isiah Pacheco,1.885725,2.129019,-3.5,1.99005,2.01005,115.5,1.992429
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Miles Sanders vs Jalen Hurts,rushing yards,Miles Sanders,Jalen Hurts,1.614987,2.62605,-11.5,2.006421,1.99362,106.5,2.002804
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Jalen Hurts vs Jerick McKinnon,rushing yards,Jalen Hurts,Jerick McKinnon,1.318044,4.144219,-23.5,2.015316,1.984915,70.5,2.038736
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Isiah Pacheco vs Jalen Hurts,rushing yards,Isiah Pacheco,Jalen Hurts,1.740038,2.351281,-8.5,2.013288,1.986887,101.5,2.00441
0,232fa23f-9423-4a54-91d7-4cd614bf8267,Patrick Mahomes vs Jalen Hurts,passing yards,Patrick Mahomes,Jalen Hurts,1.47929,3.08642,-43.5,2.004812,1.995211,521.5,2.011263
