In [34]:
import pandas as pd
import numpy as np
import json 
from tqdm import tqdm

f = open("../data/CorrelationId_3b7de686-652a-4ba9-8ce3-f586a0119315_Timestamp_2023-01-30_21_38_15_817.json") # whatever the name of your sim json file is
sims = json.load(f)

print(sims.keys())
# SimulationResults has the goods in it

def to_df(x, name=None):
    """
    Simple function that converts
    different objects to a pandas
    DataFrame. (Probably bad code)
    """
    try:
        out = pd.DataFrame(x)
    except:
        try:
            out = pd.DataFrame(x, index = [0])
            if name is not None:
                out.columns = [f"{name}_{c}" for c in out.columns]
        except:
            out = pd.DataFrame({name:x},index=[0])
    return out

def parse_sim_results(sims, SimIndex):
    """
    Function to process a single simulation
    """
    # pull out simulation result
    result = sims["SimulationResults"][SimIndex]
    
    result_dict = {"SimResults": pd.DataFrame({"SimIndex":SimIndex},index=[0])}
    for key in result.keys():
        element_df = to_df(result[key], key)
        if element_df.shape[0] == 1:
            result_dict["SimResults"] = pd.concat([result_dict["SimResults"], element_df], axis = 1)
        else:
            if "SimIndex" not in element_df.columns:
                element_df["SimIndex"] = SimIndex
            result_dict[key] = element_df

    return result_dict

def process_simulation_results(sims):
    """
    Function to process all of the sims
    """
    n_sims = sims["NumberOfSimulations"]
    print(f"Parsing results for {n_sims} of simulations")

    sim_results = {}
    for i in tqdm(range(n_sims)):
        sim_i_result = parse_sim_results(sims, i)
        for key in sim_i_result.keys():
            if key in sim_results.keys():
                sim_results[key] = pd.concat([sim_results[key], sim_i_result[key]], axis=0)
            else:
                sim_results[key] = sim_i_result[key]

    return sim_results


# Process the Simulation Results
SimulationResults = process_simulation_results(sims)


dict_keys(['GameId', 'Solution', 'MarketIdentifiers', 'BatchingEnabled', 'HomePlayers', 'AwayPlayers', 'SimulationResults', 'NextPlayProbabilities', 'EventId', 'NumberOfSimulations', 'HomeTeamStats', 'AwayTeamStats', 'ModelInputs'])
Parsing results for 10000 of simulations


100%|██████████| 10000/10000 [03:28<00:00, 47.93it/s]


In [35]:
sims["EventId"]

10223102

In [36]:
SimulationResults.keys()

dict_keys(['SimResults', 'ScoringPlays', 'PlayerYards', 'HomePlayerReceptions', 'AwayPlayerReceptions'])

In [37]:
# pull out id tables
AwayPlayers = pd.DataFrame(sims["AwayPlayers"])
HomePlayers = pd.DataFrame(sims["HomePlayers"])

Proposition Types:
- 1 = passing yards
- 5 = rushing yards
- 6 = rushing att -- > not certain
- 8 = recieving yards
- 9 = Receptions
- 13 = Total Yards

## Any Player to Rush for 100 Yards

In [43]:
def any_player_to_rush_for_x_yards(SimulationResults, yards):

    plr_yards = SimulationResults["PlayerYards"]\
        .query("PropositionType==5").copy()
        
    plr_yards["over_x"] = [1 if x > yards else 0 for x in plr_yards["YardsGained"]]

    yards_sim = plr_yards.groupby("SimIndex").agg({"over_x":"sum"}).reset_index()
    yards_sim["over_x"] = [1 if x > 0 else 0 for x in yards_sim["over_x"]]

    return np.mean(yards_sim["over_x"])

any_player_to_rush_for_x_yards(SimulationResults, 100)

0.259

## Will Team X Rush for  TD?

- Home = Philly
- Away = KC

In [44]:
def teams_to_rush_for_td(SimulationResults):

    rush_tds = SimulationResults["SimResults"]\
        .assign(
            HomeRushingTouchdowns = lambda x: (
                x.HomeTouchdowns_FirstQuarter + x.HomeTouchdowns_SecondQuarter +
                x.HomeTouchdowns_ThirdQuarter + x.HomeTouchdowns_FourthQuarter + 
                x.HomeTouchdowns_Overtime - x.HomePassingTouchdowns
            ),
            AwayRushingTouchdowns = lambda x: (
                x.AwayTouchdowns_FirstQuarter + x.AwayTouchdowns_SecondQuarter +
                x.AwayTouchdowns_ThirdQuarter + x.AwayTouchdowns_FourthQuarter + 
                x.AwayTouchdowns_Overtime - x.AwayPassingTouchdowns
            )
        ).copy()
    rush_tds["HomeRushingTouchdown"] = [1 if x > 0 else 0 for x in rush_tds["HomeRushingTouchdowns"]]
    rush_tds["AwayRushingTouchdown"] = [1 if x > 0 else 0 for x in rush_tds["AwayRushingTouchdowns"]]

    return rush_tds.agg({"HomeRushingTouchdown":"mean","AwayRushingTouchdown":"mean"})

1/teams_to_rush_for_td(SimulationResults)

HomeRushingTouchdown    1.278282
AwayRushingTouchdown    1.732202
dtype: float64

## Yards Head to Head

In [46]:
def head_to_head_yards(sims, SimulationResults, plr1, plr2, prop):

    prop_dict = {
        "receiving": 8,
        "rushing": 5,
        "passing": 1
    }


    assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing']"

    # pull out id tables
    AwayPlayers = pd.DataFrame(sims["AwayPlayers"])
    HomePlayers = pd.DataFrame(sims["HomePlayers"])

    yards_df = pd.concat([
        HomePlayers[["PlayerId", "PlayerName", "Position"]]\
            .merge(
                SimulationResults["PlayerYards"],
                on = "PlayerId"
            ),
        AwayPlayers[["PlayerId", "PlayerName", "Position"]]\
            .merge(
                SimulationResults["PlayerYards"],
                on = "PlayerId"
            )
    ],axis=0)\
        .query(f"PropositionType == {prop_dict[prop]}")

    yards_df = pd.merge(
        yards_df.query(f"PlayerName=='{plr1}'")[["SimIndex", "YardsGained"]],
        yards_df.query(f"PlayerName=='{plr2}'")[["SimIndex", "YardsGained"]],
        on = "SimIndex", suffixes=["_plr1", "_plr2"]
    )

    yards_df["total"] = yards_df["YardsGained_plr1"] + yards_df["YardsGained_plr2"]
    yards_df["plr1_diff"] = yards_df["YardsGained_plr1"] - yards_df["YardsGained_plr2"]
    yards_df["plr1_more"] = yards_df.apply(lambda x: 1 if x.YardsGained_plr1 > x.YardsGained_plr2 else 0, axis = 1)

    # TOTAL
    total = np.round(np.median(yards_df["total"]) * 2)/2
    if (total).is_integer():
        if(np.mean(yards_df["total"] < total)) < 0.5:
            total += 0.5
        else:
            total -= 0.5

    # SPREAD
    plr_1_spread = np.round( -1*np.median(yards_df["plr1_diff"]) * 2)/2
    if (plr_1_spread).is_integer():
        if(np.mean(yards_df["plr1_diff"] < -1*plr_1_spread)) < 0.5:
            plr_1_spread -= 0.5
        else:
            plr_1_spread += 0.5

    # MONEYLINE
    p_plr1_more = np.mean(yards_df["plr1_more"])

    ## add on juice
    p_plr1_cover = np.mean(yards_df["plr1_diff"] + plr_1_spread > 0)
    p_over = np.mean(np.mean(yards_df["total"] > total))

    return {
        "Player1": plr1,
        "Player2": plr2,
        "Player1 Prob": p_plr1_more,
        "Spread": plr_1_spread,
        "Prob Player1 Cover": p_plr1_cover,
        "Total": total,
        "Prob Over": p_over
    }


plr1 = "Miles Sanders"
plr2 = "Christian Pocheco"
head_to_head_yards(sims, SimulationResults, plr1, plr2, "receiving")


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


{'Player1': 'Miles Sanders',
 'Player2': 'Christian Pocheco',
 'Player1 Prob': nan,
 'Spread': nan,
 'Prob Player1 Cover': nan,
 'Total': nan,
 'Prob Over': nan}

In [31]:
prop_dict = {
    "receiving": 8,
    "rushing": 5,
    "passing": 1
}

prop = 'rushing'
assert prop in prop_dict.keys(), "prop must be in ['receiving', 'rushing', 'passing']"

# pull out id tables
AwayPlayers = pd.DataFrame(sims["AwayPlayers"])
HomePlayers = pd.DataFrame(sims["HomePlayers"])

yards_df = pd.concat([
    HomePlayers[["PlayerId", "PlayerName", "Position"]]\
        .merge(
            SimulationResults["PlayerYards"],
            on = "PlayerId"
        ),
    AwayPlayers[["PlayerId", "PlayerName", "Position"]]\
        .merge(
            SimulationResults["PlayerYards"],
            on = "PlayerId"
        )
],axis=0)\
    .query(f"PropositionType == {prop_dict[prop]}")

yards_df = pd.merge(
    yards_df.query(f"PlayerName=='{plr1}'")[["SimIndex", "YardsGained"]],
    yards_df.query(f"PlayerName=='{plr2}'")[["SimIndex", "YardsGained"]],
    on = "SimIndex", suffixes=["_plr1", "_plr2"]
)

yards_df["total"] = yards_df["YardsGained_plr1"] + yards_df["YardsGained_plr2"]
yards_df["plr1_diff"] = yards_df["YardsGained_plr1"] - yards_df["YardsGained_plr2"]
yards_df["plr1_more"] = yards_df.apply(lambda x: 1 if x.YardsGained_plr1 > x.YardsGained_plr2 else 0, axis = 1)






-20.5