In [352]:
from configparser import ConfigParser
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import pymysql
import sqlalchemy_redshift
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from tqdm import tqdm

# Credentials live outside the notebook, in ../notebook.cfg.
parser = ConfigParser()
_ = parser.read("../notebook.cfg")

# Connect to zack attack
# The URL is assembled with URL.create() from separate fields rather than by
# string interpolation, so a password containing characters such as "@", "/",
# ":" or "%" cannot corrupt the connection URL.
za_user = parser.get("nffddev_2", "user")
za_pwd = parser.get("nffddev_2", "password")

za_engine = create_engine(
    URL.create(
        "mysql+pymysql",
        username=za_user,
        password=za_pwd,
        host="nffddev.numberfire.com",
        port=3306,
        database="zack_attack",
    )
)
za_conn = za_engine.connect()

# connect to redshift
red_user = parser.get("redshift", "user")
red_pwd = parser.get("redshift", "password")

red_engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=red_user,
        password=red_pwd,
        host="rs1.usdfs.fdbox.net",
        port=5439,
        database="fanduel",
    )
)
red_conn = red_engine.connect()

Exception during reset or similar
Traceback (most recent call last):
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 763, in _finalize_fairy
    fairy._reset(pool, transaction_was_reset)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 1038, in _reset
    pool._dialect.do_rollback(self)
  File "/Users/mason.yahr/opt/anaconda3/envs/ds_env/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 683, in do_rollback
    dbapi_connection.rollback()
psycopg2.errors.AdminShutdown: terminating connection due to session timeout
SSL connection has been closed unexpectedly



In [353]:
# Game
# Every column of nfl_game, plus the away/home team abbreviations obtained by
# joining nfl_team once per side.
game_qry = '''
select 
g.*, 
a.abbrev as away_team_abbrev,
h.abbrev as home_team_abbrev
from nfl_game g
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
'''
# "id" is renamed to "game_id" so it lines up with the game_id key used by
# the other frames in this notebook.
game = pd.read_sql(game_qry, za_conn).rename(columns={"id":"game_id"})

# Receptions
# The `rec` value per (game_id, team_id, player_id) from nfl_statline_skill.
rec_qry = '''
select 
game_id, team_id,
player_id, rec 
from nfl_statline_skill
'''
rec = pd.read_sql(rec_qry, za_conn)

Unnamed: 0,game_id,team_id,player_id,rec
0,1,19,2,0
1,2,19,2,0
2,3,19,2,0
3,4,19,2,0
4,5,19,2,0
...,...,...,...,...
121387,8284,32,55563,0
121388,8282,30,55564,0
121389,8275,3,55566,4
121390,8282,2,55570,0


In [443]:
# Reception projections (current + archive tables) for TE/WR/RB players,
# with team names and a side flag: tm = 'a' when the player's team is the
# home team of the game, 'b' when it is the away team.
proj_qry = '''
select
h.name as home_name,
a.name as away_name,
skill.*,
p.name as player_name,
p.position,
case
    when skill.team_id = g.home_team_id then 'a'
    when skill.team_id = g.away_team_id then 'b'
end as tm
from (
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_skill
    union
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_archive_skill
) skill
inner join nfl_player p on skill.player_id = p.id
inner join nfl_game g on skill.game_id = g.id
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
where p.position in ('TE', 'WR', 'RB')
'''
# The original statement ended in a dangling "\" line continuation that only
# parsed because the following line happened to be blank; it is removed here.
proj = pd.read_sql(proj_qry, za_conn)

# Rank players within each (team, game) by projected receptions, highest
# first; method="first" breaks ties by row order so ranks are unique.
proj['rank'] = proj.groupby(["team_id", "game_id"])['mean_rec'].rank(method="first", ascending=False)
# Keep the top 10 per team and attach the `rec` column from `rec`
# (left join: rows without a matching statline get NaN).
proj = proj.query("rank <= 10").merge(rec, on = ["game_id", "team_id", "player_id"], how = "left")
# Label of the form <position>_<rank>_<tm>, e.g. "TE_1_b".
proj["var_name"] = proj["position"] + "_" + proj["rank"].astype(int).astype(str) + "_" + proj["tm"]

In [380]:
# Reception prop lines (current + archive tables) for TE/WR/RB players.
# `line` is the first non-null of fd, pinny, dk, czr, mgm.
#
# Ranking fix: row_number() now orders by `line desc`, so rank 1 is the
# player with the HIGHEST line on a team in a given season/week. The
# projection cell ranks mean_rec with ascending=False, and both cells build
# var_name as <position>_<rank>_<tm>; with the previous ascending order the
# same label referred to opposite ends of the depth chart in the two frames.
prop_qry = '''
with nfl_prop_full as (
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, dk, czr, mgm
    from analyst_dev.nfl_prop_data
    union
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, NULL as dk, NULL as czr, mgm
    from analyst_dev.nfl_prop_data_archive
),
prop_filt as (
    select
    gamedate, season, week,
    player_id, name, position, team, opp,
    coalesce(fd, pinny, dk, czr, mgm) as line
    from nfl_prop_full
    where prop_name in ('rec')
),
prop_rank as (
    select *,
    row_number() over (partition by season, week, team order by line desc) rank
    from prop_filt
    where line is not null
    and position in ('TE', 'WR', 'RB')
)
select * from prop_rank
'''

prop = pd.read_sql(prop_qry, red_conn)

# get game ids for prop
# Each game is stacked twice, once from the home side (tm='a') and once from
# the away side (tm='b'), so a prop row can be matched on (team, opp)
# whichever side the player is on.
nfl_prop_game = pd.concat([
    game.rename(columns={"home_team_abbrev":"team", "away_team_abbrev":"opp", "home_team_id":"team_id"}).assign(tm='a'),
    game.rename(columns={"away_team_abbrev":"team", "home_team_abbrev":"opp", "away_team_id":"team_id"}).assign(tm='b')
],axis=0)[["season", "week", "game_id", "team_id", "team", "opp", 'tm']]

prop = prop.merge(
    nfl_prop_game,
    on = ["season", "week", "team", "opp"]
)
prop["var_name"] = prop["position"] + "_" + prop["rank"].astype(str) + "_" + prop["tm"]

# Attach the `rec` column; note that fillna(0) replaces EVERY remaining NaN in
# the frame with 0, including `rec` for rows that have no statline.
prop = prop.merge(rec, on = ["game_id", "player_id", "team_id"], how = "left").fillna(0)

In [447]:
# using NF for projections
# Correlation between var_name slots across games: rows with any NaN are
# dropped, game 8297 is excluded, the frame is pivoted to one row per game and
# one column per var_name (values = mean_rec), and slots missing from a game
# are filled with 0 before correlating.
corr_mat = (
    proj.dropna()
    .query("game_id != 8297")
    .pivot(index="game_id", columns="var_name", values="mean_rec")
    .fillna(0)
    .corr()
)

In [383]:
def _is_positive_definite(M):
    """Return True when a Cholesky factorization of M succeeds."""
    try:
        np.linalg.cholesky(M)
    except np.linalg.LinAlgError:
        return False
    return True


def nearestPD(A):
    """Find the nearest positive-definite matrix to input

    A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2].

    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6

    Parameters
    ----------
    A : square array-like supporting ``+``, ``/``, ``.T`` and ``.shape``
        (a numpy array or a pandas DataFrame). It is not modified.

    Returns
    -------
    A matrix for which ``np.linalg.cholesky`` succeeds.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``A`` contains NaN or infinite entries.

    Notes
    -----
    The function is self-contained: it uses ``np.linalg`` directly and its own
    positive-definiteness check, so it does not depend on an ``la`` alias or an
    ``isPD`` helper being defined by some other cell.
    """
    # Non-finite entries cannot be repaired; fail with a clear message
    # instead of an SVD convergence error.
    if not np.isfinite(np.asarray(A, dtype=float)).all():
        raise np.linalg.LinAlgError("nearestPD: input contains NaN or infinite values")

    # Symmetrize, then average with the symmetric polar factor H.
    B = (A + A.T) / 2
    _, s, V = np.linalg.svd(B)
    H = np.dot(V.T, np.dot(np.diag(s), V))
    A2 = (B + H) / 2
    A3 = (A2 + A2.T) / 2
    if _is_positive_definite(A3):
        return A3

    # A3 is only positive semi-definite up to rounding: push the diagonal up
    # by growing multiples of the most negative eigenvalue until Cholesky works.
    spacing = np.spacing(np.linalg.norm(A))
    I = np.eye(A.shape[0])
    k = 1
    while not _is_positive_definite(A3):
        mineig = np.min(np.real(np.linalg.eigvals(A3)))
        A3 += I * (-mineig * k**2 + spacing)
        k += 1
    return A3

In [457]:
from numpy import linalg as la
from scipy.stats import norm, poisson, gamma, rv_discrete


def build_reception_line(player_ids, game_id, proj, corr=None, n_samples=10_000, seed=None):
    """Simulate a head-to-head receptions matchup and price it.

    Correlated standard-normal draws (Cholesky factor of the two players'
    correlation block, repaired with ``nearestPD``) are mapped through the
    normal CDF into Poisson quantiles with each player's ``mean_rec`` as the
    rate. Moneyline, spread and total are then read off the simulated counts.

    Parameters
    ----------
    player_ids : sequence of two player ids.
    game_id : id of the game both players are projected in.
    proj : DataFrame with at least the columns ``game_id``, ``player_id``,
        ``var_name``, ``mean_rec`` and ``player_name``.
    corr : optional DataFrame indexed and labelled by ``var_name``. Defaults
        to the notebook-level ``corr_mat``.
    n_samples : number of simulated games (default 10_000).
    seed : optional seed. When given, draws come from
        ``np.random.default_rng(seed)`` and are reproducible; when ``None``
        the global numpy random state is used, as before.

    Returns
    -------
    One-row DataFrame with decimal odds (1 / probability) for the moneyline,
    the spread and the over, plus the spread and total themselves. Player 1 is
    the player with the larger ``mean_rec``.

    Raises
    ------
    ValueError : if ``player_ids`` does not contain exactly two ids.
    IndexError : if a player has no row in ``proj`` for ``game_id``.
    KeyError : if a player's ``var_name`` is not present in ``corr``.

    Notes
    -----
    The moneyline uses a strict ``plr_1 > plr_2`` comparison, so simulated
    ties count toward player 2's price.
    """
    if len(player_ids) != 2:
        raise ValueError(f"player_ids must contain exactly two ids, got {len(player_ids)}")
    if corr is None:
        corr = corr_mat  # notebook-level correlation matrix

    game_proj = proj[proj["game_id"] == game_id]

    # One lookup per player instead of one per (player, column).
    picks = []
    for pid in player_ids:
        rows = game_proj[game_proj["player_id"] == pid]
        if rows.empty:
            raise IndexError(f"player_id {pid} has no projection row for game_id {game_id}")
        picks.append(rows.iloc[0])

    # sort the list so player 1 is always the favorite
    # (on equal means the order is swapped, as in the original comparison)
    if picks[1]["mean_rec"] >= picks[0]["mean_rec"]:
        picks.reverse()

    var_names = [row["var_name"] for row in picks]
    mu = [float(row["mean_rec"]) for row in picks]
    names = [row["player_name"] for row in picks]

    missing = [v for v in var_names if v not in corr.index or v not in corr.columns]
    if missing:
        raise KeyError(f"var_name(s) {missing} not found in the correlation matrix")

    c = nearestPD(corr.loc[var_names, var_names])
    L = np.linalg.cholesky(c)

    shape = (len(var_names), n_samples)
    if seed is None:
        uncorrelated = np.random.standard_normal(shape)
    else:
        uncorrelated = np.random.default_rng(seed).standard_normal(shape)
    correlated = np.dot(L, uncorrelated)

    plr_rec_sims = []
    for draws, mean_rec in zip(correlated, mu):
        # Re-standardize the draws, map them to uniforms with the normal CDF,
        # then to counts with the Poisson quantile function.
        z = (draws - np.mean(draws)) / np.std(draws)
        plr_rec_sims.append(poisson(mean_rec).ppf(norm.cdf(z)))

    sims = pd.DataFrame(plr_rec_sims).T.rename(columns={0: "plr_1", 1: "plr_2"})
    sims["total"] = sims["plr_1"] + sims["plr_2"]
    sims["plr_1_diff"] = sims["plr_1"] - sims["plr_2"]
    sims["plr_1_more"] = (sims["plr_1"] > sims["plr_2"]).astype(int)

    # TOTAL
    # Median rounded to the nearest half; a whole-number line is moved half a
    # point toward the side holding more of the simulated mass.
    total = np.round(np.median(sims["total"]) * 2) / 2
    if total.is_integer():
        if np.mean(sims["total"] < total) < 0.5:
            total += 0.5
        else:
            total -= 0.5

    # SPREAD
    # Negative of the median difference, with the same half-point adjustment.
    plr_1_spread = np.round(-1 * np.median(sims["plr_1_diff"]) * 2) / 2
    if plr_1_spread.is_integer():
        if np.mean(sims["plr_1_diff"] < -1 * plr_1_spread) < 0.5:
            plr_1_spread -= 0.5
        else:
            plr_1_spread += 0.5

    # MONEYLINE
    p_plr1_more = np.mean(sims["plr_1_more"])

    ## add on juice
    p_plr1_cover = np.mean(sims["plr_1_diff"] + plr_1_spread > 0)
    p_over = np.mean(sims["total"] > total)

    matchup = f"{names[0]} vs {names[1]}"
    prop_name = 'rec'
    out = pd.DataFrame({
        "game_id": game_id,
        "Matchup": matchup,
        "prop_name": prop_name,
        "Player1": names[0],
        "Player2": names[1],
        "Odds Player 1 Moneyline": 1/p_plr1_more,
        "Odds Player 2 Moneyline": 1/(1-p_plr1_more),
        "Spread": plr_1_spread,
        "Odds Player 1 Cover": 1/p_plr1_cover,
        "Odds Player 2 Cover": 1/(1-p_plr1_cover),
        "Total": total,
        "Odds Over": 1/p_over,
    },index=[0])

    return out

# Example: price one matchup (player ids 53461 and 53425) in game 8296.
build_reception_line([53461, 53425], 8296, proj)

Unnamed: 0,game_id,Matchup,prop_name,Player1,Player2,Odds Player 1 Moneyline,Odds Player 2 Moneyline,Spread,Odds Player 1 Cover,Odds Player 2 Cover,Total,Odds Over
0,8296,Christian McCaffrey vs George Kittle,rec,Christian McCaffrey,George Kittle,2.05381,1.948938,-0.5,2.05381,1.948938,9.5,2.367424


In [458]:
# Every ordered pair of distinct players in game 8297 where the first player's
# projected receptions are at least the second's. The game is filtered BEFORE
# the self-join so only one game's players are paired with each other.
slate = proj.query("game_id == 8297")[["game_id", "player_id", "player_name", "mean_rec"]]
slate = (
    slate
    .merge(slate, on="game_id", suffixes=["_1", "_2"])
    .query("player_id_1 != player_id_2")
    .query("mean_rec_1 >= mean_rec_2")
    [["game_id", "player_id_1", "player_id_2"]]
)

# One priced matchup per pair. The per-pair frames are kept in their own list
# so `lines` is only ever a DataFrame, and ignore_index=True gives the result
# unique row labels instead of a 0 on every row.
matchup_lines = [
    build_reception_line([pair.player_id_1, pair.player_id_2], pair.game_id, proj)
    for pair in tqdm(slate.itertuples(index=False), total=len(slate))
]
lines = pd.concat(matchup_lines, axis=0, ignore_index=True)


 11%|█         | 21/190 [00:03<00:26,  6.29it/s]

In [456]:
# `game_proj` only exists as a local inside build_reception_line, so the bare
# name fails on a fresh kernel; select the game's projection rows explicitly.
proj.query("game_id == 8297")

Unnamed: 0,home_name,away_name,player_id,team_id,game_id,mean_rec,sd_rec,player_name,position,tm,rank,rec,var_name
23667,Philadelphia Eagles,Kansas City Chiefs,52104,16,8297,7.06,2.65,Travis Kelce,TE,b,1.0,,TE_1_b
29512,Philadelphia Eagles,Kansas City Chiefs,52443,16,8297,3.64,2.22,Jerick McKinnon,RB,b,2.0,,RB_2_b
42210,Philadelphia Eagles,Kansas City Chiefs,53422,16,8297,1.81,1.83,JuJu Smith-Schuster,WR,b,5.0,,WR_5_b
47823,Philadelphia Eagles,Kansas City Chiefs,53795,24,8297,4.67,2.38,Dallas Goedert,TE,a,3.0,,TE_3_a
48182,Philadelphia Eagles,Kansas City Chiefs,53819,24,8297,0.24,1.2,Boston Scott,RB,a,9.0,,RB_9_a
48882,Philadelphia Eagles,Kansas City Chiefs,53868,24,8297,0.38,1.28,Zach Pascal,WR,a,8.0,,WR_8_a
49252,Philadelphia Eagles,Kansas City Chiefs,53904,16,8297,3.45,2.19,Marquez Valdes-Scantling,WR,b,3.0,,WR_3_b
49987,Philadelphia Eagles,Kansas City Chiefs,53976,16,8297,0.44,1.32,Justin Watson,WR,b,10.0,,WR_10_b
51718,Philadelphia Eagles,Kansas City Chiefs,54140,16,8297,0.98,1.56,Mecole Hardman,WR,b,8.0,,WR_8_b
52094,Philadelphia Eagles,Kansas City Chiefs,54154,24,8297,0.7,1.44,Miles Sanders,RB,a,6.0,,RB_6_a


In [454]:
# Priced matchups in which Dallas Goedert is Player1.
lines.query("Player1=='Dallas Goedert'")

Unnamed: 0,game_id,Matchup,prop_name,Player1,Player2,Odds Player 1 Moneyline,Odds Player 2 Moneyline,Spread,Odds Player 1 Cover,Odds Player 2 Cover,Total,Odds Over
0,8297,Dallas Goedert vs Jerick McKinnon,rec,Dallas Goedert,Jerick McKinnon,1.748863,2.335357,-1.5,2.314815,1.760563,8.5,2.237136
0,8297,Dallas Goedert vs JuJu Smith-Schuster,rec,Dallas Goedert,JuJu Smith-Schuster,1.209629,5.77034,-3.5,2.578649,1.633453,6.5,2.107926
0,8297,Dallas Goedert vs Boston Scott,rec,Dallas Goedert,Boston Scott,1.022809,44.843049,-4.5,2.178649,1.848429,5.5,2.692515
0,8297,Dallas Goedert vs Zach Pascal,rec,Dallas Goedert,Zach Pascal,1.029654,34.722222,-4.5,2.298322,1.770225,5.5,2.554278
0,8297,Dallas Goedert vs Marquez Valdes-Scantling,rec,Dallas Goedert,Marquez Valdes-Scantling,1.661958,2.51067,-1.5,2.177226,1.849454,8.5,2.378687
0,8297,Dallas Goedert vs Justin Watson,rec,Dallas Goedert,Justin Watson,1.035304,29.325513,-4.5,2.352388,1.739433,5.5,2.478929
0,8297,Dallas Goedert vs Mecole Hardman,rec,Dallas Goedert,Mecole Hardman,1.085423,12.70648,-4.5,2.893519,1.528117,5.5,2.009646
0,8297,Dallas Goedert vs Miles Sanders,rec,Dallas Goedert,Miles Sanders,1.052742,19.96008,-4.5,2.568713,1.637465,5.5,2.235136
0,8297,Dallas Goedert vs Quez Watkins,rec,Dallas Goedert,Quez Watkins,1.16523,7.052186,-3.5,2.427774,1.700391,6.5,2.288853
0,8297,Dallas Goedert vs Kadarius Toney,rec,Dallas Goedert,Kadarius Toney,1.086012,12.626263,-4.5,2.794077,1.55739,6.5,2.977963
