In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt

import psycopg2
import pymysql
import sqlalchemy_redshift
from configparser import ConfigParser
from sqlalchemy import create_engine

# Credentials are kept out of the notebook, in a config file one directory up.
parser = ConfigParser()
_ = parser.read("../notebook.cfg")

# Connect to zack attack (MySQL via pymysql)
# NOTE(review): user/password are interpolated into the URL as-is; credentials
# containing characters such as '@' or '/' would need URL-escaping — confirm.
za_user = parser.get("nffddev_2", "user")
za_pwd = parser.get("nffddev_2", "password")

za_engine = create_engine(
    f"mysql+pymysql://{za_user}:{za_pwd}@nffddev.numberfire.com/zack_attack",
    connect_args={"host": "nffddev.numberfire.com", "port": 3306},
)
za_conn = za_engine.connect()

# connect to redshift (postgres dialect via psycopg2, port 5439)
red_user = parser.get("redshift", "user")
red_pwd = parser.get("redshift", "password")

red_engine = create_engine(
    f"postgresql+psycopg2://{red_user}:{red_pwd}@rs1.usdfs.fdbox.net/fanduel",
    connect_args={"port": 5439},
)
red_conn = red_engine.connect()

In [185]:
# Game
# One row per game, with the home/away team abbreviations joined on.
game_qry = '''
select 
g.*, 
a.abbrev as away_team_abbrev,
h.abbrev as home_team_abbrev
from nfl_game g
inner join nfl_team h on h.id = g.home_team_id
inner join nfl_team a on a.id = g.away_team_id
'''
game = pd.read_sql(game_qry, za_conn)
# Expose the game's primary key as `game_id` so it lines up with the other tables.
game = game.rename(columns={"id": "game_id"})

# Receptions
# Reception count per (game, team, player) statline row.
rec_qry = '''
select 
game_id, team_id,
player_id, rec 
from nfl_statline_skill
'''
rec = pd.read_sql(rec_qry, za_conn)

rec

Unnamed: 0,game_id,team_id,player_id,rec
0,1,19,2,0
1,2,19,2,0
2,3,19,2,0
3,4,19,2,0
4,5,19,2,0
...,...,...,...,...
121387,8284,32,55563,0
121388,8282,30,55564,0
121389,8275,3,55566,4
121390,8282,2,55570,0


In [206]:
# Reception projections (current + archived) per player/team/game.
# `tm` flags the side of the game: 'a' = player's team is the home team,
# 'b' = player's team is the away team.
proj_qry = '''
select
skill.*,
p.name as player_name,
p.position,
case
    when skill.team_id = g.home_team_id then 'a'
    when skill.team_id = g.away_team_id then 'b'
end as tm
from (
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_skill
    union
    select
    player_id, team_id, game_id, rec as mean_rec, sd_rec
    from nfl_projection_archive_skill
) skill
inner join nfl_player p on skill.player_id = p.id
inner join nfl_game g on skill.game_id = g.id
'''
# (A dangling line-continuation backslash used to follow this call; it only
# worked because the next line happened to be blank.)
proj = pd.read_sql(proj_qry, za_conn)

# Rank players within each team/game by projected receptions, highest first;
# method="first" breaks ties by order of appearance so ranks are unique.
proj['rank'] = proj.groupby(["team_id", "game_id"])['mean_rec'].rank(method="first", ascending=False)
# Keep the top 10 per team/game and attach actual receptions. The default inner
# merge drops projections that have no matching statline row in `rec`.
proj = proj.query("rank <= 10").merge(rec, on = ["game_id", "team_id", "player_id"])
# Slot label of the form <position>_<rank>_<tm>, e.g. "WR_2_a".
proj["var_name"] = proj["position"] + "_" + proj["rank"].astype(int).astype(str) + "_" + proj["tm"]

proj

Unnamed: 0,player_id,team_id,game_id,mean_rec,sd_rec,player_name,position,tm,rank,rec,var_name
0,2,30,8129,0.00,0.00,Tom Brady,QB,a,9.0,0,QB_9_a
1,2,30,8142,0.00,0.00,Tom Brady,QB,b,10.0,0,QB_10_b
2,14,14,8035,0.00,0.00,Matt Ryan,QB,b,10.0,0,QB_10_b
3,14,14,8146,0.00,0.00,Matt Ryan,QB,a,10.0,0,QB_10_a
4,14,14,8177,0.00,0.00,Matt Ryan,QB,b,10.0,0,QB_10_b
...,...,...,...,...,...,...,...,...,...,...,...
47606,55169,13,7929,1.73,1.81,Davion Davis,WR,a,7.0,1,WR_7_a
47607,55175,11,7941,2.25,1.94,Shane Zylstra,TE,a,4.0,1,TE_4_a
47608,55178,9,7990,1.08,1.60,JaQuan Hardy,RB,b,9.0,0,RB_9_b
47609,55185,33,7944,0.63,1.41,Brycen Hopkins,TE,a,8.0,1,TE_8_a


In [164]:
# Reception prop lines (current + archive) from Redshift. The line used is the
# first non-null book in the order fd, pinny, dk, czr, mgm; players are then
# numbered within season/week/team by that line.
# NOTE(review): `order by line` ranks ascending (lowest line = rank 1), whereas
# the projections cell ranks by mean_rec with ascending=False. If prop var_name
# is meant to align with proj var_name this probably wants `order by line desc`
# — confirm before relying on the rank.
prop_qry = '''
with nfl_prop_full as (
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, dk, czr, mgm
    from analyst_dev.nfl_prop_data
    union
    select
    left(gamedate,10) gamedate,
    season,week,prop_name,position,name,
    id as player_id,team, opp,
    fd, pinny, NULL as dk, NULL as czr, mgm
    from analyst_dev.nfl_prop_data_archive
),
prop_filt as (
    select
    gamedate, season, week,
    player_id, name, position, team, opp,
    coalesce(fd, pinny, dk, czr, mgm) as line
    from nfl_prop_full
    where prop_name in ('rec')
),
prop_rank as (
    select *,
    row_number() over (partition by season, week, team order by line) rank
    from prop_filt
    where line is not null
)
select * from prop_rank
'''

prop = pd.read_sql(prop_qry, red_conn)

# get game ids for prop
# Stack the game table twice — once from the home side (tm='a') and once from
# the away side (tm='b') — so every (season, week, team, opp) maps to a game.
nfl_prop_game = pd.concat([
    game.rename(columns={"home_team_abbrev":"team", "away_team_abbrev":"opp", "home_team_id":"team_id"}).assign(tm='a'),
    game.rename(columns={"away_team_abbrev":"team", "home_team_abbrev":"opp", "away_team_id":"team_id"}).assign(tm='b')
],axis=0)[["season", "week", "game_id", "team_id", "team", "opp", 'tm']]

prop = prop.merge(
    nfl_prop_game,
    on = ["season", "week", "team", "opp"]
)
# Slot label of the form <position>_<rank>_<tm>.
prop["var_name"] = prop["position"] + "_" + prop["rank"].astype(str) + "_" + prop["tm"]

# Attach actual receptions; a player with no statline row counts as 0 catches.
# Only `rec` is zero-filled: a blanket fillna(0) would also overwrite missing
# values in unrelated columns (name, position, var_name, ...) with the number 0.
prop = prop.merge(rec, on = ["game_id", "player_id", "team_id"], how = "left").fillna({"rec": 0})

In [289]:
# Correlation of projected receptions between player slots (var_name), taken
# across games. Game 8297 is left out; slots missing from a game count as 0.
corr_mat = (
    proj
    .query("game_id != 8297")
    .pivot(index="game_id", columns="var_name", values="mean_rec")
    .fillna(0)
    .corr()
)

In [313]:
# Projections for the single game being priced (id 8296); copied so later
# edits to this slice cannot touch `proj`.
game_proj = proj.query("game_id == 8296").copy(deep=True)

game_proj

Unnamed: 0,player_id,team_id,game_id,mean_rec,sd_rec,player_name,position,tm,rank,rec,var_name
930,53425,27,8296,4.74,2.39,Christian McCaffrey,RB,b,1.0,4,RB_1_b
1224,53461,27,8296,4.32,2.33,George Kittle,TE,b,3.0,3,TE_3_b
1638,53795,24,8296,4.23,2.31,Dallas Goedert,TE,a,3.0,5,TE_3_a
1685,53819,24,8296,0.24,1.2,Boston Scott,RB,a,7.0,0,RB_7_a
1764,53868,24,8296,0.19,1.17,Zach Pascal,WR,a,8.0,0,WR_8_a
2287,54154,24,8296,1.61,1.78,Miles Sanders,RB,a,6.0,1,RB_6_a
2303,54158,27,8296,4.47,2.35,Deebo Samuel,WR,b,2.0,3,WR_2_b
2340,54161,24,8296,5.35,2.46,A.J. Brown,WR,a,2.0,4,WR_2_a
2465,54214,24,8296,0.0,0.0,Gardner Minshew,QB,a,9.0,0,QB_9_a
2985,54555,27,8296,3.95,2.27,Brandon Aiyuk,WR,b,4.0,1,WR_4_b


In [291]:
def isPD(B):
    """Return True when `B` is positive-definite.

    The check is a Cholesky factorisation attempt: numpy raises LinAlgError
    when the factorisation fails, which is treated as "not positive-definite".
    """
    try:
        np.linalg.cholesky(B)
        return True
    except np.linalg.LinAlgError:
        return False


def nearestPD(A):
    """Find the nearest positive-definite matrix to input

    A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2].
    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6

    Parameters
    ----------
    A : square 2-D array-like (numpy array or DataFrame) supporting `.T`
        and `.shape`.

    Returns
    -------
    A symmetric matrix of the same shape as `A` for which `isPD` is True.

    Uses `np.linalg` directly and ships its own `isPD` helper so the function
    does not depend on names defined in other cells.
    """
    # Symmetrise the input, then average it with its symmetric polar factor H.
    B = (A + A.T) / 2
    _, s, V = np.linalg.svd(B)
    H = np.dot(V.T, np.dot(np.diag(s), V))
    A2 = (B + H) / 2
    # Re-symmetrise to remove floating-point asymmetry.
    A3 = (A2 + A2.T) / 2
    if isPD(A3):
        return A3
    # Still not PD (rounding error): push the diagonal up by a growing multiple
    # of the most negative eigenvalue, plus one floating-point spacing of
    # norm(A), until Cholesky succeeds.
    spacing = np.spacing(np.linalg.norm(A))
    identity = np.eye(A.shape[0])
    k = 1
    while not isPD(A3):
        mineig = np.min(np.real(np.linalg.eigvals(A3)))
        A3 += identity * (-mineig * k**2 + spacing)
        k += 1
    return A3

In [343]:
from numpy import linalg as la
from scipy.stats import norm, poisson, gamma, rv_discrete


def _player_rows(game_proj, name):
    """Return the rows of `game_proj` whose player_name equals `name` exactly."""
    # Plain boolean mask rather than DataFrame.query on an f-string: names that
    # contain an apostrophe would otherwise break the query expression.
    rows = game_proj.loc[game_proj["player_name"] == name]
    if rows.empty:
        raise IndexError(f"player {name!r} not found in game_proj")
    return rows


def _half_point_line(samples):
    """Median of `samples` snapped to the nearest 0.5, never a whole number.

    A whole-number line is moved up by 0.5 when fewer than half the samples
    fall strictly below it, and down by 0.5 otherwise.
    """
    line = np.round(np.median(samples) * 2) / 2
    if float(line).is_integer():
        if np.mean(samples < line) < 0.5:
            line += 0.5
        else:
            line -= 0.5
    return line


def build_reception_line(plrs, game_proj, n_sims=10_000, seed=None, corr=None):
    """Simulate two players' receptions and derive head-to-head market lines.

    Parameters
    ----------
    plrs : sequence of two player names; each must match a `player_name`
        value in `game_proj`.
    game_proj : DataFrame with at least `player_name`, `var_name` and
        `mean_rec` columns (the first matching row per player is used).
    n_sims : number of simulated games (default 10_000).
    seed : optional seed for reproducible draws; when None the global
        `np.random` state is used.
    corr : optional correlation DataFrame indexed and columned by `var_name`;
        defaults to the notebook-level `corr_mat`.

    Returns
    -------
    dict with keys "Player1", "Player2", "Player1 Prob", "Spread",
    "Prob Player1 Cover", "Total", "Prob Over". Player1 is the player with
    the higher projected mean (the second-listed player on a tie).

    Raises
    ------
    IndexError
        If either player name is not present in `game_proj`.
    """
    first, second = plrs[0], plrs[1]
    rows = [_player_rows(game_proj, name) for name in (first, second)]
    var_names = np.array([r["var_name"].values[0] for r in rows])
    mu = np.array([r["mean_rec"].values[0] for r in rows], dtype=float)

    # Put the player with the larger projection first (ties swap as well).
    if mu[1] >= mu[0]:
        var_names = var_names[::-1]
        mu = mu[::-1]
        first, second = second, first

    if corr is None:
        corr = corr_mat
    # Repair the 2x2 correlation block so it can be Cholesky-factorised.
    c = nearestPD(corr.loc[var_names, var_names])
    L = np.linalg.cholesky(c)

    rng = np.random if seed is None else np.random.RandomState(seed)
    uncorrelated = rng.standard_normal((len(var_names), n_sims))
    correlated = np.dot(L, uncorrelated)

    # Standardise each correlated normal draw, map it to a uniform through the
    # normal CDF, then to a Poisson count with the player's projected mean.
    plr_rec_sims = {}
    for i, col in enumerate(("plr_1", "plr_2")):
        draws = correlated[i]
        z = (draws - np.mean(draws)) / np.std(draws)
        plr_rec_sims[col] = poisson(mu[i]).ppf(norm.cdf(z))

    sims = pd.DataFrame(plr_rec_sims)
    sims["total"] = sims["plr_1"] + sims["plr_2"]
    sims["plr_1_diff"] = sims["plr_1"] - sims["plr_2"]
    sims["plr_1_more"] = (sims["plr_1"] > sims["plr_2"]).astype(int)

    # TOTAL
    total = _half_point_line(sims["total"])

    # SPREAD: the negated half-point line on (player1 - player2).
    plr_1_spread = -_half_point_line(sims["plr_1_diff"])

    # MONEYLINE (ties do not count for player 1)
    p_plr1_more = np.mean(sims["plr_1_more"])

    p_plr1_cover = np.mean(sims["plr_1_diff"] + plr_1_spread > 0)
    p_over = np.mean(sims["total"] > total)

    return {
        "Player1": first,
        "Player2": second,
        "Player1 Prob": p_plr1_more,
        "Spread": plr_1_spread,
        "Prob Player1 Cover": p_plr1_cover,
        "Total": total,
        "Prob Over": p_over
    }


# Example: price a head-to-head receptions market for two players; both names
# are looked up by player_name in `game_proj`.
plrs = ["DeVonta Smith", "Christian McCaffrey"]
build_reception_line(plrs, game_proj)

{'Player1': 'DeVonta Smith',
 'Player2': 'Christian McCaffrey',
 'Player1 Prob': 0.6015,
 'Spread': -1.5,
 'Prob Player1 Cover': 0.4799,
 'Total': 11.5,
 'Prob Over': 0.4}