In [1]:
import statsmodels.api as sm
import statsmodels.formula.api as smf


from functools import partial

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from scipy.stats import ttest_ind

In [2]:
def load_allday_data(path="data/current_allday_data.csv.gz"):
    """Read the All Day CSV export and parse its date columns.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the (optionally compressed) CSV file. Defaults to the
        file this notebook has always read, so existing calls without
        arguments behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The file contents with the "Datetime" and "Date" columns converted
        by ``pd.to_datetime``; every other column is left as ``read_csv``
        inferred it.
    """
    df = pd.read_csv(path)
    date_columns = ["Datetime", "Date"]
    # Convert both columns in one assignment; each column is parsed independently.
    df[date_columns] = df[date_columns].apply(pd.to_datetime)
    return df

In [3]:
# Play types kept when building the scoring dataset. The trailing notes are
# the author's observations about how often each play type is a touchdown.
score_columns = [
    "Pass",
    "Reception",
    "Rush",
    "Strip Sack",
    "Interception",
    "Fumble Recovery",  # ~50% TD
    "Blocked Kick",  # 1/4 not td
    "Punt Return",  # all TD
    "Kick Return",  # 1/6 not td
]

# Raw touchdown-flag column name -> display name used after renaming.
td_mapping = {
    "scored_td_in_moment": "Best Guess (Moment TD)",
    "pbp_td": "Conservative (Moment TD)",
    "description_td": "Description only (Moment TD)",
    "scored_td_in_game": "Best Guess: (In-game TD)",
    "game_td": "Conservative (In-game TD)",
}

# Position codes, split by the group each one belongs to.
all_pos = ["All"]
offense = ["QB", "WR", "RB", "TE", "OL"]
defense = ["DB", "DL", "LB"]
team_pos = ["Team"]

pos_groups = ["All", "Offense", "Defense", "Team"]
# Flat list of every selectable position: "All" first, then offense, defense, team.
positions = [*all_pos, *offense, *defense, *team_pos]


In [4]:
# Load the full dataset once; the filtered frames below are derived from it.
main_data = load_allday_data()

  df = pd.read_csv("data/current_allday_data.csv.gz")


In [6]:
# Keep only rows whose Play_Type is one of the scoring play types, renumber the
# rows from zero, and give the touchdown-flag columns their display names.
score_data = (
    main_data.loc[lambda plays: plays["Play_Type"].isin(score_columns)]
    .reset_index(drop=True)
    .rename(columns=td_mapping)
)

In [7]:
# Analysis parameters, hard-coded for this run.
# NOTE(review): the values look like dashboard option labels (streamlit is
# imported at the top of the notebook) — confirm where they originate.
date_range = "All Time"  # selects the date window applied to score_data
play_type = "All"  # "All" keeps every play type; any other value filters Play_Type to it
how_scores = "Best Guess (Moment TD)"  # column copied into "Scored Touchdown?" (one of the td_mapping display names)
position_type = "By Position"  # not read by any cell visible in this section
metric = "Both"  # not read by any cell visible in this section
agg_metric = "Average Sales Price ($)"  # not read by any cell visible in this section

In [8]:
# Date window for each supported `date_range` label, as
# (inclusive start, exclusive end); None means "no bound on that side".
date_windows = {
    "All Time": (None, None),
    "2022 Full Season": ("2022-09-08", None),
    "2022 Week 1": ("2022-09-08", "2022-09-15"),
    "2022 Week 2": ("2022-09-15", "2022-09-22"),
    "2022 Week 3": ("2022-09-22", "2022-09-29"),
}
if date_range not in date_windows:
    # Fail loudly instead of silently reusing a stale `df` from an earlier run.
    raise ValueError(
        f"Unknown date_range {date_range!r}; expected one of {list(date_windows)}"
    )
window_start, window_end = date_windows[date_range]

# Build the row mask from score_data itself. score_data was re-indexed after
# filtering, so a mask computed on main_data would align to the wrong rows.
in_window = pd.Series(True, index=score_data.index)
if window_start is not None:
    in_window &= score_data.Date >= window_start
if window_end is not None:
    in_window &= score_data.Date < window_end
if play_type != "All":
    in_window &= score_data.Play_Type == play_type

# Take an explicit copy so the column assignments here and in later cells
# write to an independent frame rather than to a slice of score_data.
df = score_data.loc[in_window].copy()
df["Scored Touchdown?"] = df[how_scores]

In [9]:
def get_position_group(x):
    """Return the position group for a position code.

    Checks the module-level ``offense``, ``defense`` and ``team_pos`` lists in
    that order and returns "Offense", "Defense" or "Team" for the first list
    containing ``x``. Returns ``None`` when ``x`` is in none of them.
    """
    group_members = (
        ("Offense", offense),
        ("Defense", defense),
        ("Team", team_pos),
    )
    for group_name, members in group_members:
        if x in members:
            return group_name
    return None


# Label every row with its position group. Positions that are in none of the
# offense/defense/team lists get None, because get_position_group falls through.
df["Position Group"] = df.Position.apply(get_position_group)


In [10]:
# Model formulas: price explained by the touchdown flag, game result, play
# type, rarity, and either the individual position or the position group.
# Column names containing spaces or "?" are wrapped in Q("...") for the parser.
full_model = 'Price ~ Q("Scored Touchdown?") + won_game + Play_Type + Position + Rarity'
full_model_group = 'Price ~ Q("Scored Touchdown?") + won_game + Play_Type +  Q("Position Group") + Rarity'

# Mixed linear model on the per-position formula, grouped by marketplace_id.
md = smf.mixedlm(formula=full_model, data=df, groups=df["marketplace_id"])
# Fit with the L-BFGS optimizer.
mdf = md.fit(method=["lbfgs"])

In [11]:
# Build the summary tables for the fitted model.
summary = mdf.summary()

In [12]:
# Bare expression: shows the summary as this cell's output.
summary

0,1,2,3
Model:,MixedLM,Dependent Variable:,Price
No. Observations:,702995,Method:,REML
No. Groups:,501,Scale:,5546.5105
Min. group size:,2,Log-Likelihood:,-4030717.8955
Max. group size:,6003,Converged:,Yes
Mean group size:,1403.2,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,-296.522,831.225,-0.357,0.721,-1925.692,1332.649
"Q(""Scored Touchdown?"")[T.True]",-37.417,140.045,-0.267,0.789,-311.900,237.067
won_game[T.True],-75.258,138.632,-0.543,0.587,-346.971,196.455
Play_Type[T.Fumble Recovery],-371.334,909.073,-0.408,0.683,-2153.085,1410.417
Play_Type[T.Interception],-171.354,819.619,-0.209,0.834,-1777.779,1435.071
Play_Type[T.Kick Return],-385.433,1097.140,-0.351,0.725,-2535.788,1764.921
Play_Type[T.Pass],270.046,1003.663,0.269,0.788,-1697.096,2237.189
Play_Type[T.Punt Return],-438.271,1148.516,-0.382,0.703,-2689.322,1812.780
Play_Type[T.Reception],93.709,999.932,0.094,0.925,-1866.122,2053.539


In [9]:
# Doesn't run...
# NOTE(review): the failure is not recorded in this notebook. Presumably adding
# a term for every Player makes the fit too large to complete — TODO confirm.

# Same formulas as defined earlier, with Player added as an extra term.
full_model_player = 'Price ~ Q("Scored Touchdown?") + won_game + Play_Type + Position + Player + Rarity'
full_model_group_player = 'Price ~ Q("Scored Touchdown?") + won_game + Play_Type +  Q("Position Group") + Player + Rarity'

# Same data frame, grouping column, and optimizer as the earlier model.
md_player = smf.mixedlm(
    full_model_player,
    df,
    groups=df["marketplace_id"],
)
mdf_player = md_player.fit(method=["lbfgs"])

summary_player = mdf_player.summary()
summary_player

: 

: 