In [1]:
import numpy as np
import tls_client
import pandas as pd
import webbrowser
import requests
import re
import plotly.express as px
import seaborn as sns
from utils import merge_books, pp_delta, ud_delta

pd.set_option('display.max_columns', None)

In [2]:
# -- read in scraped data from RotoWorld

pp = pd.read_csv(r"C:\Users\mikej\Downloads\rw-prizepicks-predictions-2024-01-24.csv")         #prize picks lines
ud = pd.read_csv(r"C:\Users\mikej\Downloads\rw-underdog-predictions-2024-01-24.csv")           #underdog lines

spreads = pd.read_excel(r"C:\Users\mikej\Downloads\nfl-odds-rotowire (5).xlsx",header=1)           #game spreads and totals
spreads = (spreads
           [['Team','Spread','Over-Under']]
           .rename(columns = {'Team':'team','Spread':'spread','Over-Under':'over_under'}))

books_merged = merge_books(pp,ud)
books_merged.to_csv('data/books_merged.csv',index=False)
books_merged.sample(3)

Unnamed: 0,player_display_name,position,team,opponent,book_stat,pp_line,ud_line
11,Mecole Hardman,WR,KC,@BAL,Receiving Yards,10.5,9.5
163,Sam LaPorta,TE,DET,@SF,Receptions,,5.0
47,Travis Kelce,TE,KC,@BAL,Fantasy Score,14.5,


In [3]:
# ------- REQUEST DATA FROM NFL API -------

import nfl_data_py as nfl

years = [2018,2019,2020,2021,2022,2023]
nfl_df = nfl.import_weekly_data(years,downcast=True)

Downcasting floats.


In [4]:
##  MERGE SPORTSBOOKS AND NFL PLAYER DATA

books_merged = pd.read_csv(r"C:\Users\mikej\Desktop\mean-median\data\books_merged.csv")
all_merged = books_merged.merge(nfl_df, how='left', on='player_display_name')
print(f"{all_merged.player_display_name.nunique()} players with available spreads")

53 players with available spreads


In [5]:
###  -------------- TO RERUN START HERE!!!! ---------

# add this weeks opponent
df = all_merged.copy().reset_index(drop=True)
df['opponent_team'].fillna('None',inplace=True)

df_filtered_by_stat = df[(df.book_stat == 'Rushing Yards') | (df.book_stat == 'Passing Yards') | (df.book_stat == 'Receiving Yards')]

filtered_by_needed_columns = df_filtered_by_stat[['player_display_name', 'position_x', 'team', 'opponent',
       'book_stat', 'pp_line','ud_line', 'player_id', 'player_name','season', 'week','completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'sacks','carries', 'rushing_yards', 'rushing_tds','receptions', 'targets', 'receiving_yards',
       'receiving_tds','receiving_air_yards','target_share','wopr',
       'fantasy_points','opponent_team']].rename(columns={'position_x':'position'}).reset_index(drop=True)

#    cleanup wording to match nfl data
stats_dict = {'Rushing Yards':'rushing_yards', 'Passing Yards':'passing_yards','Receiving Yards':'receiving_yards'}
filtered_by_needed_columns['book_stat'] = filtered_by_needed_columns['book_stat'].map(stats_dict)
df = filtered_by_needed_columns.copy()

# --- add 'delta' columns using functions from 'utils' module
df['pp_delta'] = df.apply(pp_delta, axis=1)
df['ud_delta'] = df.apply(ud_delta, axis=1)

# add long team name
team_full = ['Cardinals','Falcons','Ravens','Bills','Panthers','Bears','Bengals','Browns','Cowboys','Broncos','Lions','Packers','Texans','Colts','Jaguars','Chiefs','Raiders','Chargers','Rams','Dolphins','Vikings','Patriots','Saints','Giants','Jets','Eagles','Steelers','49ers','Seahawks','Buccaneers','Titans','Commanders']
team_abr = ['ARI','ATL','BAL','BUF','CAR','CHI','CIN','CLE','DAL','DEN','DET','GB','HOU','IND','JAX','KC','LV','LAC','LAR','MIA','MIN','NE','NO','NYG','NYJ','PHI','PIT','SF','SEA','TB','TEN','WAS']
zipped = list(zip(team_abr,team_full))
team_dict = dict(zipped)
df['team_long'] = df['team'].map(team_dict)

# add game spreads and totals
zipped = list(zip(spreads.team,spreads.spread))
spread_dict = dict(zipped)
df['spread'] = df['team_long'].map(spread_dict)

zipped = list(zip(spreads.team,spreads.over_under))
ou_dict = dict(zipped)
df['over_under'] = df['team_long'].map(ou_dict)

print(f"DataFrame shape: {df.shape}")
print("")
df.sample(3)

DataFrame shape: (2361, 34)



Unnamed: 0,player_display_name,position,team,opponent,book_stat,pp_line,ud_line,player_id,player_name,season,week,completions,attempts,passing_yards,passing_tds,interceptions,sacks,carries,rushing_yards,rushing_tds,receptions,targets,receiving_yards,receiving_tds,receiving_air_yards,target_share,wopr,fantasy_points,opponent_team,pp_delta,ud_delta,team_long,spread,over_under
976,Marquez Valdes-Scantling,WR,KC,@BAL,receiving_yards,14.5,14.5,00-0034272,M.Valdes-Scantling,2020.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,0.0,0.0,4.0,0.0,0.0,44.0,0.117647,0.308659,0.9,,-14.5,-14.5,Chiefs,3.5,44.5
31,Christian McCaffrey,RB,SF,DET,rushing_yards,0.5,88.5,00-0033280,C.McCaffrey,2019.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,26.0,1.0,7.0,10.0,72.0,0.0,20.0,0.204082,0.343656,15.8,,25.5,-62.5,49ers,-7.0,51.0
2352,Nelson Agholor,WR,BAL,KC,receiving_yards,,16.5,00-0031549,N.Agholor,2023.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,30.0,0.086957,0.217935,0.0,CLE,,-16.5,Ravens,-3.5,44.5


In [6]:
df.to_csv('data/final_data.csv',index=False)

In [None]:
qb_cols = ['player_display_name', 'position','recent_team', 'season', 'week','completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions','fantasy_points']

pass_df = nfl_df[qb_cols].melt(id_vars=['player_display_name','position','recent_team','season','week'])

In [None]:
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
              'application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'sec-ch-ua': '"Not.A/Brand";v="99", "Chromium";v="91", "Google Chrome";v="91"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/91.0.4472.124 Safari/537.36'
}

requests = tls_client.Session(client_identifier="chrome112")

# Fetch data from Prizepicks API
response1 = requests.get('https://api.prizepicks.com/projections', headers=headers)
prizepicks = response1.json()
# prizepicks

# Create empty list to store player data
pplist = []

# Dictionary to store PrizePicks player name mappings
library = {}

# Process PrizePicks API data (populates player library)
for included in prizepicks['included']:
    if 'attributes' in included and 'name' in included['attributes']:
        PPname_id = included['id']
        PPname = included['attributes']['name']
        if 'team' in included['attributes']:
            ppteam = included['attributes']['team']
        else:
            ppteam = 'N/A'
        if 'league' in included['attributes']:
            ppleague = included['attributes']['league']
        else:
            ppleague = 'N/A'
        library[PPname_id] = {'name': PPname, 'team': ppteam, 'league': ppleague}
        
for ppdata in prizepicks['data']:
    if '1st Half' in ppdata['attributes']['description']:
        continue
    PPid = ppdata['relationships']['new_player']['data']['id']
    PPprop_value = ppdata['attributes']['line_score']
    PPprop_type = ppdata['attributes']['stat_type']
    ppinfo = {"name_id": PPid, "Stat": PPprop_type, "Prizepicks": PPprop_value}
    pplist.append(ppinfo)

# Iterate over the pplist array to add player names, team, and league, and remove name_id
for element in pplist:
    name_id = element['name_id']
    if name_id in library:
        player_data = library[name_id]
        element['Name'] = player_data['name']
        element['Team'] = player_data['team']
        element['League'] = player_data['league']
    else:
        element['Name'] = "Unknown"
        element['Team'] = "N/A"
        element['League'] = "N/A"
    del element['name_id']
    
pp_df = pd.DataFrame.from_dict(pd.json_normalize(pplist), orient='columns')

# ----- RENAME COLUMNS -------
pp_df.rename(columns={'Name':'player_display_name','Team':'team','Stat':'stat','Prizepicks':'pp_line', 'League':'sport'},inplace=True)


# ----- CHANGE PP PLAYER NAME TO NFL PLAYER NAME ------
name_dict = {'Kenneth Walker III':'Kenneth Walker','Michael Pittman Jr.':'Michael Pittman','D.K. Metcalf':'DK Metcalf'}
pp_df.replace(name_dict, inplace=True)

# ----- GRAB FOOTBALL ONLY ------
pp_df = pp_df[pp_df.sport == 'NFL']
print(f"Prize Pick lines: {len(pp_df)}")

In [None]:
# --- REPLACE PP STATS COLUMNS WITH NFL VERBIAGE (MAP) -----

# stats_dict = {'Pass TDs':'passing_tds','Rush+Rec TDs':'rush_plus_rec_tds','INT':'interceptions', 'Sacks':'sacks',
#        'Rushing Yards':'rushing_yards', 'Passing Yards':'passing_yards', 'Passing + Rushing Yards':'pass_plus_rush_yards', 'Passing Completions':'completions',
#        'Fantasy Score':'fantasy_points', 'Rushing Attempts':'carries', 'Passing Attempts':'attempts',
#        'Receiving Yards':'receiving_yards', 'Receptions':'receptions','Rushing + Receiving Yards':'rush_plus_rec_yards'}