In [1]:
# Code to be able to import local modules in notebooks
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
### Import dependencies
# Third party libraries
import pandas as pd
# Custom packages
from ffpackage import mfl, ourlads, analysis, predictions, viz
from appmanager import db

In [3]:
# Set global variables
CURRENT_SEASON = 2022
POS_LIST = ['QB', 'RB', 'WR', 'TE', 'PK', 'DF']

In [6]:
adpValues = mfl.get_adp()
adpValues

ValueError: Length mismatch: Expected axis has 0 elements, new values have 2 elements

In [4]:
# Retrieve basic info on all players
allPlayers = mfl.get_players()
# Retrieve Fantasy Sharks rankings
sharkRanks = mfl.get_sharkRanks()
# Retrieve Average Draft Positions
adpValues = mfl.get_adp()

# Find player ages
# Retrieve any player dobs who are already in the db
player_dobs = db.read_db('player_dobs')
# Rename the dob columns until I can get to that in the app setup
player_dobs = player_dobs.rename(columns={'PlayerID':'id_mfl', 'DOB':'dob', 'Age':'age'})
# Check for any players whose ages are not already in the db
ageNeeded = allPlayers.loc[~allPlayers['id_mfl'].isin(player_dobs['id_mfl'])]
# If there are players whose ages are not in the db, retrieve those ages from My Fantasy League
if len(ageNeeded)>0:
    # Break player list into chunks small enough for the API server
    # Specify chunk size
    n = 50
    # Slice the df where ages are needed into chunks
    list_df = [ageNeeded['id_mfl'][i:i+n] for i in range(0,ageNeeded['id_mfl'].shape[0],n)]
    # Loop over the chunks
    for i in range(len(list_df)):
        # Join the playerIDs into a string to use in building the API call
        idList = ",".join(list_df[i])
        # Retrieve the dates of birth for these players from My Fantasy League
        new_dobs = mfl.get_playerProfiles(idList)
        # Append these players to the list that already existed in the app's database
        player_dobs = player_dobs.append(new_dobs)
# Convert date columns to datetime
player_dobs['dob'] = pd.to_datetime(player_dobs['dob'])
# Convert Date of Birth to Age using datetime's relativedelta module
player_dobs['age'] = player_dobs['dob'].apply(analysis.calculate_age)
# Drop the dob column
player_dobs = player_dobs.drop(columns='dob')

# Merge all dfs from MyFantasyLeague API
allPlayers = allPlayers.merge(
    player_dobs, on='id_mfl', how='left'
).merge(
    sharkRanks, on='id_mfl', how='left'
).merge(
    adpValues, on='id_mfl', how='left'
)
# Clean sharkRank and ADP columns
# If a player is ranked low enough, they do not get a ranking in FantasySharks or ADP. Replace these with a very low rank
allPlayers['sharkRank'].fillna(3000, inplace=True)
allPlayers['adp'].fillna(3000, inplace=True)

  df.loc[:, 'playerName'] = df.loc[:, 'playerName'].str.replace(".", "")
  list_df = [ageNeeded['id_mfl'][i:i+n] for i in range(0,ageNeeded['id_mfl'].shape[0],n)]
  player_dobs = player_dobs.append(new_dobs)


In [5]:
# Retrieve position ranks from OurLads
posRanks = ourlads.scrape_depthcharts()

  df.loc[:, 'playerName'] = df.loc[:, 'playerName'].str.replace(".", "")


In [6]:
### Merge MyFantasyLeague data with OurLads data
allPlayers = allPlayers.merge(posRanks, how='left', on=['playerName', 'team'])
## Clean merged df
# Give defenses a position rank since OurLads does not include them
allPlayers.loc[allPlayers['pos']=='DF', 'posRank'] = "DF1"
# Give any players who were not on the OurLads website a rank of 3
allPlayers.loc[allPlayers['posRank'].isna(), 'posRank'] = allPlayers.loc[allPlayers['posRank'].isna(), 'pos'] + "3"
# Give any injured reserve players a rank of 3
allPlayers.loc[allPlayers['RES']=="YES", 'posRank'] = allPlayers.loc[allPlayers['RES']=="YES", 'pos'] + "3"
# Check if pos column matches posRank; if not, replace with third string MFL position
allPlayers.loc[allPlayers['pos']!=allPlayers['posRank'].str[:2], 'posRank'] = allPlayers.loc[allPlayers['pos']!=allPlayers['posRank'].str[:2], 'pos'] + "3"
# Specify all players are in current season
allPlayers['season'] = CURRENT_SEASON

In [7]:
### Fetch historical data from app database
prior1 = db.read_db('prior1')
prior2 = db.read_db('prior2')
# Rename columns
prior1 = prior1.rename(columns={'player':'playerName'})
prior2 = prior2.rename(columns={'player':'playerName'})

In [8]:
# Create current_df
# This will mean scraping the ff db site weekly
curr = pd.DataFrame(allPlayers['playerName'])
colList = ['gamesPlayed',
    'passA', 'passC', 'passY', 'passT', 'passI', 'pass2', 
    'rushA', 'rushY','rushT', 'rush2', 
    'recC', 'recY', 'recT', 'rec2', 'fum', 
    'XPA', 'XPM','FGA', 'FGM', 'FG50', 
    'defSack', 'defI', 'defSaf', 'defFum', 'defBlk','defT', 'defPtsAgainst', 'defPassYAgainst', 'defRushYAgainst','defYdsAgainst'
]
cols = pd.DataFrame(columns=colList)
curr = curr.merge(cols, how='left', left_index=True, right_index=True)
curr.fillna(0, inplace=True)

# Rename all columns in curr
colList = [(x + "_curr") for x in list(curr.columns)]
curr.columns = colList
curr = curr.rename(columns={
       'playerName_curr':'playerName',
       })
curr

Unnamed: 0,playerName,gamesPlayed_curr,passA_curr,passC_curr,passY_curr,passT_curr,passI_curr,pass2_curr,rushA_curr,rushY_curr,...,defSack_curr,defI_curr,defSaf_curr,defFum_curr,defBlk_curr,defT_curr,defPtsAgainst_curr,defPassYAgainst_curr,defRushYAgainst_curr,defYdsAgainst_curr
0,BUFFALO BILLS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,INDIANAPOLIS COLTS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,MIAMI DOLPHINS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,NEW ENGLAND PATRIOTS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,NEW YORK JETS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1113,NICK SCIBA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1114,STANLEY BERRYHILL,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1115,KYRIC MCGOWAN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1116,STONE SMARTT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Merge playerdf, currentdf, prior1, and prior2
allPlayers = allPlayers.merge(curr, how='left', on='playerName').merge(prior1, how='left', on='playerName').merge(prior2, how='left', on='playerName')
# Fill data for players who do not have prior data
allPlayers.fillna(0, inplace=True)

In [10]:
# Need to resolve these duplicates, which stem from players named JOSH JOHNSON and RYAN GRIFFIN
# allPlayers.loc[allPlayers.duplicated(subset=['playerName', 'pos'], keep=False)]
# print(len(allPlayers))
allPlayers.drop_duplicates(subset=['playerName', 'pos'], inplace=True)
# print(len(allPlayers))


In [11]:
# Get schedule
schedule = db.read_db('schedule')
# Merge in opponents
allPlayers = allPlayers.merge(schedule, how='left', on='team')
allPlayers

Unnamed: 0,id_mfl,playerName,pos,team,age,sharkRank,adp,posRank,PR,KR,...,defSaf_prior2,defFum_prior2,defBlk_prior2,defT_prior2,defPtsAgainst_prior2,defPassYAgainst_prior2,defRushYAgainst_prior2,defYdsAgainst_prior2,week,opponent
0,0501,BUFFALO BILLS,DF,BUF,0.0,656.0,3000,DF1,0,0,...,0.0,0.6875,0.03125,0.15625,20.4375,202.875,116.625,319.5,1.0,LAR
1,0501,BUFFALO BILLS,DF,BUF,0.0,656.0,3000,DF1,0,0,...,0.0,0.6875,0.03125,0.15625,20.4375,202.875,116.625,319.5,3.0,MIA
2,0501,BUFFALO BILLS,DF,BUF,0.0,656.0,3000,DF1,0,0,...,0.0,0.6875,0.03125,0.15625,20.4375,202.875,116.625,319.5,4.0,BAL
3,0501,BUFFALO BILLS,DF,BUF,0.0,656.0,3000,DF1,0,0,...,0.0,0.6875,0.03125,0.15625,20.4375,202.875,116.625,319.5,6.0,KCC
4,0501,BUFFALO BILLS,DF,BUF,0.0,656.0,3000,DF1,0,0,...,0.0,0.6875,0.03125,0.15625,20.4375,202.875,116.625,319.5,9.0,NYJ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12393,16115,CALEB SHUDAK,PK,TEN,25.0,3000.0,3000,PK2,0,0,...,0.0,0.0000,0.00000,0.00000,0.0000,0.000,0.000,0.0,10.0,DEN
12394,16115,CALEB SHUDAK,PK,TEN,25.0,3000.0,3000,PK2,0,0,...,0.0,0.0000,0.00000,0.00000,0.0000,0.000,0.000,0.0,12.0,CIN
12395,16115,CALEB SHUDAK,PK,TEN,25.0,3000.0,3000,PK2,0,0,...,0.0,0.0000,0.00000,0.00000,0.0000,0.000,0.000,0.0,14.0,JAC
12396,16115,CALEB SHUDAK,PK,TEN,25.0,3000.0,3000,PK2,0,0,...,0.0,0.0000,0.00000,0.00000,0.0000,0.000,0.000,0.0,16.0,HOU


In [12]:
# Get opponent historical data
# select only defenses
allDef = allPlayers.loc[allPlayers['pos']=='DF']

# Get current defensive scores
currDef = allDef.copy()
# Select only relevant columns
currDef = currDef[['team', 'week',
       'defSack_curr', 'defI_curr',
       'defSaf_curr', 'defFum_curr', 'defBlk_curr',
       'defT_curr', 'defPtsAgainst_curr', 'defPassYAgainst_curr',
       'defRushYAgainst_curr', 'defYdsAgainst_curr']]

# Get prior defensive scores
priorDef = allDef.copy()
# Select only relevant columns
priorDef = priorDef[['team', 'week',
       'defSack_prior1', 'defI_prior1',
       'defSaf_prior1', 'defFum_prior1', 'defBlk_prior1',
       'defT_prior1', 'defPtsAgainst_prior1', 'defPassYAgainst_prior1',
       'defRushYAgainst_prior1', 'defYdsAgainst_prior1']]
# Merge the two defensive dfs
allDef = currDef.merge(priorDef, how='left', on=['team', 'week'])

# Rename all columns in allDef
colList = [(x + "_opp") for x in list(allDef.columns)]
allDef.columns = colList
allDef = allDef.rename(columns={
       'team_opp':'opponent',
       'week_opp':'week'
       })

# Connect opponents to defenses
allPlayers = allPlayers.merge(allDef, how='left', on=['opponent', 'week'])

In [13]:
# Make predictions for NFL statistics
# Create empty df to hold the data
predStats = pd.DataFrame()
# Loop through each position since each position has a unique predictive model
for pos in POS_LIST:
    # Make predictions
    predXpos = predictions.makePredictions(allPlayers, pos=pos)
    # Append each position's predictions
    predStats = pd.concat([predStats, predXpos], axis=0)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-

In [14]:
# Build a SQL query to create table
# List columns and datatypes
dataDef = ["id_mfl VARCHAR(32), season SMALLINT, week SMALLINT, team VARCHAR(32), playerName VARCHAR(32), age SMALLINT, sharkRank SMALLINT, adp SMALLINT, KR VARCHAR(32), PR VARCHAR(32), RES VARCHAR(32), pos VARCHAR(32), posRank VARCHAR(32), opponent VARCHAR(32)"] + [
    x + " FLOAT(8)" for x in predStats.columns[14:]]
dataDef = str(dataDef)
dataDef = dataDef.replace("'", "")
dataDef = dataDef.replace("[", "")
dataDef = dataDef.replace("]", "")
# Write the df to the Postgresql database
db.write_df(predStats, "predictions", dataDef)