# FantasyPros Projection Scraper

In [1]:
# To open urls
from urllib.request import urlopen

# To scrape HTML easily
from bs4 import BeautifulSoup
import requests

# For data frames
import pandas as pd

# For generating random number from normal distribution
import numpy as np

# from platform import python_version

# print(python_version())
import pulp

In [2]:
# FantasyPros URLs by position for PPR scoring, min/max projections

# QB
urlQB = 'https://www.fantasypros.com/nfl/projections/qb.php?max-yes=true&min-yes=true'

# RB
urlRB = 'https://www.fantasypros.com/nfl/projections/rb.php?max-yes=true&min-yes=true&scoring=PPR'

# WR
urlWR = 'https://www.fantasypros.com/nfl/projections/wr.php?max-yes=true&min-yes=true&scoring=PPR'

# TE
urlTE = 'https://www.fantasypros.com/nfl/projections/te.php?max-yes=true&min-yes=true&scoring=PPR'

# Def
urlDef = 'https://www.fantasypros.com/nfl/projections/dst.php?max-yes=true&min-yes=true'

# K
urlK = 'https://www.fantasypros.com/nfl/projections/k.php?max-yes=true&min-yes=true'

## Function that scrapes FantasyPros for stats

In [3]:
def FP_scrape(position_url, num_cols="9"):
    """
    This function takes in a FantasyPros url, scrapes that page for player info and projected points.
    'num_cols' parameter is necessary because each position page has a different number of columns for stats
    """
    # Get relevant data from page
    res = requests.get(position_url)
    soup = BeautifulSoup(res.text)

    pos_labels = soup.select(".player-label")
    pos_labels = [name.text for name in pos_labels]
    
    # Clean to get name and team only
    # remove 'Player'
    pos_labels = pos_labels[1:]
    # Remove 'highlow' from each string
    new_str = []
    for i in pos_labels:
        size = len(i)
        i = i[:size - 8] # last 7 characters plus space character
        new_str.append(i)
    
    # Checking for FantasyPros Defense page to map abbreviations to team names
    if position_url == 'https://www.fantasypros.com/nfl/projections/dst.php?max-yes=true&min-yes=true':
        # Initialize dict for mapping defenses to abbrev
        team_dict = {"Minnesota Vikings":"MIN", "Green Bay Packers":"GB", "Detroit Lions":"DET", "Chicago Bears":"CHI",
                    "Los Angeles Rams":"LAR", "Seattle Seahawks":"SEA", "San Francisco 49ers":"SF", "Arizona Cardinals":"ARI",
                    "Philadelphia Eagles":"PHI", "Dallas Cowboys":"DAL", "Washington Football Team":"WAS", "New York Giants":"NYG",
                    "Tampa Bay Buccaneers":"TB", "New Orleans Saints":"NO", "Atlanta Falcons":"ATL", "Carolina Panthers":"CAR",
                    "Pittsburgh Steelers":"PIT", "Cincinnati Bengals":"CIN", "Cleveland Browns":"CLE", "Baltimore Ravens":"BAL",
                    "Los Angeles Chargers":"LAC", "Denver Broncos":"DEN", "Kansas City Chiefs":"KC", "Las Vegas Raiders":"LV",
                    "Tennessee Titans":"TEN", "Indianapolis Colts":"IND", "Houston Texans":"HOU", "Jacksonville Jaguars":"JAC",
                    "New England Patriots":"NE", "New York Jets":"NYJ", "Buffalo Bills":"BUF", "Miami Dolphins":"MIA"}
        new_dict = {}
        for i in new_str:
            new_dict[i] = team_dict[i]
        
        new_str = list(map(list, new_dict.items()))
        
    else:
        # Split string into player and team on last space delimiter
        new_str = [s.rsplit(' ', 1) for s in new_str]
    
    # Get Fantasy Point Projections (Low, Median, High)
    max_points = soup.select(".center:nth-child(" + num_cols + ") .max-cell")
    max_points = [p.text for p in max_points]
    min_points = soup.select(".center:nth-child(" + num_cols + ") .min-cell")
    min_points = [p.text for p in min_points]
    median_points = soup.select(".center:nth-child(" + num_cols + ")")
    median_points = [i.text[:4] for i in median_points]

    # Create data frame for position from list of lists 
    pos_df = pd.DataFrame(new_str, columns = ['Player', 'Team'])
    pos_df['Low'] = min_points
    pos_df['Median'] = median_points
    pos_df['High'] = max_points
    
    return(pos_df)

## Create data frame for each position

In [4]:
# Print all rows
pd.set_option("display.max_rows", None, "display.max_columns", None)

# QB
qb_df = FP_scrape(urlQB, num_cols="11")
qb_df["Position"] = "QB"
# RB
rb_df = FP_scrape(urlRB)
rb_df["Position"] = "RB"
# WR
wr_df = FP_scrape(urlWR)
wr_df["Position"] = "WR"
# TE
te_df = FP_scrape(urlTE, num_cols="6")
te_df["Position"] = "TE"
# Def
def_df = FP_scrape(urlDef, num_cols="10")
def_df["Position"] = "Def"
# K
k_df = FP_scrape(urlK, num_cols="5")
k_df["Position"] = "K"

# Concatenate into one df
all_df = pd.concat([qb_df, rb_df, wr_df, te_df, def_df, k_df])
all_df['Low'] = all_df['Low'].astype(float)
all_df['Median'] = all_df['Median'].astype(float)
all_df['High'] = all_df['High'].astype(float)

# Remove low projections (< 3 points)
all_df = all_df[all_df['Median'] > 3].reset_index(drop=True)

# Assign standard deviations by position
conditions = [
    (all_df['Position'] == 'QB'),
    (all_df['Position'] == 'RB'),
    (all_df['Position'] == 'WR'),
    (all_df['Position'] == 'TE'),
    (all_df['Position'] == 'Def'),
    (all_df['Position'] == 'K')
    ]

# create a list of the values we want to assign for each condition
values = [2, 2, 4, 3, 4, 0]

# create a new column and use np.select to assign values to it using our lists as arguments
all_df['SD'] = np.select(conditions, values)

all_df['Range'] = all_df['High'] - all_df['Low']

## Scrape DraftKings Salary data from FantasyPros

In [5]:
# FantasyPros DK salary url
qbDK = 'https://www.fantasypros.com/daily-fantasy/nfl/draftkings-salary-changes.php'
res = requests.get(qbDK)
soup = BeautifulSoup(res.text)

# Player Names
dkPlayerNames = soup.select("td a")
dkPlayerNames = [name.text for name in dkPlayerNames]
# Remove empty strings from list
dkPlayerNames = [i for i in dkPlayerNames if i]

# Team
dkPlayerTeam = soup.select("small")
dkPlayerTeam = [name.text for name in dkPlayerTeam]
# Remove parentheses and whitespace
dkPlayerTeam = [s.replace('(', '') for s in dkPlayerTeam]
dkPlayerTeam = [s.replace(')', '') for s in dkPlayerTeam]
dkPlayerTeam = [s.replace(" ", "") for s in dkPlayerTeam]
dkPlayerTeam = [s.split('-') for s in dkPlayerTeam]

# Salary
dkPlayerSalary = soup.select(".salary")
dkPlayerSalary = [name.text for name in dkPlayerSalary]
# Remove '$' and ','
dkPlayerSalary = [s.replace('$', '') for s in dkPlayerSalary]
dkPlayerSalary = [s.replace(',', '') for s in dkPlayerSalary]
# Convert to int
dkPlayerSalary = [int(i) for i in dkPlayerSalary]

# Opponent
dkOpponent = soup.select(".opp-rank-tip")
dkOpponent = [name.text for name in dkOpponent]
# Remove '@'
dkOpponent = [s.replace('@', '') for s in dkOpponent]

# Create data frame
dk_df = pd.DataFrame(dkPlayerTeam, columns = ["Team", "Position"])
# insert player name as first column
dk_df.insert(0, "Player", dkPlayerNames)
# Add salary
dk_df['Salary'] = dkPlayerSalary
# Add opponent
dk_df.insert(2, 'Opponent', dkOpponent)

## Merge player projections with salary data

In [6]:
complete_df = dk_df.merge(all_df, how = 'inner', on = ['Player', 'Team'])
complete_df.drop(columns=['Position_x'], inplace = True)
complete_df.rename(columns = {'Position_y':'Position'}, inplace = True)
# Have to change this every week
main_slate_teams = ['TB', 'ATL', 'SEA', 'WAS', 'CHI', 'MIN', 'HOU', 'IND', 'DET', 'TEN', 'JAC', 'BAL', 'NE', 'MIA', 'SF',
                   'DAL', 'NYJ', 'LAR', 'PHI', 'ARI', 'KC', 'NO']

showdown_slate = ['LAC', 'LV']

# Only main slate teams
complete_df = complete_df[complete_df.Team.isin(main_slate_teams)].reset_index(drop=True)

## Write projections to .csv for excel use

In [7]:
complete_df.to_csv(r'C:\Users\punco\OneDrive\Desktop\Fantasy Football\Week 15.csv', index = False)

# Monte Carlo Simulation using these player distributions

In [8]:
# Simulate one player's points for the week
def player_sim(mu, sigma):
    return np.random.normal(mu, sigma)

# One simulation
def one_sim(df):
    """
    Generates a point projection from normal distribution for each player using their mean and sd
    """
    points_list = []
    player_list = []
    salary_list = df['Salary']
    pos_list = df['Position']
    for index, row in df.iterrows():
        points = player_sim(row['Median'], row['SD']) # generate points using mean, sd for each player
        points_list.append(points)
        player = row['Player']
        player_list.append(player)

    sim_dict = {'Player':player_list, 'Points':points_list, 'Position':pos_list, 'Salary':salary_list}
    sim_df = pd.DataFrame(sim_dict, columns = ['Player', 'Points', 'Position', 'Salary'])
    
    # indicator variables for each position
    sim_df['QB'] = [1 if x == 'QB' else 0 for x in sim_df['Position']]
    sim_df['RB'] = [1 if x == 'RB' else 0 for x in sim_df['Position']]
    sim_df['WR'] = [1 if x == 'WR' else 0 for x in sim_df['Position']]
    sim_df['TE'] = [1 if x == 'TE' else 0 for x in sim_df['Position']]
    sim_df['Def'] = [1 if x == 'Def' else 0 for x in sim_df['Position']]
    
    sim_df['QB'] = sim_df['QB'].astype(float)
    sim_df['RB'] = sim_df['RB'].astype(float)
    sim_df['WR'] = sim_df['WR'].astype(float)
    sim_df['TE'] = sim_df['TE'].astype(float)
    sim_df['Def'] = sim_df['Def'].astype(float)
    sim_df['Salary'] = sim_df['Salary'].astype(float)
    
    return(sim_df)

one_sim_df = one_sim(complete_df)
print(one_sim_df.head(5))

                 Player    Points Position  Salary   QB   RB   WR   TE  Def
0          Salvon Ahmed  5.177558       RB  4700.0  0.0  1.0  0.0  0.0  0.0
1          Adam Shaheen  5.111074       TE  3300.0  0.0  0.0  0.0  1.0  0.0
2           David Moore  9.026730       WR  3400.0  0.0  0.0  1.0  0.0  0.0
3            Dan Arnold  7.099876       TE  3500.0  0.0  0.0  0.0  1.0  0.0
4  Laviska Shenault Jr.  8.666688       WR  3800.0  0.0  0.0  1.0  0.0  0.0


## Gets optimal lineup from one sim based on lineup constraints

In [9]:
def lineup_optimizer(df):
    
    # Define pulp object
    model = pulp.LpProblem('NFL', pulp.LpMaximize)

    # Initialize dicts that will store parameters. Keys are decision variables below 
    total_points = {}
    cost = {}
    QBs = {}
    RBs = {}
    WRs = {}
    TEs = {}
    DST = {}
    number_of_players = {}

    for i, player in df.iterrows():
        var_name = 'x' + str(i) # Create variable name
        decision_var = pulp.LpVariable(var_name, cat='Binary') # Initialize Variables

        total_points[decision_var] = player["Points"] # Create PPG Dictionary
        cost[decision_var] = player["Salary"] # Create Cost Dictionary

        # Create Dictionary for Player Types
        QBs[decision_var] = player["QB"]
        RBs[decision_var] = player["RB"]
        WRs[decision_var] = player["WR"]
        TEs[decision_var] = player["TE"]
        DST[decision_var] = player["Def"]
        number_of_players[decision_var] = 1.0

    # Define objective function and add it to the model
    objective_function = pulp.LpAffineExpression(total_points)
    model += objective_function

    #Define cost constraint and add it to the model
    total_cost = pulp.LpAffineExpression(cost)
    model += (total_cost <= 50000)

    # Add player type constraints
    QB_constraint = pulp.LpAffineExpression(QBs)
    RB_constraint = pulp.LpAffineExpression(RBs)
    WR_constraint = pulp.LpAffineExpression(WRs)
    TE_constraint = pulp.LpAffineExpression(TEs)
    DST_constraint = pulp.LpAffineExpression(DST)
    total_players = pulp.LpAffineExpression(number_of_players)

    model += (QB_constraint <= 1)
    model += (RB_constraint <= 3)
    model += (WR_constraint <= 4)
    model += (TE_constraint <= 2)
    model += (DST_constraint <= 1)
    model += (total_players <= 9)

    #model.status
    model.solve()
    
    # Add variable to original data frame: = 1 if player is drafted, = 0 if not drafted
    df['is_drafted'] = 0.0
    for var in model.variables():
        # Set is drafted to the value determined by the LP
        df.iloc[int(var.name[1:]),9] = var.varValue # column index 9 = is_drafted

    # Save optimal team in a new df
    optimal_team = df[df["is_drafted"] == 1.0]
    
    return(optimal_team)

# Output from one sim
print(lineup_optimizer(one_sim_df))

                       Player     Points Position  Salary   QB   RB   WR   TE  \
49   Washington Football Team  14.758868      Def  2600.0  0.0  0.0  0.0  0.0   
63           Marvin Jones Jr.  23.413377       WR  5700.0  0.0  0.0  1.0  0.0   
73               Logan Thomas  15.614283       TE  4000.0  0.0  0.0  0.0  1.0   
84               Robert Woods  24.907066       WR  6800.0  0.0  0.0  1.0  0.0   
92              Sammy Watkins  21.095825       WR  4600.0  0.0  0.0  1.0  0.0   
137              Alvin Kamara  23.240992       RB  7400.0  0.0  1.0  0.0  0.0   
176        Patrick Mahomes II  27.163583       QB  7900.0  1.0  0.0  0.0  0.0   
192             Jakobi Meyers  20.663177       WR  4600.0  0.0  0.0  1.0  0.0   
214            T.J. Hockenson  20.940382       TE  5200.0  0.0  0.0  0.0  1.0   

     Def  is_drafted  
49   1.0         1.0  
63   0.0         1.0  
73   0.0         1.0  
84   0.0         1.0  
92   0.0         1.0  
137  0.0         1.0  
176  0.0         1.0  
192  

## Run sim n times, get optimal lineup from each sim, add players to list

In [10]:
# Produce n lineups
def n_lineups(n, df):
    """
    produces 'n' optimal lineups from 'n' independent sims, returns data frame
    """
    player_list = []
    salary_list = []
    pos_list = []
    i = 0
    while i < n:
        # run new sim
        sim = one_sim(df)
        
        # Get optimal lineup from sim
        opt = lineup_optimizer(sim)
        
        # Store players in list?
        player_list.append(list(opt['Player']))
        salary_list.append(list(opt['Salary']))
        pos_list.append(list(opt['Position']))
        
        # i += 1
        i += 1
        
    # Flatten list of lists into list - Player
    flat_list1 = []
    for sublist in player_list:
        for item in sublist:
            flat_list1.append(item)
            
    # Flatten list of lists into list - Salary
    flat_list2 = []
    for sublist in salary_list:
        for item in sublist:
            flat_list2.append(item)
            
    # Flatten list of lists into list - Position
    flat_list3 = []
    for sublist in pos_list:
        for item in sublist:
            flat_list3.append(item)
            
    return(pd.DataFrame(list(zip(flat_list1, flat_list2, flat_list3)), columns = ['Player', 'Salary', 'Position']))

# Set number of sims to run
n = 1000

# Store in df
player_list = n_lineups(n, complete_df)

## calculate 'probability' that each player is in optimal lineup

In [11]:
# Count times each player shows up in optimal lineup
counts = player_list.groupby(['Player', 'Salary', 'Position']).size()

# convert to df
counts_df = counts.to_frame(name = 'NumLineups').reset_index()

# Calculate probability player is in optimal lineup
counts_df['OptimalPercent'] = counts_df['NumLineups'] / n

# Sort by optimal percentage
counts_df = counts_df.sort_values(by = 'OptimalPercent', ascending = False).reset_index(drop = True)
print(counts_df[counts_df['Salary'] <= 4200])
print(counts_df)

                       Player  Salary Position  NumLineups  OptimalPercent
3            Emmanuel Sanders  4200.0       WR         211           0.211
10             Dallas Goedert  3900.0       TE         144           0.144
15                 Jared Cook  3400.0       TE         123           0.123
24           Seattle Seahawks  3100.0      Def          94           0.094
25             Tre'Quan Smith  3200.0       WR          94           0.094
27             Rob Gronkowski  4200.0       TE          89           0.089
34               Hayden Hurst  3300.0       TE          80           0.080
35                Jordan Reed  3200.0       TE          77           0.077
41             Michael Gallup  3500.0       WR          70           0.070
43   Washington Football Team  2600.0      Def          63           0.063
47             Dalton Schultz  3200.0       TE          58           0.058
51           Breshad Perriman  3700.0       WR          55           0.055
54              New York 

In [12]:
# Next steps: 
# calculate actual standard deviations (player distributions), need game data for each player
# add dfs ownership percentage (can be simulated by dfs contest)
# add correlation component to sims (if qb chosen, then increase wr1, te1, etc projections by x %)

In [13]:
#def generate_contest_lineups(entries):