# NFL Fantasy Football 

# 0. Import libraries

In [1]:
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup

# Own libraries
from library_nfl import *

# Ignore warnings
# NOTE: this silences every warning category for the whole notebook,
# including deprecation notices from pandas/matplotlib.
import warnings
warnings.filterwarnings('ignore')

# Show all columns in pandas
pd.set_option('display.max_columns', 500)

# Graphing style
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so pick whichever spelling this install ships.
# If neither exists the default style is kept rather than aborting the notebook.
_colorblind_styles = [style_name
                      for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind')
                      if style_name in plt.style.available]
if _colorblind_styles:
    plt.style.use(_colorblind_styles[0])

%matplotlib inline

# 1. Scrape data

## 1.1 Player stats

In [2]:
# Links for data in each position
# Every season uses the same seven position pages, so the per-season URL lists
# are generated from a single template instead of being written out by hand.
stats_url_template = 'https://www.fantasypros.com/nfl/stats/{pos}.php?year={year}&scoring=PPR'
stats_positions = ['qb', 'rb', 'wr', 'te', 'k', 'dst', 'db']

urls_22, urls_21, urls_20, urls_19, urls_18 = (
    [stats_url_template.format(pos=pos, year=year) for pos in stats_positions]
    for year in (2022, 2021, 2020, 2019, 2018)
)

# Import 2022 data
data_stats_22 = pull_data_stats(urls_22)

# Import 2021 data
data_stats_21 = pull_data_stats(urls_21)

# Import 2020 data
data_stats_20 = pull_data_stats(urls_20)

# Import 2019 data
data_stats_19 = pull_data_stats(urls_19)

# Import 2018 data
# (previously this re-pulled the 2019 URLs into data_stats_19, so the 2018
# season was never loaded)
data_stats_18 = pull_data_stats(urls_18)


In [3]:
# Preview the first four rows of the 'RB' table returned for the 2022 stats URLs
data_stats_22['RB'].head(4)

Unnamed: 0,rank,player,att,yds,y/a,lg,20+,td,rec,tgt,yds.1,y/r,td.1,fl,g,fpts,fpts/g,pos,team
0,1,Austin Ekeler,204,915,4.5,72,10,13,107,127,722,6.7,5,3,17,372.7,21.9,RB,LAC
1,2,Christian McCaffrey,244,1139,4.7,49,14,8,85,108,741,8.7,5,0,17,356.4,21.0,RB,SF
2,3,Josh Jacobs,340,1653,4.9,86,14,12,53,64,400,7.5,0,1,17,328.3,19.3,RB,LV
3,4,Derrick Henry,349,1538,4.4,56,20,13,33,41,398,12.1,0,3,16,302.8,18.9,RB,TEN


## 1.2 Snap count

In [4]:
# Snap-count report pages (percentage view) for the 2018-2022 seasons,
# one URL per offensive position and one list per season
snapcount_url_template = 'https://www.fantasypros.com/nfl/reports/snap-counts/{pos}.php?year={year}&show=perc'
snapcount_positions = ('qb', 'rb', 'wr', 'te')

urls_22, urls_21, urls_20, urls_19, urls_18 = (
    [snapcount_url_template.format(pos=pos, year=year) for pos in snapcount_positions]
    for year in (2022, 2021, 2020, 2019, 2018)
)

# Pull each season, newest first
data_snapcount_22 = pull_data_snapcount(urls_22)
data_snapcount_21 = pull_data_snapcount(urls_21)
data_snapcount_20 = pull_data_snapcount(urls_20)
data_snapcount_19 = pull_data_snapcount(urls_19)
data_snapcount_18 = pull_data_snapcount(urls_18)

## Snapcount clean

In [5]:
# Display the 'RB' table returned for the 2022 snap-count URLs
data_snapcount_22['RB']

Unnamed: 0,player,team,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,ttl,avg_snap_pct_played,avg_snap_pct_global,pos
0,Mark Ingram II,NO,33.0,37.0,33.0,48.0,22.0,33.0,28.0,8.0,0.0,0.0,0.0,30.0,42.0,,0.0,0.0,0.0,0.0,205.0,31.400000,18.470588,RB
1,Brandon Bolden,LV,28.0,0.0,36.0,7.0,16.0,,0.0,0.0,7.0,0.0,5.0,0.0,0.0,2.0,0.0,4.0,21.0,0.0,81.0,14.000000,7.411765,RB
2,Cordarrelle Patterson,ATL,65.0,59.0,61.0,29.0,0.0,0.0,0.0,0.0,39.0,38.0,49.0,58.0,46.0,,50.0,37.0,50.0,28.0,382.0,46.846154,35.823529,RB
3,Giovani Bernard,TB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,45.0,25.0,45.000000,2.647059,RB
4,Rex Burkhead,HOU,71.0,37.0,41.0,32.0,21.0,,35.0,19.0,22.0,25.0,0.0,18.0,0.0,8.0,19.0,16.0,19.0,15.0,251.0,26.533333,23.411765,RB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,Malik Davis,DAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0,,13.0,18.0,0.0,4.0,0.0,0.0,0.0,32.0,9.0,78.0,18.166667,6.411765,RB
156,Jaylen Warren,PIT,37.0,29.0,20.0,25.0,51.0,31.0,23.0,29.0,,43.0,4.0,0.0,20.0,40.0,34.0,31.0,40.0,42.0,342.0,31.187500,29.352941,RB
157,Isiah Pacheco,KC,23.0,9.0,8.0,21.0,3.0,15.0,30.0,,22.0,56.0,40.0,51.0,45.0,43.0,35.0,49.0,45.0,36.0,351.0,31.235294,31.235294,RB
158,Julius Chestnut,TEN,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,13.0,10.0,2.0,0.0,22.0,0.0,29.0,11.750000,2.764706,RB


## 1.3 Team's target distribution by position

In [6]:
# Full team name -> abbreviation (includes the 'Free Agent' / 'FA' placeholder)
nfl_teams_dict = {
    'Kansas City Chiefs': 'KC',
    'Buffalo Bills': 'BUF',
    'Philadelphia Eagles': 'PHI',
    'Cincinnati Bengals': 'CIN',
    'Seattle Seahawks': 'SEA',
    'Chicago Bears': 'CHI',
    'Minnesota Vikings': 'MIN',
    'Jacksonville Jaguars': 'JAC',
    'New York Giants': 'NYG',
    'Detroit Lions': 'DET',
    'Los Angeles Chargers': 'LAC',
    'Free Agent': 'FA',
    'New York Jets': 'NYJ',
    'Baltimore Ravens': 'BAL',
    'Miami Dolphins': 'MIA',
    'Denver Broncos': 'DEN',
    'New Orleans Saints': 'NO',
    'Dallas Cowboys': 'DAL',
    'Arizona Cardinals': 'ARI',
    'Houston Texans': 'HOU',
    'Carolina Panthers': 'CAR',
    'New England Patriots': 'NE',
    'Washington Commanders': 'WAS',
    'Las Vegas Raiders': 'LV',
    'Tennessee Titans': 'TEN',
    'Pittsburgh Steelers': 'PIT',
    'Tampa Bay Buccaneers': 'TB',
    'Atlanta Falcons': 'ATL',
    'Los Angeles Rams': 'LAR',
    'San Francisco 49ers': 'SF',
    'Cleveland Browns': 'CLE',
    'Indianapolis Colts': 'IND',
    'Green Bay Packers': 'GB',
}

# List of full names (same order as the mapping above)
team_names = list(nfl_teams_dict.keys())
# List of abbreviations (position-aligned with team_names)
team_abbreviations = list(nfl_teams_dict.values())

In [7]:
# Target-distribution report (weeks 1-18), one single-URL list per season.
# The 2019 and 2018 lists used to be assigned to urls_20 by mistake, which left
# urls_19/urls_18 pointing at the snap-count pages from the previous cell and
# made df_target_dist_20 hold 2018 data.
target_dist_url_template = 'https://www.fantasypros.com/nfl/reports/targets-distribution/?year={year}&start=1&end=18'

urls_22 = [target_dist_url_template.format(year=2022)]

urls_21 = [target_dist_url_template.format(year=2021)]

urls_20 = [target_dist_url_template.format(year=2020)]

urls_19 = [target_dist_url_template.format(year=2019)]

urls_18 = [target_dist_url_template.format(year=2018)]

df_target_dist_22 = pull_data_target_distribution(urls_22, nfl_teams_dict)
df_target_dist_21 = pull_data_target_distribution(urls_21, nfl_teams_dict)
df_target_dist_20 = pull_data_target_distribution(urls_20, nfl_teams_dict)
df_target_dist_19 = pull_data_target_distribution(urls_19, nfl_teams_dict)
df_target_dist_18 = pull_data_target_distribution(urls_18, nfl_teams_dict)

In [8]:
# Preview the first four rows of the 2022 target-distribution frame
df_target_dist_22.head(4)

Unnamed: 0,team_name,wr_targets,wr_pct,rb_targets,rb_pct,te_targets,te_pct,total_targets,team
0,Arizona Cardinals,404,63.7,112,17.7,118,18.6,634,ARI
1,Atlanta Falcons,228,57.3,66,16.6,104,26.1,398,ATL
2,Baltimore Ravens,198,42.6,63,13.5,204,43.9,465,BAL
3,Buffalo Bills,367,66.0,112,20.1,77,13.8,556,BUF


# 2. RB Data cleaning

In [62]:
def create_data_rb(stats_data, stats_data_1, stats_data_2, snap_data, snap_data_1, snap_data_2, targets_data, targets_data_1, targets_data_2):
    """Assemble one row per current-season RB with stats and snap shares for three seasons.

    Parameters
    ----------
    stats_data, stats_data_1, stats_data_2 : mapping
        Season stats for the current season, the previous one and the one before
        that. Each must expose an ``'RB'`` DataFrame containing ``player``,
        ``team`` and the columns ``pos``, ``lg``, ``fpts/g``, ``y/a``, ``y/r``
        (those five are dropped).
    snap_data, snap_data_1, snap_data_2 : mapping
        Snap-count data for the same three seasons. Each ``'RB'`` DataFrame must
        contain ``player``, ``team``, ``avg_snap_pct_played`` and
        ``avg_snap_pct_global``.
    targets_data, targets_data_1, targets_data_2 : DataFrame
        Team target distributions. Accepted so existing callers keep working,
        but not used: the returned frame contains no target columns.
        TODO: merge these on ``team`` once the intended join is decided.

    Returns
    -------
    DataFrame
        The current-season RB stats left-joined (on ``player``) with:
        previous-season stats (suffix ``_1``), stats from two seasons back
        (suffix ``_2``), and snap data for the three seasons, whose team
        columns are ``team_0`` (current), ``team_1`` and ``team_2``.
        A snap team equal to ``'Multi'`` is replaced by the nearest more recent
        team: ``team_0`` falls back to the stats ``team``, ``team_1`` to the
        cleaned ``team_0`` and ``team_2`` to the cleaned ``team_1``.

    Notes
    -----
    All joins use ``player`` alone as the key, so players sharing a name will
    produce duplicated rows.
    """
    dropped_stat_cols = ['pos', 'lg', 'fpts/g', 'y/a', 'y/r']
    snap_cols = ['player', 'team', 'avg_snap_pct_played', 'avg_snap_pct_global']

    # Current-season stats keep `team`; past seasons drop it because the team
    # for those seasons comes from the snap data instead.
    df_rb = stats_data['RB'].drop(columns=dropped_stat_cols)
    df_rb_1 = stats_data_1['RB'].drop(columns=dropped_stat_cols + ['team'])
    df_rb_2 = stats_data_2['RB'].drop(columns=dropped_stat_cols + ['team'])

    # Snap counts for the three seasons
    df_snaps = snap_data['RB'][snap_cols]
    df_snaps_1 = snap_data_1['RB'][snap_cols]
    df_snaps_2 = snap_data_2['RB'][snap_cols]

    # Past stats: outer join so a player present in only one past season is kept,
    # then attach to the current season (left join keeps exactly the current RBs).
    df_rb_past = df_rb_1.merge(df_rb_2, how='outer', on='player', suffixes=('_1', '_2'))
    df_rb = df_rb.merge(df_rb_past, how='left', on='player')

    # Snap data for all three seasons in one frame, then attach it.
    # The snap frame's own `team` collides with the stats `team`, so it becomes `team_0`.
    df_snaps_past = df_snaps_1.merge(df_snaps_2, how='outer', on='player', suffixes=('_1', '_2'))
    df_snaps_all = df_snaps.merge(df_snaps_past, how='outer', on='player')
    df_rb = df_rb.merge(df_snaps_all, how='left', on='player', suffixes=('', '_0'))

    # Clean team with multiple teams. Order matters: each column falls back to
    # the already-cleaned, more recent one. Missing values are left untouched.
    for team_col, fallback_col in (('team_0', 'team'), ('team_1', 'team_0'), ('team_2', 'team_1')):
        df_rb[team_col] = df_rb[team_col].where(df_rb[team_col] != 'Multi', df_rb[fallback_col])

    # Return data
    return df_rb

## Create data for RB

In [63]:
# 2022 is the current season; 2021 and 2020 are the two look-back seasons
df_rb_22 = create_data_rb(
    stats_data=data_stats_22,
    stats_data_1=data_stats_21,
    stats_data_2=data_stats_20,
    snap_data=data_snapcount_22,
    snap_data_1=data_snapcount_21,
    snap_data_2=data_snapcount_20,
    targets_data=df_target_dist_22,
    targets_data_1=df_target_dist_21,
    targets_data_2=df_target_dist_20,
)

In [64]:
# Display the assembled 2022 RB frame returned by create_data_rb
df_rb_22

Unnamed: 0,rank,player,att,yds,20+,td,rec,tgt,yds.1,td.1,fl,g,fpts,team,rank_1,att_1,yds_1,20+_1,td_1,rec_1,tgt_1,yds_1.1,td_1.1,fl_1,g_1,fpts_1,rank_2,att_2,yds_2,20+_2,td_2,rec_2,tgt_2,yds_2.1,td_2.1,fl_2,g_2,fpts_2,team_0,avg_snap_pct_played,avg_snap_pct_global,team_1,avg_snap_pct_played_1,avg_snap_pct_global_1,team_2,avg_snap_pct_played_2,avg_snap_pct_global_2
0,1,Austin Ekeler,204,915,10,13,107,127,722,5,3,17,372.7,LAC,2.0,206.0,911.0,3.0,12.0,70.0,94.0,647.0,8.0,3.0,16.0,343.8,26.0,116.0,530.0,2.0,1.0,54.0,65.0,403.0,2.0,0.0,10.0,165.3,LAC,61.529412,61.529412,LAC,64.562500,60.764706,LAC,56.400000,35.2500
1,2,Christian McCaffrey,244,1139,14,8,85,108,741,5,0,17,356.4,SF,38.0,99.0,442.0,0.0,1.0,37.0,41.0,343.0,1.0,0.0,7.0,127.5,54.0,59.0,225.0,0.0,5.0,17.0,19.0,149.0,1.0,0.0,3.0,90.4,SF,74.352941,70.222222,CAR,60.571429,24.941176,CAR,77.333333,14.5000
2,3,Josh Jacobs,340,1653,14,12,53,64,400,0,1,17,328.3,LV,12.0,217.0,872.0,2.0,9.0,54.0,64.0,348.0,0.0,2.0,15.0,226.0,8.0,273.0,1065.0,3.0,12.0,33.0,45.0,238.0,0.0,2.0,15.0,231.3,LV,74.117647,74.117647,LV,63.266667,55.823529,LV,61.133333,57.3125
3,4,Derrick Henry,349,1538,20,13,33,41,398,0,3,16,302.8,TEN,22.0,219.0,937.0,9.0,10.0,18.0,20.0,154.0,0.0,0.0,8.0,193.3,3.0,378.0,2027.0,30.0,17.0,19.0,31.0,114.0,0.0,2.0,16.0,333.1,TEN,66.500000,62.588235,TEN,72.000000,33.882353,TEN,65.312500,65.3125
4,5,Saquon Barkley,295,1312,16,10,57,76,338,0,0,16,284.0,NYG,30.0,162.0,593.0,6.0,2.0,41.0,57.0,263.0,2.0,1.0,14.0,148.6,120.0,19.0,34.0,0.0,0.0,6.0,9.0,60.0,0.0,0.0,2.0,15.4,NYG,79.687500,75.000000,NYG,61.692308,47.176471,NYG,49.500000,6.1875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,264,John Kelly Jr.,0,0,0,0,0,0,0,0,0,0,0.0,CLE,157.0,2.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.3,,,,,,,,,,,,,,,,CLE,4.000000,0.235294,,,
264,265,Tim Flanders,0,0,0,0,0,0,0,0,0,0,0.0,NO,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
265,266,Willie Carter,0,0,0,0,0,0,0,0,0,0,0.0,CHI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
266,267,Brennan Clay,0,0,0,0,0,0,0,0,0,0,0.0,DEN,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
