# NFL Fantasy Football 

# 0. Import libraries

In [48]:
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup

# Own libraries
from library_nfl import *

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Show all columns in pandas
pd.set_option('display.max_columns', 500) 

# Graphing style
plt.style.use('seaborn-colorblind')

%matplotlib inline

# 1. Scrape data

## 1.1 Player stats

In [49]:
# Links for data in each position
urls_22 = ['https://www.fantasypros.com/nfl/stats/qb.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/rb.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/wr.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/te.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/k.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/dst.php?year=2022&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/db.php?year=2022&scoring=PPR']

urls_21 = ['https://www.fantasypros.com/nfl/stats/qb.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/rb.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/wr.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/te.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/k.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/dst.php?year=2021&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/db.php?year=2021&scoring=PPR']

urls_20 = ['https://www.fantasypros.com/nfl/stats/qb.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/rb.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/wr.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/te.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/k.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/dst.php?year=2020&scoring=PPR',
           'https://www.fantasypros.com/nfl/stats/db.php?year=2020&scoring=PPR']

# Import 2022 data
stats_22 = pull_data_stats(urls_22)

# Import 2021 data
stats_21 = pull_data_stats(urls_21)

# Import 2021 data
stats_20 = pull_data_stats(urls_20)


In [61]:
stats_22['RB'].head(4)

Unnamed: 0,rank,player,att,yds,y/a,lg,20+,td,rec,tgt,yds.1,y/r,td.1,fl,g,fpts,fpts/g,rost,pos,team
0,1,Austin Ekeler,204,915,4.5,72,10,13,107,127,722,6.7,5,3,17,372.7,21.9,100.0%,RB,LAC
1,2,Christian McCaffrey,244,1139,4.7,49,14,8,85,108,741,8.7,5,0,17,356.4,21.0,100.0%,RB,SF
2,3,Josh Jacobs,340,1653,4.9,86,14,12,53,64,400,7.5,0,1,17,328.3,19.3,99.9%,RB,LV
3,4,Derrick Henry,349,1538,4.4,56,20,13,33,41,398,12.1,0,3,16,302.8,18.9,99.9%,RB,TEN


## 1.2 Snap count

In [74]:
def pull_data_snapcount(urls):
    # Dictionary to store all tables
    dataframes = {}

    # Loop through each url and import data
    for url in urls: 
        # The read_html function returns a list of all the tables found on the web page
        tables = pd.read_html(url)

        # Assuming the table you want is the first one in the list, you can access it like this
        table_df = tables[0]

        # Get position from URL
        position = url.split('/')[6].split('.')[0].upper()

        # Remove %%!
        table_df = table_df.replace('\%', '', regex=True)
        # Remove byes!
        table_df = table_df.replace('bye', '200', regex=True)

        table_df.columns = [x.lower() for x in table_df.columns]

        table_df['pos'] = position

        # Keep only relevant columns
        # table_df = table_df[['player', 'team', 'pos', 'avg']]

        # Convert avg to numeric
        table_df.rename(columns={'avg': 'avg_snap_pct'}, inplace=True)

        if '18' in table_df.columns:
            int_columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', 'ttl','avg_snap_pct']
        else:
            int_columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', 'ttl','avg_snap_pct']


        table_df[int_columns] = table_df[int_columns].astype(float)

        table_df = table_df.replace(200, np.nan)


        dataframes[position] = table_df

    # Return dictionary 
    return dataframes

In [75]:
# URLS for last 3 years

urls_22 = ['https://www.fantasypros.com/nfl/reports/snap-counts/qb.php?year=2022&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/rb.php?year=2022&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/wr.php?year=2022&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/te.php?year=2022&show=perc']

urls_21 = ['https://www.fantasypros.com/nfl/reports/snap-counts/qb.php?year=2021&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/rb.php?year=2021&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/wr.php?year=2021&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/te.php?year=2021&show=perc']

urls_20 = ['https://www.fantasypros.com/nfl/reports/snap-counts/qb.php?year=2020&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/rb.php?year=2020&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/wr.php?year=2020&show=perc',
           'https://www.fantasypros.com/nfl/reports/snap-counts/te.php?year=2020&show=perc']


snap_count_22 = pull_data_snapcount(urls_22)

snap_count_21 = pull_data_snapcount(urls_21)

snap_count_20 = pull_data_snapcount(urls_20)

In [76]:
snap_count_22['RB'].head(4)

Unnamed: 0,player,team,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,ttl,avg_snap_pct,pos
0,Mark Ingram II,NO,33.0,37.0,33.0,48.0,22.0,33.0,28.0,8.0,0.0,0.0,0.0,30.0,42.0,,0.0,0.0,0.0,0.0,205.0,31.0,RB
1,Brandon Bolden,LV,28.0,0.0,36.0,7.0,16.0,,0.0,0.0,7.0,0.0,5.0,0.0,0.0,2.0,0.0,4.0,21.0,0.0,81.0,14.0,RB
2,Cordarrelle Patterson,ATL,65.0,59.0,61.0,29.0,0.0,0.0,0.0,0.0,39.0,38.0,49.0,58.0,46.0,,50.0,37.0,50.0,28.0,382.0,47.0,RB
3,Giovani Bernard,TB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,45.0,25.0,45.0,RB


## 1.3 Team's target distribution by position

In [56]:
# List of abbreviations
team_abbreviations = ['KC', 'BUF', 'PHI', 'CIN', 'SEA', 'CHI', 'MIN', 'JAC', 'NYG',
                    'DET', 'LAC', 'FA', 'NYJ', 'BAL', 'MIA', 'DEN', 'NO', 'DAL', 'ARI',
                    'HOU', 'CAR', 'NE', 'WAS', 'LV', 'TEN', 'PIT', 'TB', 'ATL', 'LAR',
                    'SF', 'CLE', 'IND', 'GB']
# List of full names 
team_names = ['Kansas City Chiefs', 'Buffalo Bills', 'Philadelphia Eagles',
            'Cincinnati Bengals', 'Seattle Seahawks', 'Chicago Bears',
            'Minnesota Vikings', 'Jacksonville Jaguars', 'New York Giants',
            'Detroit Lions', 'Los Angeles Chargers', 'Free Agent', 'New York Jets',
            'Baltimore Ravens', 'Miami Dolphins', 'Denver Broncos', 'New Orleans Saints',
            'Dallas Cowboys', 'Arizona Cardinals', 'Houston Texans',
            'Carolina Panthers', 'New England Patriots', 'Washington Commanders',
            'Las Vegas Raiders', 'Tennessee Titans', 'Pittsburgh Steelers',
            'Tampa Bay Buccaneers', 'Atlanta Falcons', 'Los Angeles Rams',
            'San Francisco 49ers', 'Cleveland Browns', 'Indianapolis Colts',
            'Green Bay Packers']

# Create the corrected dictionary
nfl_teams_dict = dict(zip(team_names, team_abbreviations))

In [52]:
def pull_data_target_distribution(urls, nfl_teams_dict):
    # Loop through each url and import data
    for url in urls: 
        # The read_html function returns a list of all the tables found on the web page
        tables = pd.read_html(url)

        # Assuming the table you want is the first one in the list, you can access it like this
        table_df = tables[0]

        # Clean column names
        table_df.columns = [x.lower() for x in table_df.columns]

        table_df.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)

        table_df.rename(columns=lambda x: x.replace('%', 'pct'), inplace=True)

        # Rename columns
        table_df.rename(columns={'team': 'team_name'}, inplace=True)

        # Create 
        table_df['team'] = table_df['team_name'].map(nfl_teams_dict)


    # Return dictionary 
    return table_df

In [53]:
urls_22 = ['https://www.fantasypros.com/nfl/reports/targets-distribution/?year=2022&start=1&end=18']

urls_21 = ['https://www.fantasypros.com/nfl/reports/targets-distribution/?year=2021&start=1&end=18']

urls_20 = ['https://www.fantasypros.com/nfl/reports/targets-distribution/?year=2020&start=1&end=18']

df_target_dist_22 = pull_data_target_distribution(urls_21, nfl_teams_dict)
df_target_dist_21 = pull_data_target_distribution(urls_21, nfl_teams_dict)
df_target_dist_20 = pull_data_target_distribution(urls_21, nfl_teams_dict)

In [77]:
df_target_dist_22.head(4)

Unnamed: 0,team_name,wr_targets,wr_pct,rb_targets,rb_pct,te_targets,te_pct,total_targets,team
0,Arizona Cardinals,359,63.0,103,18.1,108,18.9,570,ARI
1,Atlanta Falcons,251,45.2,146,26.3,158,28.5,555,ATL
2,Baltimore Ravens,337,56.9,83,14.0,172,29.1,592,BAL
3,Buffalo Bills,443,71.2,96,15.4,83,13.3,622,BUF
