# Scrape Fantasy Football Rankings <a id="return"></a>

1. [ESPN Superflex Rankings (Top 100 Overall) (Eric Karabell)](#superflex)
    * https://www.espn.com/fantasy/football/story/_/id/35420010/fantasy-football-superflex-rankings-2023-quarterback-running-back-wide-receiver-tight-end
2. [ESPN Expert Mock Draft; 10-Team, PPR](#espnmockdraft)
    * https://www.espn.com/fantasy/football/story/_/id/35967408/fantasy-football-mock-draft-10-team-ppr
3. [ESPN PPR Rankings by Position](#espnpprrankings)
    * https://www.espn.com/fantasy/football/story/_/id/35425170/fantasy-football-ppr-rankings-2023-quarterback-running-back-wide-receiver-tight-end
4. [PFF Dynasty Superflex Rankings](#pffsuperflexrankings)
    * https://www.pff.com/news/fantasy-football-post-2023-nfl-draft-fantasy-football-dynasty-superflex-rankings

[Return to Top](#return)

* Which sites do I want to hit?
    * https://www.espn.com/fantasy/football/story/_/id/35425170/fantasy-football-ppr-rankings-2023-quarterback-running-back-wide-receiver-tight-end
    * https://www.espn.com/fantasy/football/story/_/id/35967408/fantasy-football-mock-draft-10-team-ppr
    * https://www.espn.com/fantasy/football/story/_/id/35823568/fantasy-football-rankings-2023-nfl-rookies
    * https://www.espn.com/fantasy/football/story/_/id/35420010/fantasy-football-superflex-rankings-2023-quarterback-running-back-wide-receiver-tight-end
    * https://www.reddit.com/r/fantasyfootball/comments/zx8cgl/2023_redraft_rankings/
    * https://www.pff.com/news/fantasy-football-post-2023-nfl-draft-fantasy-football-dynasty-superflex-rankings

## Import Packages

In [2]:
# import needed packages
import numpy as np
import pandas as pd
import json
import os
import re, requests, bs4, csv, datetime
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

## Set User-Defined Fields

In [3]:
# URL of the ESPN superflex rankings article (top 100 overall)
espn_superflex_url = 'https://www.espn.com/fantasy/football/story/_/id/35420010/fantasy-football-superflex-rankings-2023-quarterback-running-back-wide-receiver-tight-end'

# URL of the ESPN expert mock draft article (10-team, PPR)
espn_mock_draft_url = 'https://www.espn.com/fantasy/football/story/_/id/35967408/fantasy-football-mock-draft-10-team-ppr'

# URL of the ESPN PPR rankings-by-position article
espn_ppr_rankings_url = 'https://www.espn.com/fantasy/football/story/_/id/35425170/fantasy-football-ppr-rankings-2023-quarterback-running-back-wide-receiver-tight-end'

# URL of the PFF dynasty superflex rankings article
pff_dynasty_superflex_rankings_url = 'https://www.pff.com/news/fantasy-football-post-2023-nfl-draft-fantasy-football-dynasty-superflex-rankings'

In [4]:
# create function to define how to print pandas dataframe
def print_full(pd_df: object):
    """Show an object (typically a pandas DataFrame) with widened display options.

    While the object is shown, pandas is temporarily configured to print every
    row, up to 500 columns, 3 digits of precision and centered column headers.
    The previous option values are restored afterwards.

    Args:
        pd_df: Object to show; anything the renderer accepts.

    Returns:
        None.
    """

    # `display` is only injected as a global inside IPython/Jupyter sessions;
    # import it explicitly and fall back to print so the helper also works
    # when the code is run as a plain script
    try:
        from IPython.display import display as show
    except ImportError:
        show = print

    # set pandas print options for the duration of the call only
    with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', 500,
        'display.precision', 3,
        'display.colheader_justify', 'center',
    ):

        # render the object
        show(pd_df)

# ESPN Superflex Rankings (Top 100 Overall) (Eric Karabell) <a id="superflex"></a>

[Return to Top](#return)

In [15]:
# create function to scrape ESPN superflex rankings data
def _parse_superflex_entry(text: str) -> list:
    """Split one ranking string such as 'Name, Team (QB1)' into its fields.

    Returns [player_name, team_nickname, position, position_rank]; raises
    IndexError when the text does not have the expected shape.
    """

    # text before the first ', ' is the player name
    parts = text.split(', ')
    player_name = parts[0].strip()

    # the next piece holds 'Team (POS#)'
    team_and_position = parts[1].split('(')
    team_nickname = team_and_position[0].strip()
    position_token = team_and_position[1].split(')')[0].strip()

    # split the leading non-digit run (position) from what follows (rank);
    # the capture group yields ['', position, rank], so drop the empty head
    position_pieces = re.split(r'(^[^\d]+)', position_token)[1:]
    position = position_pieces[0]
    position_rank = position_pieces[1]

    return [player_name, team_nickname, position, position_rank]


# create function to scrape ESPN superflex rankings data
def scrape_espn_superflex_url(url: str):
    """Scrape the ESPN superflex rankings article into a DataFrame.

    Every <b> tag on the page is treated as a candidate ranking entry. The
    text of its first two child nodes is tried first and the first child
    alone is the fallback. Tags whose text does not parse as a ranking entry
    are skipped.

    Args:
        url: Address of the ESPN superflex rankings article.

    Returns:
        pandas DataFrame with columns player_name, team_nickname, position,
        position_rank (kept as text) and overall_rank (1-based order in which
        the entries appear on the page).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """

    # create a GET request; fail fast on a hung connection or an error page
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # parse the HTML code
    soup = bs4.BeautifulSoup(r.content, 'html.parser')

    # find all b tags that contain the superflex rankings data
    b_tags = soup.find_all(['b'])

    # create empty list to hold row data for dataframe
    row_data = []

    # loop through each b tag and parse it into player name, team nickname, position and position ranking
    for b in b_tags:

        children = b.contents

        # nothing to parse in an empty tag
        if not children:
            continue

        # candidate strings in order of preference: first two children joined, then the first child alone
        first_text = str(children[0].string)
        candidates = [first_text]
        if len(children) > 1:
            candidates.insert(0, first_text + str(children[1].string))

        for text in candidates:
            try:
                row_data.append(_parse_superflex_entry(text))
            except IndexError:
                # this candidate is not shaped like a ranking entry; try the next one
                continue
            break

    # create dataframe using row_data
    df = pd.DataFrame(row_data, columns=['player_name', 'team_nickname', 'position', 'position_rank'])

    # overall rank is the 1-based position of the entry on the page
    df['overall_rank'] = range(1, len(df) + 1)

    return df

In [16]:
# run scrape_espn_superflex_url
espn_superflex_df = scrape_espn_superflex_url(espn_superflex_url)

# check dataframe info and first 2 rows
# DataFrame.info() prints its summary itself and returns None, so call it
# directly instead of handing its None result to print_full
espn_superflex_df.info()
print_full(espn_superflex_df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   player_name    100 non-null    object
 1   team_nickname  100 non-null    object
 2   position       100 non-null    object
 3   position_rank  100 non-null    object
 4   overall_rank   100 non-null    int64 
dtypes: int64(1), object(4)
memory usage: 4.0+ KB


None

Unnamed: 0,player_name,team_nickname,position,position_rank,overall_rank
0,Patrick Mahomes,Chiefs,QB,1,1
1,Josh Allen,Bills,QB,2,2


# ESPN Expert Mock Draft; 10-Team, PPR <a id="espnmockdraft"></a>

* Tristan H. Cockcroft, Liz Loza, Eric Karabell, Mike Clay, Eric Moody, Kyle Soppe, Stephania Bell, Daniel Dopp, Tyler Fulghum and Matt Bowen

[Return to Top](#return)

In [53]:
# create function to scrape ESPN Expert Mock Draft
def scrape_espn_mock_draft_url(url: str):
    """Scrape the ESPN expert mock draft article into a DataFrame.

    Only <p> tags whose text starts with 'QB' and mentions 'pick'
    (case-insensitive) are parsed. Their text is cut into entries at every
    ')' and each entry is split at its first digit into a position prefix
    and the remaining pick description.

    Args:
        url: Address of the ESPN mock draft article.

    Returns:
        pandas DataFrame with columns player_name, position, team_short_name,
        draft_rnd and draft_pick, sorted by round then pick, with a fresh
        0-based index.
    """

    # fetch the page and parse the HTML
    response = requests.get(url)
    page = bs4.BeautifulSoup(response.content, 'html.parser')

    # rows collected across all matching paragraphs
    picks = []

    for paragraph in page.find_all('p'):

        text = paragraph.text

        # skip paragraphs that are not draft-pick listings
        if 'pick' not in text.lower() or not text.startswith('QB'):
            continue

        # one entry per ')'-terminated chunk; the trailing remainder is dropped
        entries = [chunk.replace('\n', '') for chunk in text.split(')')[:-1]]

        # split each entry once: ['', <leading non-digits>, <rest from first digit>]
        pieces = [re.split(r'(^[^\d]+)', entry) for entry in entries]

        # leading non-digit run is the position
        positions = [piece[1] for piece in pieces]

        # text before the first ',' minus its first two characters is the player name
        names = [piece[2].split(',')[0][2:] for piece in pieces]

        # text between the first ',' and the '(' is the team short name
        teams = [piece[2].split(',')[1].split('(')[0].strip() for piece in pieces]

        # after the ':' comes '<round>.<pick>'
        rounds = [int(piece[2].split(':')[1].strip().split('.')[0]) for piece in pieces]
        pick_numbers = [int(piece[2].split(':')[1].strip().split('.')[1]) for piece in pieces]

        # add one tuple per drafted player
        picks.extend(zip(names, positions, teams, rounds, pick_numbers))

    # build the dataframe and order it by draft slot
    column_names = ['player_name', 'position', 'team_short_name', 'draft_rnd', 'draft_pick']
    df = pd.DataFrame(picks, columns=column_names)
    df = df.sort_values(['draft_rnd', 'draft_pick'])
    df = df.reset_index(drop=True)

    return df

In [54]:
# run scrape_espn_mock_draft_url
espn_mock_draft_df = scrape_espn_mock_draft_url(espn_mock_draft_url)

# check dataframe info and first 2 rows
# DataFrame.info() prints its summary itself and returns None, so call it
# directly instead of handing its None result to print_full
espn_mock_draft_df.info()
print_full(espn_mock_draft_df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   player_name      160 non-null    object
 1   position         160 non-null    object
 2   team_short_name  160 non-null    object
 3   draft_rnd        160 non-null    int64 
 4   draft_pick       160 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 6.4+ KB


None

Unnamed: 0,player_name,position,team_short_name,draft_rnd,draft_pick
0,Christian McCaffrey,RB,SF,1,1
1,Justin Jefferson,WR,Min,1,2


# ESPN PPR Rankings by Position <a id="espnpprrankings"></a>

[Return to Top](#return)

In [31]:
# create function to scrape ESPN PPR Rankings by Position
def scrape_espn_ppr_rankings_url(url: str):
    """Scrape the ESPN PPR rankings-by-position article into a DataFrame.

    Every <p> tag whose text starts with '1' is treated as one position's
    ranking list. The lists are labelled QB, RB, WR, TE in the order they
    appear on the page.

    Args:
        url: Address of the ESPN PPR rankings article.

    Returns:
        pandas DataFrame with columns player_name, position, city_name and
        position_rank, sorted by position then rank, with a fresh 0-based
        index.

    Raises:
        IndexError: If more than four paragraphs start with '1', because only
            four position labels are available.
    """

    # fetch the page and parse the HTML
    response = requests.get(url)
    page = bs4.BeautifulSoup(response.content, 'html.parser')

    # labels assigned to the ranking lists in page order
    position_labels = ['QB', 'RB', 'WR', 'TE']

    # rows collected across all ranking lists
    rows = []

    # how many ranking lists have been consumed so far
    lists_seen = 0

    for paragraph in page.find_all('p'):

        text = paragraph.text

        # only paragraphs starting with '1' hold a ranking list
        if not text.startswith('1'):
            continue

        # splitting on digit runs (kept via the capture group) alternates
        # rank numbers with the '. Name, City' text that follows each one
        tokens = re.split(r'(\d+)', text)

        # the all-numeric tokens are the position ranks
        ranks = [int(token) for token in tokens if token.isnumeric()]

        # tokens beginning with '.' describe a player
        descriptions = [token for token in tokens if token.startswith('.')]

        # drop the first two characters, keep everything before the first ','
        names = [description.split(',')[0][2:] for description in descriptions]

        # text after the first ',' without newlines and its first character
        cities = [description.split(',')[1].replace('\n', '')[1:] for description in descriptions]

        # every row of this list shares the same position label
        label = position_labels[lists_seen]
        rows.extend((name, label, city, rank) for name, city, rank in zip(names, cities, ranks))

        lists_seen += 1

    # build the dataframe and order it by position and rank
    column_names = ['player_name', 'position', 'city_name', 'position_rank']
    df = pd.DataFrame(rows, columns=column_names)
    df = df.sort_values(['position', 'position_rank'])
    df = df.reset_index(drop=True)

    return df

In [32]:
# run scrape_espn_ppr_rankings_url
espn_ppr_rankings_df = scrape_espn_ppr_rankings_url(espn_ppr_rankings_url)

# check dataframe info and first 2 rows
# DataFrame.info() prints its summary itself and returns None, so call it
# directly instead of handing its None result to print_full
espn_ppr_rankings_df.info()
print_full(espn_ppr_rankings_df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   player_name  207 non-null    object
 1   position     207 non-null    object
 2   city_name    207 non-null    object
 3   pos_rank     207 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 6.6+ KB


None

Unnamed: 0,player_name,position,city_name,pos_rank
0,Patrick Mahomes,QB,KC,1
1,Josh Allen,QB,BUF,2


# PFF Dynasty Superflex Rankings <a id="pffsuperflexrankings"></a>

[Return to Top](#return)

In [31]:
# create definition to scrape PFF Dynasty Superflex Rankings
def scrape_pff_dynasty_superflex_rankings_url(url: str):
    """Scrape the PFF dynasty superflex rankings table into a DataFrame.

    Reads every <tr> of the first <tbody> on the page and keeps the text of
    its first five <td> cells. The fifth cell is reduced to its first run of
    digits. The first row is then dropped because it is treated as the
    table's header row.

    Args:
        url: Address of the PFF dynasty superflex rankings article.

    Returns:
        pandas DataFrame with columns overall_rank, position, player_name,
        team_nickname and position_rank, all kept as text. The index keeps
        its original positions, so it starts at 1 after the header row is
        removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no <tbody> element.
        IndexError: If a table row has fewer than five <td> cells.
    """

    # create a GET request; fail fast on a hung connection or an error page
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # parse the HTML code
    soup = bs4.BeautifulSoup(r.content, 'html.parser')

    # find the tbody tag; report a missing table explicitly
    tbody = soup.find('tbody')
    if tbody is None:
        raise ValueError(f'no <tbody> element found at {url}')

    # create empty list to store data for dataframe
    data = []

    # loop through all tr tags of the table body
    for tr in tbody.find_all('tr'):

        # grab the stripped text of all td tags
        cells = [td.text.strip() for td in tr.find_all('td')]

        # split the fifth cell around its first run of digits
        pieces = re.split(r'(\d+)', cells[4])

        # cells without digits (such as the header row) split into a single
        # piece, so keep that text; otherwise keep the first digit run
        pos_rank = pieces[1] if len(pieces) > 1 else pieces[0]

        # add text to list
        data.append(cells[:4] + [pos_rank])

    # create dataframe using data; naming the columns here also works when no rows were found
    df = pd.DataFrame(data, columns=['overall_rank', 'position', 'player_name', 'team_nickname', 'position_rank'])

    # remove first (header) row
    return df.iloc[1:]

In [32]:
# run scrape_espn_ppr_rankings_url
pff_dynasty_superflex_rankings_df = scrape_pff_dynasty_superflex_rankings_url(pff_dynasty_superflex_rankings_url)

# check dataframe info and first 2 rows
print_full(pff_dynasty_superflex_rankings_df.info())
print_full(pff_dynasty_superflex_rankings_df.head(2))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 1 to 300
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   overall_rank   300 non-null    object
 1   position       300 non-null    object
 2   player_name    300 non-null    object
 3   team_nickname  300 non-null    object
 4   pos_rank       300 non-null    object
dtypes: object(5)
memory usage: 11.8+ KB


None

Unnamed: 0,overall_rank,position,player_name,team_nickname,pos_rank
1,1,QB,Jalen Hurts,Eagles,1
2,2,QB,Josh Allen,Bills,2


In [38]:
# scratch cell: print the <td> cells of every row in the first <tbody>
# NOTE: relies on `soup` having been created by another cell beforehand
# NOTE(review): `data` and `counter` are assigned but not used in this cell
data = []
counter = 0
for tr in soup.find('tbody').find_all('tr'):
    # all cells of the current row
    tds = tr.find_all('td')
    print(tds)

[<td class="first-column">{{ source.name }}</td>, <td>{{ source.projection }}</td>, <td>{{ source.diff }}</td>, <td><a :href="propData.prop.url + '?utm_source=fantasypros.com&amp;utm_medium=referral&amp;utm_campaign=beticon'" target="_blank">{{ source.pick }}</a></td>, <td>{{ source.cover}}</td>]


In [26]:
# scratch cell: fetch one candidate page and inspect its container markup
# Making a GET request (alternative sources are kept commented out; only the last one is active)
# r = requests.get('https://www.reddit.com/r/fantasyfootball/comments/zx8cgl/2023_redraft_rankings/')
# r = requests.get('https://www.espn.com/fantasy/football/story/_/id/35425170/fantasy-football-ppr-rankings-2023-quarterback-running-back-wide-receiver-tight-end')
# r = requests.get('https://www.espn.com/fantasy/football/story/_/id/35967408/fantasy-football-mock-draft-10-team-ppr')
# r = requests.get('https://www.espn.com/fantasy/football/story/_/id/35420010/fantasy-football-superflex-rankings-2023-quarterback-running-back-wide-receiver-tight-end')
# r = requests.get('https://www.pff.com/news/fantasy-football-post-2023-nfl-draft-fantasy-football-dynasty-superflex-rankings')
r = requests.get('https://www.fantasypros.com/nfl/rankings/ppr-cheatsheets.php')

# another candidate source:
# https://thehuddle.com/lists/2023-fantasy-football-ppr-rankings/

 
# Parsing the HTML
soup = bs4.BeautifulSoup(r.content, 'html.parser')
# print(soup.prettify())
# print(soup)

# earlier selector experiments, kept for reference
# s = soup.find('p')
# content = soup.find_all('div', class_ = 'article-body')
# content = soup.find('tbody').find_all('tr')
# content = soup.find('table', id="ranking-table")
# content = soup.find('tbody')
# all div tags with class 'rankings-app'
content = soup.find_all('div', class_ = 'rankings-app')
# content = soup.find_all('thead')
# all div tags with class 'container'
table = soup.find_all('div', class_ = 'container')
print(table)
# print(soup.prettify())
print(content)

[<div class="container" id="main-container" role="main">
<div class="main-content-wrap main-content-wrap--rankings-page">
<div class="main-content main-content--rankings-page" id="rankings-app">
<div class="banners-wrap">
<div class="live-draft-banner hide-print" v-cloak="" v-if="liveDraftEnabled">
<span>Draft Mode is ON</span>
</div>
<div class="sleeper-promo live-draft-banner hide-print" v-else="">
<a class="sleeper-promo--link" href="https://go.slpr.link/FantasyProsWeb" onclick="ga('send', 'event', 'NFL ECR', 'Sleeper Click');" target="_blank">
<img alt="Draft with Sleeper" class="sleeper-image mobile-image" src="https://images.fantasypros.com/images/promos/sleeper-wsid-promo-780x70.png"/>
<img alt="Draft with Sleeper" class="sleeper-image desktop-image" src="https://images.fantasypros.com/images/promos/sleeper-wsid-promo-1512x70.png"/></a>
</div>
</div>
<div class="options-overlay" v-cloak="" v-show="breakpoints.show_overlay &amp;&amp; isOverlayEnabled">
<div class="options-overlay

In [12]:
# scratch cell: look for the ranking table in the FantasyPros PPR cheat sheet page
# Making a GET request
r = requests.get('https://www.fantasypros.com/nfl/rankings/ppr-cheatsheets.php')

# check status code for response received
# success code - 200
print(r)
 
# Parsing the HTML
soup = bs4.BeautifulSoup(r.content, 'html.parser')
# print(soup.prettify())

# print([tag.name for tag in soup.find_all()])

# all table tags with id 'ranking_table'
# NOTE(review): the recorded run printed an empty list; presumably the table
# is filled in client-side and is absent from the raw HTML - confirm
content = soup.find_all('table', id='ranking_table')
print(content)

<Response [200]>
[]


In [16]:
from io import StringIO

import requests
from bs4 import BeautifulSoup
from bs4 import Comment
import pandas as pd


# scratch cell: look for tables hidden inside HTML comments of the FantasyPros page
url = 'https://www.fantasypros.com/nfl/rankings/ppr-cheatsheets.php'
response = requests.get(url)

soup = BeautifulSoup(response.text, 'html.parser')

# collect every HTML comment node in the document
comments = soup.find_all(string=lambda text: isinstance(text, Comment))

# first table parsed out of each comment that mentions 'table'
tables = []
for each in comments:
    if 'table' in each:
        try:
            # wrap the markup in StringIO: passing literal HTML strings to
            # read_html is deprecated in recent pandas versions
            tables.append(pd.read_html(StringIO(each))[0])
        except ValueError:
            # read_html raises ValueError when the markup holds no table;
            # such comments are simply skipped
            continue