In [2]:
#import dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pymongo
# SQL Alchemy
from sqlalchemy import create_engine, event
import time
from datetime import datetime

# PyMySQL 
import pymysql
pymysql.install_as_MySQLdb()

# Config variables
from config import local_db_user, local_db_pwd, local_db_endpoint, local_db_port, local_db_name
from config import remote_db_user, remote_db_pwd, remote_db_endpoint, remote_db_port, remote_db_name

https://towardsdatascience.com/scraping-nfl-stats-to-compare-quarterback-efficiencies-4989642e02fe
https://stmorse.github.io/journal/pfr-scrape-python.html
https://www.pro-football-reference.com/players/H/HenrDe00/fantasy/2020/
https://ffpredictor.thefantasytakeaway.com/

In [52]:
combined_all_df = pd.DataFrame()

for year in range(2002, 2021):
    
    print(year)
    print('---')

    url = 'https://www.pro-football-reference.com'
    maxp = 300
    
    # grab fantasy players
    r = requests.get(url + '/years/' + str(year) + '/fantasy.htm')
    soup = BeautifulSoup(r.content, 'html.parser')
    parsed_table = soup.find_all('table')[0]  

    df = []

    # first 2 rows are col headers
    for i,row in enumerate(parsed_table.find_all('tr')[2:]):
        if i % 10 == 0: print(i, end=' ')
        if i >= maxp: 
            print('\nComplete.')
            break

        try:

            name_td = row.find('td', attrs={'data-stat': 'player'})
            name = name_td.a.get_text()
            stub = name_td.a.get('href')
            stub = stub[:-4] + '/fantasy/' + str(year)
            pos = row.find('td', attrs={'data-stat': 'fantasy_pos'}).get_text()
            total_Fpts = row.find('td', attrs={'data-stat': 'fantasy_points_ppr'}).get_text()
            vbd = row.find('td', attrs={'data-stat': 'vbd'}).get_text()
            pos_rank = row.find('td', attrs={'data-stat': 'fantasy_rank_pos'}).get_text()
            ov_rank = row.find('td', attrs={'data-stat': 'fantasy_rank_overall'}).get_text()

            # grab this players stats
            tdf = pd.read_html(url + stub)[0]  

            # get rid of MultiIndex, just keep last row
            tdf.columns = tdf.columns.get_level_values(-1)

            # fix the away/home column
            tdf = tdf.rename(columns={'Unnamed: 4_level_2': 'Away'})
            tdf['Away'] = [1 if r=='@' else 0 for r in tdf['Away']]

            # drop all intermediate stats
            tdf = tdf.iloc[:,[1,2,3,4,5,-9,-8,-3]]

            # drop "Total" row
            tdf = tdf.query('Date != "Total"')

            # add other info
            tdf['Name'] = name
            tdf['Position'] = pos
            tdf['Season'] = year
            tdf['total_Fpts'] = total_Fpts
            tdf['vbd'] = vbd
            tdf['pos_rank'] = pos_rank
            tdf['ov_rank'] = ov_rank

            df.append(tdf)
        except:
            pass

    df = pd.concat(df)
    combined_all_df = combined_all_df.append(df)

combined_all_df

2002
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2003
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2004
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2005
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2006
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2007
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2008
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 
Complete.
2009
---
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170

Unnamed: 0,G#,Date,Tm,Away,Opp,Yds,TD,FantPt,Name,Position,...,total_Fpts,vbd,pos_rank,ov_rank,Att,Rec,Result,Pos,Num,Pct
0,1.0,2002-09-08,KAN,1,CLE,14.0,2.0,38.1,Priest Holmes,RB,...,440.7,220,1,1,,,,,,
1,2.0,2002-09-15,KAN,0,JAX,0.0,0.0,11.6,Priest Holmes,RB,...,440.7,220,1,1,,,,,,
2,3.0,2002-09-22,KAN,1,NWE,7.0,2.0,35.8,Priest Holmes,RB,...,440.7,220,1,1,,,,,,
3,4.0,2002-09-29,KAN,0,MIA,3.0,0.0,17.5,Priest Holmes,RB,...,440.7,220,1,1,,,,,,
4,5.0,2002-10-06,KAN,1,NYJ,0.0,0.0,35.3,Priest Holmes,RB,...,440.7,220,1,1,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,12.0,2020-12-06,NYJ,0,LVR,,,7.4,Josh Adams,RB,...,36.6,,83,,,,,,19.0,30.6%
4,13.0,2020-12-13,NYJ,1,SEA,,,3.0,Josh Adams,RB,...,36.6,,83,,,,,,12.0,22.6%
5,14.0,2020-12-20,NYJ,1,LAR,,,,Josh Adams,RB,...,36.6,,83,,,,,,1.0,1.5%
6,15.0,2020-12-27,NYJ,0,CLE,,,,Josh Adams,RB,...,36.6,,83,,,,,,0.0,0.0%


In [55]:
# saving the dataframe 
combined_all_df.to_csv('weekly_stats_all.csv') 

# Pro Reference top 300 per year scrape

In [7]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324


 


[WDM] - Trying to download new driver from http://chromedriver.storage.googleapis.com/88.0.4324.96/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\Steven\.wdm\drivers\chromedriver\win32\88.0.4324.96]


In [25]:
'https://www.pro-football-reference.com/years/{year}/fantasy.htm'

# URL of page to be scraped

combined_df = pd.DataFrame()

for year in range(2002, 2021):
    
    print(year)
    
    url = f'https://www.pro-football-reference.com/years/{year}/fantasy.htm'
    #print(url)

    # Retrieve page with the browser module
    browser.visit(url)

    # setup html parser
    html = browser.html
    fantasy_soup = BeautifulSoup(html, 'html.parser')

    # save the stats table as a df
    stats_table = fantasy_soup.find("table", {"class":"per_match_toggle"})
    stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]

    #stats_df[['FF Pts','Plays','PAATT','PACMP','PAYDS','PATDS','RUATT','RUYDS','RUTDS','INT','FUM']] = stats_df[['FF Pts','Plays','PAATT','PACMP','PAYDS','PATDS','RUATT','RUYDS','RUTDS','INT','FUM']].apply(pd.to_numeric)
    #stats_df.replace('-', 0)

    stats_df['Season'] = year
    combined_df = combined_df.append(stats_df) 
        
combined_df



2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,32,Season
0,1,Priest Holmes*+,KAN,RB,29,14,14,0,1,0,...,,,371,440.7,447.7,405.7,220,1,1,2002
1,2,Ricky Williams*+,MIA,RB,25,16,16,0,0,0,...,,,316,362.6,372.6,339.1,165,2,2,2002
2,3,LaDainian Tomlinson*,SDG,RB,23,16,16,0,0,0,...,,,305,384.2,391.2,344.7,155,3,3,2002
3,4,Clinton Portis,DEN,RB,21,16,12,0,0,0,...,,,283,316.2,325.2,299.7,133,4,4,2002
4,5,Marvin Harrison*+,IND,WR,30,16,16,0,0,0,...,1,,241,384.2,387.2,312.7,122,1,5,2002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641,622,Andre Roberts*,BUF,WR,32,15,0,0,0,0,...,,,-1,3.1,5.1,1.1,,243,,2020
642,623,Nate Sudfeld,PHI,QB,27,1,0,5,12,32,...,,,-2,-1.5,0.5,-0.5,,83,,2020
643,624,Nsimba Webster,LAR,,24,16,0,0,0,0,...,,,-2,-2.0,-1.0,-2.0,,246,,2020
644,625,Dede Westbrook,JAX,WR,27,2,0,0,0,0,...,,,-2,-0.6,0.4,-1.1,,245,,2020


In [26]:
combined_df.columns = ['Rk', 'Player', 'Team', 'Pos', 'Age', 'Games Played', 'Games Started', 'Pass Comp', 'Pass Att', 'Pass Yds', 'Pass TD', 'Pass Int', 'Rush Att', 'Rush Yds', 'Rush Y/A', 'Rush TD', 'Rec Tgt', 'Receptions', 'Rec Yds', 'Rec Y/R', 'Rec TD', 'Fumbles', 'Fumbles Lost', 'Total TD', '2pt Convs Made', '2pt Conv Passes', 'FPts', 'PPR', 'DK Fpts', 'FD Fpts', 'VBD', 'Pos Rank', 'Overall Rank', 'Season']
combined_df['Player'] = combined_df['Player'].str.replace('*', '')
combined_df['Player'] = combined_df['Player'].str.replace('+', '')
combined_df['Player'] = combined_df['Player'].str.strip()
combined_df

Unnamed: 0,Rk,Player,Team,Pos,Age,Games Played,Games Started,Pass Comp,Pass Att,Pass Yds,...,2pt Convs Made,2pt Conv Passes,FPts,PPR,DK Fpts,FD Fpts,VBD,Pos Rank,Overall Rank,Season
0,1,Priest Holmes,KAN,RB,29,14,14,0,1,0,...,,,371,440.7,447.7,405.7,220,1,1,2002
1,2,Ricky Williams,MIA,RB,25,16,16,0,0,0,...,,,316,362.6,372.6,339.1,165,2,2,2002
2,3,LaDainian Tomlinson,SDG,RB,23,16,16,0,0,0,...,,,305,384.2,391.2,344.7,155,3,3,2002
3,4,Clinton Portis,DEN,RB,21,16,12,0,0,0,...,,,283,316.2,325.2,299.7,133,4,4,2002
4,5,Marvin Harrison,IND,WR,30,16,16,0,0,0,...,1,,241,384.2,387.2,312.7,122,1,5,2002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641,622,Andre Roberts,BUF,WR,32,15,0,0,0,0,...,,,-1,3.1,5.1,1.1,,243,,2020
642,623,Nate Sudfeld,PHI,QB,27,1,0,5,12,32,...,,,-2,-1.5,0.5,-0.5,,83,,2020
643,624,Nsimba Webster,LAR,,24,16,0,0,0,0,...,,,-2,-2.0,-1.0,-2.0,,246,,2020
644,625,Dede Westbrook,JAX,WR,27,2,0,0,0,0,...,,,-2,-0.6,0.4,-1.1,,245,,2020


In [28]:
combined_df = combined_df.fillna(0)
combined_df

Unnamed: 0,Rk,Player,Team,Pos,Age,Games Played,Games Started,Pass Comp,Pass Att,Pass Yds,...,2pt Convs Made,2pt Conv Passes,FPts,PPR,DK Fpts,FD Fpts,VBD,Pos Rank,Overall Rank,Season
0,1,Priest Holmes,KAN,RB,29,14,14,0,1,0,...,0,0,371,440.7,447.7,405.7,220,1,1,2002
1,2,Ricky Williams,MIA,RB,25,16,16,0,0,0,...,0,0,316,362.6,372.6,339.1,165,2,2,2002
2,3,LaDainian Tomlinson,SDG,RB,23,16,16,0,0,0,...,0,0,305,384.2,391.2,344.7,155,3,3,2002
3,4,Clinton Portis,DEN,RB,21,16,12,0,0,0,...,0,0,283,316.2,325.2,299.7,133,4,4,2002
4,5,Marvin Harrison,IND,WR,30,16,16,0,0,0,...,1,0,241,384.2,387.2,312.7,122,1,5,2002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641,622,Andre Roberts,BUF,WR,32,15,0,0,0,0,...,0,0,-1,3.1,5.1,1.1,0,243,0,2020
642,623,Nate Sudfeld,PHI,QB,27,1,0,5,12,32,...,0,0,-2,-1.5,0.5,-0.5,0,83,0,2020
643,624,Nsimba Webster,LAR,0,24,16,0,0,0,0,...,0,0,-2,-2.0,-1.0,-2.0,0,246,0,2020
644,625,Dede Westbrook,JAX,WR,27,2,0,0,0,0,...,0,0,-2,-0.6,0.4,-1.1,0,245,0,2020


In [30]:
# saving the dataframe 
combined_df.to_csv('all_players_yearly_stats_2002-2020.csv') 

# The huddle QB Scrape

In [4]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [C:\Users\Steven\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache


 


In [5]:
# URL of page to be scraped

combined_QB_df = pd.DataFrame()

for year in range(2012, 2021):
    
    print(year)
    print('---')
    
    for week in range(1, 18):

        #print(year)
        #print(week)
        #print('---')
        pos = 'QB'
        url = f'https://tools.thehuddle.com/weekly?week={week}&year={year}&position={pos}&formulaId=6'
        #print(url)

        # Retrieve page with the browser module
        browser.visit(url)

        # setup html parser
        html = browser.html
        fantasy_soup = BeautifulSoup(html, 'html.parser')

        # save the stats table as a df
        stats_table = fantasy_soup.find("table", {"class":"dataTable"})
        stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]
        stats_df.dropna(axis = 0, how = 'all', inplace = True)
        stats_df.reset_index(inplace = True)

        #stats_df[['FF Pts','Plays','PAATT','PACMP','PAYDS','PATDS','RUATT','RUYDS','RUTDS','INT','FUM']] = stats_df[['FF Pts','Plays','PAATT','PACMP','PAYDS','PATDS','RUATT','RUYDS','RUTDS','INT','FUM']].apply(pd.to_numeric)
        #stats_df.replace('-', 0)

        stats_df['Player'] = stats_df['Player'].str[3:]
        stats_df['Week'] = week
        stats_df['Season'] = year


        combined_QB_df = combined_QB_df.append(stats_df) 
        
combined_QB_df

2012
---
2013
---
2014
---
2015
---
2016
---
2017
---
2018
---
2019
---
2020
---


Unnamed: 0,index,Player,Team,FF Pts,Plays,PAATT,PACMP,PAYDS,PATDS,RUATT,RUYDS,RUTDS,INT,FUM,Week,Season
0,0,Aaron Rodgers,GB,53.2,83,73,51,554,7,10,70,–,2,–,1,2012
1,3,Jay Cutler,CHI,38.9,68,63,41,567,4,5,22,–,1,1,1,2012
2,6,Eli Manning,NYG,38.6,81,79,49,568,5,2,-1,–,2,1,1,2012
3,9,Michael Vick,PHI,35.1,71,60,39,416,3,11,65,–,–,3,1,2012
4,12,Andy Dalton,CIN,33.7,84,74,46,478,3,10,26,1,3,–,1,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38,114,Nate Sudfeld,PHI,0.5,14,12,5,32,–,2,12,–,1,1,17,2020
39,117,Jacoby Brissett,IND,0.3,2,–,–,–,–,2,3,–,–,–,17,2020
40,120,Jameis Winston,NO,-0.1,1,–,–,–,–,1,-1,–,–,–,17,2020
41,123,Brandon Allen,CIN,-1.9,22,21,6,48,–,1,2,–,2,–,17,2020


In [14]:
# saving the dataframe 
combined_QB_df.to_csv('weekly_QB_stats.csv') 


# The huddle RB Scrape

In [7]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [C:\Users\Steven\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache


 


In [10]:
# URL of page to be scraped

combined_RB_df = pd.DataFrame()

for year in range(2012, 2021):
    
    print(year)
    print('---')
    
    for week in range(1, 18):

        #print(year)
        #print(week)
        #print('---')
        pos = 'RB'
        url = f'https://tools.thehuddle.com/weekly?week={week}&year={year}&position={pos}&formulaId=6'
        #print(url)

        # Retrieve page with the browser module
        browser.visit(url)

        # setup html parser
        html = browser.html
        fantasy_soup = BeautifulSoup(html, 'html.parser')

        # save the stats table as a df
        stats_table = fantasy_soup.find("table", {"class":"dataTable"})
        stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]
        stats_df.dropna(axis = 0, how = 'all', inplace = True)
        stats_df.reset_index(inplace = True)

        stats_df['Player'] = stats_df['Player'].str[3:]
        stats_df['Week'] = week
        stats_df['Season'] = year


        combined_RB_df = combined_RB_df.append(stats_df) 
        
combined_RB_df

2012
---
2013
---
2014
---
2015
---
2016
---
2017
---
2018
---
2019
---
2020
---


Unnamed: 0,index,Player,Team,FF Pts,Plays,RUATT,RUYDS,RUTDS,TRGT,REC,RCYDS,RCTDS,INT,FUM,Week,Season
0,0,Ahmad Bradshaw,NYG,36.6,53,43,239,1,10,7,67,–,–,1,1,2012
1,3,Alfred Morris,WAS,30.4,42,39,228,1,3,2,16,–,–,–,1,2012
2,6,LeSean McCoy,PHI,28,48,39,176,–,9,7,44,1,–,1,1,2012
3,9,Willis McGahee,DEN,19.5,25,19,112,1,6,6,23,–,–,–,1,2012
4,12,T. Richardson,CLE,18.8,24,17,81,1,7,5,47,–,–,–,1,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,330,. S. Phillips,HOU,0,–,–,–,–,–,–,–,–,–,–,17,2020
111,333,. LeVante Bellamy,DEN,0,1,1,–,–,–,–,–,–,–,–,17,2020
112,336,. Xavier Jones,LAR,0,–,–,–,–,–,–,–,–,–,–,17,2020
113,339,. J.J. Taylor,NE,0,–,–,–,–,–,–,–,–,–,–,17,2020


In [13]:
# saving the dataframe 
combined_RB_df.to_csv('weekly_RB_stats.csv') 

# The huddle WR Scrape

In [15]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [C:\Users\Steven\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache


 


In [17]:
# URL of page to be scraped

combined_WR_df = pd.DataFrame()

for year in range(2012, 2021):
    
    print(year)
    print('---')
    
    for week in range(1, 18):

        #print(year)
        #print(week)
        #print('---')
        pos = 'WR'
        url = f'https://tools.thehuddle.com/weekly?week={week}&year={year}&position={pos}&formulaId=6'
        #print(url)

        # Retrieve page with the browser module
        browser.visit(url)

        # setup html parser
        html = browser.html
        fantasy_soup = BeautifulSoup(html, 'html.parser')

        # save the stats table as a df
        stats_table = fantasy_soup.find("table", {"class":"dataTable"})
        stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]
        stats_df.dropna(axis = 0, how = 'all', inplace = True)
        stats_df.reset_index(inplace = True)

        stats_df['Player'] = stats_df['Player'].str[3:]
        stats_df['Week'] = week
        stats_df['Season'] = year


        combined_WR_df = combined_WR_df.append(stats_df) 
        
combined_WR_df

2012
---
2013
---
2014
---
2015
---
2016
---
2017
---
2018
---
2019
---
2020
---


Unnamed: 0,index,Player,Team,FF Pts,Plays,TRGT,REC,RCYDS,RCTDS,RUATT,RUYDS,RUTDS,INT,FUM,Week,Season
0,0,B. Marshall,CHI,40.2,25,25,19,282,2,–,–,–,–,–,1,2012
1,3,Victor Cruz,NYG,39.9,21,21,14,159,4,–,–,–,–,–,1,2012
2,6,James Jones,GB,34.2,13,13,9,102,4,–,–,–,–,–,1,2012
3,9,A.J. Green,CIN,30.2,22,22,15,182,2,–,–,–,–,1,1,2012
4,12,Reggie Wayne,IND,27.2,20,20,13,212,1,–,–,–,–,–,1,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,477,. Chris Rowland,ATL,0,–,–,–,–,–,–,–,–,–,–,17,2020
160,480,. Dan Chisena,MIN,0,–,–,–,–,–,–,–,–,–,–,17,2020
161,483,. Matt Cole,SF,0,–,–,–,–,–,–,–,–,–,–,17,2020
162,486,. Isaiah Wright,WAS,0,–,–,–,–,–,–,–,–,–,–,17,2020


In [18]:
# saving the dataframe 
combined_WR_df.to_csv('weekly_WR_stats.csv') 

# The huddle TE Scrape

In [None]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [20]:
# URL of page to be scraped

combined_TE_df = pd.DataFrame()

for year in range(2012, 2021):
    
    print(year)
    print('---')
    
    for week in range(1, 18):

        #print(year)
        #print(week)
        #print('---')
        pos = 'TE'
        url = f'https://tools.thehuddle.com/weekly?week={week}&year={year}&position={pos}&formulaId=6'
        #print(url)

        # Retrieve page with the browser module
        browser.visit(url)

        # setup html parser
        html = browser.html
        fantasy_soup = BeautifulSoup(html, 'html.parser')

        # save the stats table as a df
        stats_table = fantasy_soup.find("table", {"class":"dataTable"})
        stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]
        stats_df.dropna(axis = 0, how = 'all', inplace = True)
        stats_df.reset_index(inplace = True)

        stats_df['Player'] = stats_df['Player'].str[3:]
        stats_df['Week'] = week
        stats_df['Season'] = year


        combined_TE_df = combined_TE_df.append(stats_df) 
        
combined_TE_df

2012
---
2013
---
2014
---
2015
---
2016
---
2017
---
2018
---
2019
---
2020
---


Unnamed: 0,index,Player,Team,FF Pts,Plays,TRGT,REC,RCYDS,RCTDS,RUATT,RUYDS,RUTDS,INT,FUM,Week,Season
0,0,Tony Gonzalez,ATL,18.3,14,14,13,123,1,–,–,–,–,–,1,2012
1,3,Jason Witten,DAL,17.2,14,14,13,112,1,–,–,–,–,–,1,2012
2,6,Brent Celek,PHI,12.6,10,10,7,66,1,–,–,–,–,–,1,2012
3,9,Fred Davis,WAS,12.4,10,10,9,124,–,–,–,–,–,–,1,2012
4,12,Marcedes Lewis,JAC,11.6,10,10,8,56,1,–,–,–,–,–,1,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,288,Colby Parkinson,SEA,0,–,–,–,–,–,–,–,–,–,–,17,2020
97,291,Tyler Davis,JAC,0,–,–,–,–,–,–,–,–,–,–,17,2020
98,294,Hunter Bryant,DET,0,1,1,–,–,–,–,–,–,–,–,17,2020
99,297,. Sean McKeon,DAL,0,–,–,–,–,–,–,–,–,–,–,17,2020


In [21]:
# saving the dataframe 
combined_TE_df.to_csv('weekly_TE_stats.csv') 

# The huddle DEF Scrape

In [25]:
# URL of page to be scraped

combined_DEF_df = pd.DataFrame()

for year in range(2012, 2021):
    
    print(year)
    print('---')
    
    for week in range(1, 18):

        #print(year)
        #print(week)
        #print('---')
        pos = 'DF'
        url = f'https://tools.thehuddle.com/weekly?week={week}&year={year}&position={pos}&formulaId=6'
        #print(url)

        # Retrieve page with the browser module
        browser.visit(url)

        # setup html parser
        html = browser.html
        fantasy_soup = BeautifulSoup(html, 'html.parser')

        # save the stats table as a df
        stats_table = fantasy_soup.find("table", {"class":"dataTable"})
        stats_df = pd.read_html(f'<table>${stats_table.tbody}</table>')[0]
        stats_df.dropna(axis = 0, how = 'all', inplace = True)
        stats_df.reset_index(inplace = True)

        stats_df['Player'] = stats_df['Player'].str[3:]
        stats_df['Week'] = week
        stats_df['Season'] = year


        combined_DEF_df = combined_DEF_df.append(stats_df) 
        
combined_DEF_df

2012
---
2013
---
2014
---
2015
---
2016
---
2017
---
2018
---
2019
---
2020
---


Unnamed: 0,index,Player,Team,FF Pts,SCK,DEFFR,DEFINT,DEFTD,DEFSAF,Week,Season
0,0,Broncos,DEN,13,5,–,1,1,–,1,2012
1,3,Steelers,PIT,4,2,1,–,–,–,1,2012
0,0,Steelers,PIT,4,2,1,–,–,–,2,2012
1,3,Jets,NYJ,3,3,–,–,–,–,2,2012
0,0,Raiders,LVR,5,1,2,–,–,–,3,2012
...,...,...,...,...,...,...,...,...,...,...,...
27,81,Chiefs,KC,3,3,–,–,–,–,17,2020
28,84,Raiders,LVR,2,2,–,–,–,–,17,2020
29,87,Panthers,CAR,2,2,–,–,–,–,17,2020
30,90,Bengals,CIN,2,–,–,1,–,–,17,2020


In [26]:
# saving the dataframe 
combined_DEF_df.to_csv('weekly_DEF_stats.csv') 