### 0) Description

This notebook contains code to pull player data from the individual player pages at https://www.sports-reference.com/cfb/players.
    - Example: https://www.sports-reference.com/cfb/players/gino-guidugli-1.html

### 1) Import packages

In [3]:
import re                         # to perform regular expression operations
import time                       # to set delays between requests
from functools import reduce      # to merge multiple dataframes
from io import StringIO           # to hand html text to pandas as a file-like object

import numpy as np                # to choose random elements from list
import pandas as pd               # to create and concat dataframes + read html tables
import requests                   # to make html requests
from bs4 import BeautifulSoup     # to pull data from html websites

### 2) Set parameters

In [4]:
# set year to scrape
year = 2020

# open csv
df = pd.read_csv('nfl_combine_2000_2020.csv')

# keep only the rows for the selected year
# (the original filtered an undefined name `dt`, which raised NameError)
df = df[df['Year'] == year]

# unique, non-null player page links for that year
url_list = df['NCAA_Link'].dropna().unique().tolist()

#url_list = ['http://www.sports-reference.com/cfb/players/kaulana-noa-1.html','http://www.sports-reference.com/cfb/players/anthony-lucas-1.html','http://www.sports-reference.com/cfb/players/thomas-hamner-1.html','http://www.sports-reference.com/cfb/players/john-frank-2.html','http://www.sports-reference.com/cfb/players/sherrod-gideon-1.html','http://www.sports-reference.com/cfb/players/jason-gavadza-1.html','http://www.sports-reference.com/cfb/players/jeff-harris-2.html','http://www.sports-reference.com/cfb/players/ethan-howell-1.html','http://www.sports-reference.com/cfb/players/patrick-lee-1.html','http://www.sports-reference.com/cfb/players/dante-fowler-jr-1.html','http://www.sports-reference.com/cfb/players/rashard-robinson-1.html','http://www.sports-reference.com/cfb/players/christopher-herndon-iv-1.html','http://www.sports-reference.com/cfb/players/richie-james-1.html','http://www.sports-reference.com/cfb/players/ahmed-plummer-1.html','http://www.sports-reference.com/cfb/players/erik-flowers-1.html','http://www.sports-reference.com/cfb/players/darren-howard-1.html','http://www.sports-reference.com/cfb/players/mark-roman-1.html','http://www.sports-reference.com/cfb/players/mike-brown-18.html','http://www.sports-reference.com/cfb/players/raynoch-thompson-1.html','http://www.sports-reference.com/cfb/players/rogers-beckett-1.html','http://www.sports-reference.com/cfb/players/jason-webster-1.html','http://www.sports-reference.com/cfb/players/dwayne-goodrich-1.html','http://www.sports-reference.com/cfb/players/william-bartee-1.html','http://www.sports-reference.com/cfb/players/travares-tillman-1.html','http://www.sports-reference.com/cfb/players/jacoby-shepherd-1.html','http://www.sports-reference.com/cfb/players/darrell-jackson-1.html','http://www.sports-reference.com/cfb/players/jeff-ulbrich-1.html','http://www.sports-reference.com/cfb/players/david-macklin-1.html','http://www.sports-reference.com/cfb/players/lewis-sanders-1.html','http://www.sports-reference.com/
cfb/players/david-barrett-2.html','http://www.sports-reference.com/cfb/players/leonardo-carson-1.html','http://www.sports-reference.com/cfb/players/marcus-bell-2.html','http://www.sports-reference.com/cfb/players/bobby-myers-2.html','http://www.sports-reference.com/cfb/players/gary-berry-1.html','http://www.sports-reference.com/cfb/players/peter-sirmon-1.html','http://www.sports-reference.com/cfb/players/paul-smith-6.html','http://www.sports-reference.com/cfb/players/robert-bean-1.html','http://www.sports-reference.com/cfb/players/mao-tosi-1.html','http://www.sports-reference.com/cfb/players/todd-franz-1.html','http://www.sports-reference.com/cfb/players/tony-scott-4.html','http://www.sports-reference.com/cfb/players/robaire-smith-1.html','http://www.sports-reference.com/cfb/players/matt-bowen-1.html','http://www.sports-reference.com/cfb/players/sekou-sanyika-1.html','http://www.sports-reference.com/cfb/players/rashidi-barnes-1.html','http://www.sports-reference.com/cfb/players/erik-olson-1.html','http://www.sports-reference.com/cfb/players/danny-clark-2.html','http://www.sports-reference.com/cfb/players/john-henderson-1.html','http://www.sports-reference.com/cfb/players/ingle-martin-1.html','http://www.sports-reference.com/cfb/players/anthony-castonzo-1.html','http://www.sports-reference.com/cfb/players/stephen-schilling-1.html','http://www.sports-reference.com/cfb/players/lane-johnson-1.html','http://www.sports-reference.com/cfb/players/dj-fluker-1.html','http://www.sports-reference.com/cfb/players/larry-warford-1.html','http://www.sports-reference.com/cfb/players/tyrann-mathieu-1.html','http://www.sports-reference.com/cfb/players/david-bakhtiari-1.html','http://www.sports-reference.com/cfb/players/jordan-mills-1.html','http://www.sports-reference.com/cfb/players/carson-wentz-1.html','http://www.sports-reference.com/cfb/players/james-bradberry-1.html','http://www.sports-reference.com/cfb/players/isaiah-jones-2.html','http://www.sports-reference.com/cfb/players/joe
l-iyiegbuniwe-1.html','http://www.sports-reference.com/cfb/players/rj-mcintosh-1.html','http://www.sports-reference.com/cfb/players/mike-white-6.html','http://www.sports-reference.com/cfb/players/kahlil-mckenzie-1.html','http://www.sports-reference.com/cfb/players/jordan-thomas-6.html','http://www.sports-reference.com/cfb/players/tracy-walker-1.html']

#url_list = ['http://www.sports-reference.com/cfb/players/courtney-brown-1.html','http://www.sports-reference.com/cfb/players/jamal-lewis-2.html','http://www.sports-reference.com/cfb/players/sebastian-janikowski-1.html','http://www.sports-reference.com/cfb/players/stockar-mcdougle-1.html','http://www.sports-reference.com/cfb/players/john-engelberger-1.html','http://www.sports-reference.com/cfb/players/mark-simoneau-1.html','http://www.sports-reference.com/cfb/players/reuben-droughns-1.html','http://www.sports-reference.com/cfb/players/ben-kelly-1.html','http://www.sports-reference.com/cfb/players/corey-moore-1.html','http://www.sports-reference.com/cfb/players/terrelle-smith-1.html','http://www.sports-reference.com/cfb/players/anthony-malbrough-1.html','http://www.sports-reference.com/cfb/players/shane-lechler-1.html','http://www.sports-reference.com/cfb/players/windrell-hayes-1.html','http://www.sports-reference.com/cfb/players/chad-morton-1.html','http://www.sports-reference.com/cfb/players/spergon-wynn-1.html','http://www.sports-reference.com/cfb/players/tim-rattay-1.html','http://www.sports-reference.com/cfb/players/jarious-jackson-1.html','http://www.sports-reference.com/cfb/players/ronney-jenkins-1.html','http://www.sports-reference.com/cfb/players/zeron-flemister-1.html','http://www.sports-reference.com/cfb/players/casey-hampton-1.html','http://www.sports-reference.com/cfb/players/victor-leyva-1.html','http://www.sports-reference.com/cfb/players/chukky-okobi-1.html','http://www.sports-reference.com/cfb/players/ellis-wyms-1.html','http://www.sports-reference.com/cfb/players/mike-williams-6.html','http://www.sports-reference.com/cfb/players/bryant-mckinnie-1.html','http://www.sports-reference.com/cfb/players/plaxico-burress-1.html','http://www.sports-reference.com/cfb/players/travis-taylor-1.html','http://www.sports-reference.com/cfb/players/ron-dayne-1.html','http://www.sports-reference.com/cfb/players/deltha-oneal-1.html','http://www.sports-reference.com/cfb/
players/rob-morris-1.html','http://www.sports-reference.com/cfb/players/trung-canidate-1.html','http://www.sports-reference.com/cfb/players/todd-pinkston-1.html','http://www.sports-reference.com/cfb/players/kenoy-kennedy-1.html','http://www.sports-reference.com/cfb/players/lloyd-harrison-1.html','http://www.sports-reference.com/cfb/players/curtis-keaton-1.html','http://www.sports-reference.com/cfb/players/gari-scott-1.html','http://www.sports-reference.com/cfb/players/tyrone-carter-1.html','http://www.sports-reference.com/cfb/players/dave-stachelski-1.html','http://www.sports-reference.com/cfb/players/michael-wiley-1.html','http://www.sports-reference.com/cfb/players/sammy-morris-1.html','http://www.sports-reference.com/cfb/players/jay-tant-1.html','http://www.sports-reference.com/cfb/players/troy-walters-1.html','http://www.sports-reference.com/cfb/players/marc-bulger-1.html','http://www.sports-reference.com/cfb/players/tom-brady-1.html','http://www.sports-reference.com/cfb/players/charles-lee-1.html','http://www.sports-reference.com/cfb/players/bashir-yamini-1.html','http://www.sports-reference.com/cfb/players/casey-crawford-1.html','http://www.sports-reference.com/cfb/players/chris-coleman-1.html','http://www.sports-reference.com/cfb/players/marcus-knight-1.html','http://www.sports-reference.com/cfb/players/scottie-montgomery-1.html','http://www.sports-reference.com/cfb/players/trevor-insley-1.html','http://www.sports-reference.com/cfb/players/troy-hambrick-1.html','http://www.sports-reference.com/cfb/players/ben-hamilton-1.html','http://www.sports-reference.com/cfb/players/cedric-scott-1.html','http://www.sports-reference.com/cfb/players/watts-sanderson-1.html','http://www.sports-reference.com/cfb/players/chris-samuels-2.html','http://www.sports-reference.com/cfb/players/corey-simon-1.html','http://www.sports-reference.com/cfb/players/chris-mcintosh-1.html','http://www.sports-reference.com/cfb/players/r-jay-soward-1.html','http://www.sports-reference.com/cfb/pla
yers/jerry-porter-1.html','http://www.sports-reference.com/cfb/players/barrett-green-1.html','http://www.sports-reference.com/cfb/players/cosey-coleman-1.html','http://www.sports-reference.com/cfb/players/ron-dugans-1.html','http://www.sports-reference.com/cfb/players/erron-kinney-1.html','http://www.sports-reference.com/cfb/players/chris-cole-2.html','http://www.sports-reference.com/cfb/players/jr-redmond-1.html','http://www.sports-reference.com/cfb/players/danny-farmer-1.html','http://www.sports-reference.com/cfb/players/brandon-short-1.html','http://www.sports-reference.com/cfb/players/trevor-gaylor-1.html','http://www.sports-reference.com/cfb/players/frank-moreau-1.html','http://www.sports-reference.com/cfb/players/deon-dyer-1.html','http://www.sports-reference.com/cfb/players/brian-young-1.html','http://www.sports-reference.com/cfb/players/adalius-thomas-1.html','http://www.sports-reference.com/cfb/players/todd-husak-1.html','http://www.sports-reference.com/cfb/players/mondriel-fulcher-1.html','http://www.sports-reference.com/cfb/players/joe-hamilton-1.html','http://www.sports-reference.com/cfb/players/shyrone-stith-1.html','http://www.sports-reference.com/cfb/players/doug-johnson-3.html','http://www.sports-reference.com/cfb/players/kevin-mcdougal-2.html','http://www.sports-reference.com/cfb/players/kwame-cavil-1.html','http://www.sports-reference.com/cfb/players/larry-foster-1.html','http://www.sports-reference.com/cfb/players/jamal-reynolds-1.html','http://www.sports-reference.com/cfb/players/ross-kolodziej-1.html','http://www.sports-reference.com/cfb/players/derrick-dockery-1.html','http://www.sports-reference.com/cfb/players/dan-klecko-1.html','http://www.sports-reference.com/cfb/players/brett-romberg-1.html','http://www.sports-reference.com/cfb/players/eric-steinbach-1.html','http://www.sports-reference.com/cfb/players/rien-long-1.html','http://www.sports-reference.com/cfb/players/darnell-dockett-1.html','http://www.sports-reference.com/cfb/players/constan
tin-ritzmann-1.html','http://www.sports-reference.com/cfb/players/derrick-johnson-4.html','http://www.sports-reference.com/cfb/players/michael-munoz-1.html','http://www.sports-reference.com/cfb/players/rex-hadnot-1.html','http://www.sports-reference.com/cfb/players/jammal-brown-1.html','http://www.sports-reference.com/cfb/players/alex-barron-1.html','http://www.sports-reference.com/cfb/players/dave-ball-1.html','http://www.sports-reference.com/cfb/players/erasmus-james-1.html','http://www.sports-reference.com/cfb/players/greg-eslinger-1.html','http://www.sports-reference.com/cfb/players/brian-calhoun-2.html','http://www.sports-reference.com/cfb/players/jason-brown-2.html','http://www.sports-reference.com/cfb/players/ben-wilkerson-1.html','http://www.sports-reference.com/cfb/players/marcus-lawrence-1.html','http://www.sports-reference.com/cfb/players/winston-justice-1.html','http://www.sports-reference.com/cfb/players/sam-baker-1.html','http://www.sports-reference.com/cfb/players/dan-mozes-1.html','http://www.sports-reference.com/cfb/players/michael-oher-1.html','http://www.sports-reference.com/cfb/players/phil-loadholt-1.html','http://www.sports-reference.com/cfb/players/antoine-caldwell-1.html','http://www.sports-reference.com/cfb/players/duke-robinson-1.html','http://www.sports-reference.com/cfb/players/lavar-arrington-1.html','http://www.sports-reference.com/cfb/players/peter-warrick-1.html','http://www.sports-reference.com/cfb/players/thomas-jones-1.html','http://www.sports-reference.com/cfb/players/brian-urlacher-1.html','http://www.sports-reference.com/cfb/players/bubba-franks-1.html','http://www.sports-reference.com/cfb/players/chad-pennington-1.html','http://www.sports-reference.com/cfb/players/shaun-alexander-1.html','http://www.sports-reference.com/cfb/players/anthony-becht-1.html','http://www.sports-reference.com/cfb/players/dennis-northcutt-1.html','http://www.sports-reference.com/cfb/players/ian-gold-1.html','http://www.sports-reference.com/cfb/players/
ike-charlton-1.html','http://www.sports-reference.com/cfb/players/deon-grant-1.html','http://www.sports-reference.com/cfb/players/travis-prentice-1.html','http://www.sports-reference.com/cfb/players/dez-white-1.html','http://www.sports-reference.com/cfb/players/chris-redman-1.html','http://www.sports-reference.com/cfb/players/hank-poteat-1.html','http://www.sports-reference.com/cfb/players/laveranues-coles-1.html','http://www.sports-reference.com/cfb/players/jajuan-dawson-1.html','http://www.sports-reference.com/cfb/players/doug-chapman-1.html','http://www.sports-reference.com/cfb/players/aaron-shea-1.html','http://www.sports-reference.com/cfb/players/aric-morris-1.html','http://www.sports-reference.com/cfb/players/ralph-brown-2.html','http://www.sports-reference.com/cfb/players/james-whalen-1.html','http://www.sports-reference.com/cfb/players/austin-wheatley-1.html','http://www.sports-reference.com/cfb/players/tee-martin-1.html','http://www.sports-reference.com/cfb/players/james-williams-5.html','http://www.sports-reference.com/cfb/players/mario-edwards-3.html','http://www.sports-reference.com/cfb/players/mike-green-4.html','http://www.sports-reference.com/cfb/players/rondell-mealey-1.html','http://www.sports-reference.com/cfb/players/john-baker-3.html','http://www.sports-reference.com/cfb/players/john-jones-2.html','http://www.sports-reference.com/cfb/players/leonard-davis-1.html','http://www.sports-reference.com/cfb/players/steve-hutchinson-1.html','http://www.sports-reference.com/cfb/players/aaron-schobel-1.html','http://www.sports-reference.com/cfb/players/dominic-raiola-1.html','http://www.sports-reference.com/cfb/players/chris-brown-7.html','http://www.sports-reference.com/cfb/players/gabe-carimi-1.html','http://www.sports-reference.com/cfb/players/dwight-freeney-1.html','http://www.sports-reference.com/cfb/players/andre-gurode-1.html','http://www.sports-reference.com/cfb/players/lecharles-bentley-1.html','http://www.sports-reference.com/cfb/players/terrence-
metcalf-1.html','http://www.sports-reference.com/cfb/players/jon-stinchcomb-1.html','http://www.sports-reference.com/cfb/players/nate-potter-1.html','http://www.sports-reference.com/cfb/players/ryan-bartholomew-1.html','http://www.sports-reference.com/cfb/players/jake-grove-1.html','http://www.sports-reference.com/cfb/players/nick-leckey-1.html','http://www.sports-reference.com/cfb/players/jared-clauss-1.html','http://www.sports-reference.com/cfb/players/david-baas-1.html','http://www.sports-reference.com/cfb/players/charles-sims-2.html','http://www.sports-reference.com/cfb/players/chance-warmack-1.html','http://www.sports-reference.com/cfb/players/david-bakhtiari-1.html','http://www.sports-reference.com/cfb/players/jake-matthews-1.html','http://www.sports-reference.com/cfb/players/james-brown-1.html','http://www.sports-reference.com/cfb/players/ben-emanuel-1.html','http://www.sports-reference.com/cfb/players/max-jean-gilles-1.html','http://www.sports-reference.com/cfb/players/jonathan-scott-1.html','http://www.sports-reference.com/cfb/players/seantrel-henderson-1.html','http://www.sports-reference.com/cfb/players/donovan-smith-1.html','http://www.sports-reference.com/cfb/players/mitch-morse-1.html','http://www.sports-reference.com/cfb/players/andrew-donnal-1.html','http://www.sports-reference.com/cfb/players/david-yankey-1.html','http://www.sports-reference.com/cfb/players/corey-linsley-1.html','http://www.sports-reference.com/cfb/players/pat-odonnell-2.html','http://www.sports-reference.com/cfb/players/matt-paradis-1.html','http://www.sports-reference.com/cfb/players/cedric-ogbuehi-1.html','http://www.sports-reference.com/cfb/players/jake-fisher-1.html','http://www.sports-reference.com/cfb/players/john-miller-5.html','http://www.sports-reference.com/cfb/players/daryl-williams-1.html','http://www.sports-reference.com/cfb/players/arie-kouandjio-1.html','http://www.sports-reference.com/cfb/players/andy-gallik-1.html','http://www.sports-reference.com/cfb/players/bobby
-hart-2.html','http://www.sports-reference.com/cfb/players/austin-shepherd-1.html','http://www.sports-reference.com/cfb/players/justin-blalock-1.html','http://www.sports-reference.com/cfb/players/taylor-lewan-1.html','http://www.sports-reference.com/cfb/players/xavier-sua-filo-1.html','http://www.sports-reference.com/cfb/players/cyril-richardson-1.html','http://www.sports-reference.com/cfb/players/zach-fulton-1.html','http://www.sports-reference.com/cfb/players/germain-ifedi-1.html','http://www.sports-reference.com/cfb/players/avery-young-1.html','http://www.sports-reference.com/cfb/players/joseph-cheek-1.html','http://www.sports-reference.com/cfb/players/caleb-benenoch-1.html','http://www.sports-reference.com/cfb/players/christian-westerman-1.html','http://www.sports-reference.com/cfb/players/halapoulivaati-vaitai-1.html','http://www.sports-reference.com/cfb/players/vadal-alexander-1.html','http://www.sports-reference.com/cfb/players/james-hurst-1.html','http://www.sports-reference.com/cfb/players/jon-feliciano-1.html','http://www.sports-reference.com/cfb/players/terry-poole-1.html','http://www.sports-reference.com/cfb/players/max-garcia-1.html','http://www.sports-reference.com/cfb/players/jarvis-harrison-1.html','http://www.sports-reference.com/cfb/players/laurence-gibson-1.html','http://www.sports-reference.com/cfb/players/tyler-marz-1.html','http://www.sports-reference.com/cfb/players/ryan-ramczyk-1.html','http://www.sports-reference.com/cfb/players/ethan-pocic-1.html','http://www.sports-reference.com/cfb/players/jermaine-eluemunor-1.html','http://www.sports-reference.com/cfb/players/roderick-johnson-5.html','http://www.sports-reference.com/cfb/players/nila-kasitati-1.html','http://www.sports-reference.com/cfb/players/garrett-bolles-1.html','http://www.sports-reference.com/cfb/players/david-sharpe-1.html','http://www.sports-reference.com/cfb/players/will-holden-1.html','http://www.sports-reference.com/cfb/players/conor-mcdermott-2.html','http://www.sports-refere
nce.com/cfb/players/collin-buchanan-1.html','http://www.sports-reference.com/cfb/players/jon-toth-1.html','http://www.sports-reference.com/cfb/players/braden-smith-2.html','http://www.sports-reference.com/cfb/players/david-bright-1.html','http://www.sports-reference.com/cfb/players/michael-jordan-3.html','http://www.sports-reference.com/cfb/players/kyle-murphy-1.html','http://www.sports-reference.com/cfb/players/jake-brendel-1.html','http://www.sports-reference.com/cfb/players/dan-feeney-1.html','http://www.sports-reference.com/cfb/players/dorian-johnson-1.html','http://www.sports-reference.com/cfb/players/orlando-brown-2.html','http://www.sports-reference.com/cfb/players/tyrell-crosby-1.html','http://www.sports-reference.com/cfb/players/rod-taylor-3.html','http://www.sports-reference.com/cfb/players/jaryd-jones-smith-1.html','http://www.sports-reference.com/cfb/players/kj-malone-1.html','http://www.sports-reference.com/cfb/players/sean-welsh-1.html','http://www.sports-reference.com/cfb/players/jonah-williams-1.html','http://www.sports-reference.com/cfb/players/greg-little-2.html','http://www.sports-reference.com/cfb/players/cody-ford-1.html','http://www.sports-reference.com/cfb/players/erik-mccoy-1.html','http://www.sports-reference.com/cfb/players/justin-senior-1.html','http://www.sports-reference.com/cfb/players/mason-cole-1.html','http://www.sports-reference.com/cfb/players/javon-patterson-1.html','http://www.sports-reference.com/cfb/players/andrew-thomas-2.html','http://www.sports-reference.com/cfb/players/josh-jones-4.html','http://www.sports-reference.com/cfb/players/lloyd-cushenberry-iii-1.html','http://www.sports-reference.com/cfb/players/solomon-kindley-1.html','http://www.sports-reference.com/cfb/players/charlie-heck-1.html','http://www.sports-reference.com/cfb/players/ben-bredeson-1.html','http://www.sports-reference.com/cfb/players/hakeem-adeniji-1.html','http://www.sports-reference.com/cfb/players/netane-muti-1.html','http://www.sports-reference.com/cf
b/players/matt-kalil-1.html','http://www.sports-reference.com/cfb/players/david-decastro-1.html','http://www.sports-reference.com/cfb/players/kevin-zeitler-1.html','http://www.sports-reference.com/cfb/players/toby-weathersby-1.html','http://www.sports-reference.com/cfb/players/dennis-daley-1.html','http://www.sports-reference.com/cfb/players/david-long-2.html','http://www.sports-reference.com/cfb/players/jedrick-wills-jr-1.html','http://www.sports-reference.com/cfb/players/shane-lemieux-1.html','http://www.sports-reference.com/cfb/players/barrett-jones-1.html','http://www.sports-reference.com/cfb/players/cyrus-kouandjio-1.html','http://www.sports-reference.com/cfb/players/jack-mewhort-1.html','http://www.sports-reference.com/cfb/players/trystan-colon-castillo-1.html','http://www.sports-reference.com/cfb/players/prince-tega-wanogho-1.html','http://www.sports-reference.com/cfb/players/cohl-cabral-1.html','http://www.sports-reference.com/cfb/players/john-urschel-1.html','http://www.sports-reference.com/cfb/players/jon-halapio-1.html','http://www.sports-reference.com/cfb/players/laken-tomlinson-1.html','http://www.sports-reference.com/cfb/players/jeremaih-poutasi-1.html','http://www.sports-reference.com/cfb/players/saahdiq-charles-1.html','http://www.sports-reference.com/cfb/players/shon-coleman-1.html','http://www.sports-reference.com/cfb/players/fahn-cooper-1.html','http://www.sports-reference.com/cfb/players/pearce-slater-2.html','http://www.sports-reference.com/cfb/players/nico-siragusa-1.html','http://www.sports-reference.com/cfb/players/avery-gennesy-1.html','http://www.sports-reference.com/cfb/players/chad-wheeler-1.html','http://www.sports-reference.com/cfb/players/will-hernandez-1.html','http://www.sports-reference.com/cfb/players/sam-jones-1.html','http://www.sports-reference.com/cfb/players/william-clapp-1.html','http://www.sports-reference.com/cfb/players/brett-toth-1.html','http://www.sports-reference.com/cfb/players/dmitri-flowers-1.html','http://www.sport
s-reference.com/cfb/players/kc-mcdermott-1.html','http://www.sports-reference.com/cfb/players/chuma-edoga-1.html','http://www.sports-reference.com/cfb/players/alex-bars-1.html','http://www.sports-reference.com/cfb/players/andre-james-1.html','http://www.sports-reference.com/cfb/players/emmanuel-butler-1.html','http://www.sports-reference.com/cfb/players/fred-johnson-3.html','http://www.sports-reference.com/cfb/players/keenan-brown-2.html','http://www.sports-reference.com/cfb/players/paul-adams-3.html','http://www.sports-reference.com/cfb/players/isaiah-wilson-1.html','http://www.sports-reference.com/cfb/players/jack-driscoll-2.html','http://www.sports-reference.com/cfb/players/nick-harris-3.html']

# sanity check: how many URLs are currently in url_list
print(len(url_list))

280


### 3) Define functions and pull data

In [5]:
# Flat column names for every supported stats table, keyed by the top-level
# header of the table's seventh column (index 6), i.e. the first stat group
# that follows the shared Year/School/Conf/Class/Pos/G columns.
_STAT_TABLE_COLUMNS = {
    'Receiving': ['Year','School','Conf','Class','Pos','G','Rec','Rec_Yds','Rec_Avg','Rec_TD','Rush_Att','Rush_Yds','Rush_Avg','Rush_TD','Scrim_Plays','Scrim_Yds','Scrim_Avg','Scrim_TD'],
    'Rushing': ['Year','School','Conf','Class','Pos','G','Rush_Att','Rush_Yds','Rush_Avg','Rush_TD','Rec','Rec_Yds','Rec_Avg','Rec_TD','Scrim_Plays','Scrim_Yds','Scrim_Avg','Scrim_TD'],
    'Passing': ['Year','School','Conf','Class','Pos','G','Pass_Cmp','Pass_Att','Pass_Pct','Pass_Yds','Pass_Y/A','Pass_AY/A','Pass_TD','Pass_Int','Pass_Rate'],
    'Punt Ret': ['Year','School','Conf','Class','Pos','G','Punt_Ret','Punt_ret_Yds','Punt_ret_Avg','Punt_ret_TD','Kick_Ret','Kick_ret_Yds','Kick_ret_Avg','Kick_ret_TD'],
    'Kick Ret': ['Year','School','Conf','Class','Pos','G','Kick_Ret','Kick_ret_Yds','Kick_ret_Avg','Kick_ret_TD','Punt_Ret','Punt_ret_Yds','Punt_ret_Avg','Punt_ret_TD'],
    'Tackles': ['Year','School','Conf','Class','Pos','G','Def_Solo_Tackles','Def_Ast_Tackles','Def_Tot_Tackles','Def_Loss_Tackles','Def_Sk','Def_Int','Def_Int_Yds','Def_Int_Avg','Def_Int_TD','Def_Int_PD','Def_FR','Def_Yds_Fumbles','Def_TD_Fumbles','Def_FF'],
    'Kicking': ['Year','School','Conf','Class','Pos','G','Kick_XPM','Kick_XPA','Kick_XP_pct','Kick_FGM','Kick_FGA','Kick_FG_pct','Kick_Pts','Punts','Punt_Yds','Punt_Avg'],
    'Punting': ['Year','School','Conf','Class','Pos','G','Punts','Punt_Yds','Punt_Avg','Kick_XPM','Kick_XPA','Kick_XP_pct','Kick_FGM','Kick_FGA','Kick_FG_pct','Kick_Pts'],
}


def pullTable(url, timeout=30):
    """Download one player page and return its stats tables merged into one frame.

    Parameters
    ----------
    url : str
        Address of the player page to scrape.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the whole scraping loop.

    Returns
    -------
    pandas.DataFrame
        The page's stats tables joined on Year/School/Conf/Class/Pos/G, with
        the page address in a leading 'NCAA_Link' column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. 404 or 429).
    ValueError
        'No tables found' if the page holds none of the expected tables; also
        raised by pandas if a table's width does not match its column list.
    """
    tableID = ['defense','rushing','receiving','passing','punt_ret','kick_ret','kicking','punting']
    delays = [5, 6, 7, 8, 9, 10]
    merge_keys = ['Year','School','Conf','Class','Pos','G']

    request_headers = {
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'referer': 'http://www.google.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }

    # wait a random number of seconds before every request
    time.sleep(np.random.choice(delays))
    res = requests.get(url, headers=request_headers, timeout=timeout)
    # report HTTP failures as such instead of as a misleading 'No tables found'
    res.raise_for_status()

    # strip the html comment markers so tables placed inside comments are parsed too
    soup = BeautifulSoup(re.sub("<!--|-->", "", res.text), 'lxml')
    tables = soup.find_all('table', id=tableID)
    if not tables:
        raise ValueError('No tables found')
    # pandas deprecated literal html strings; pass a file-like object instead
    table_list = pd.read_html(StringIO(''.join(str(tag) for tag in tables)))

    # give every recognised table its flat column names
    for table in table_list:
        stat_group = table.columns.tolist()[6][0]
        flat_names = _STAT_TABLE_COLUMNS.get(stat_group)
        if flat_names is not None:
            # assigning a plain list replaces the multilevel header in one step
            table.columns = list(flat_names)

    # merge tables into one dataframe (inner join on the shared key columns)
    final = reduce(lambda left, right: pd.merge(left, right, on=merge_keys), table_list)
    # insert ncaa link
    final.insert(0, 'NCAA_Link', url)

    return final

In [6]:
dfs = []
error_list = []

# iterate over list of urls
for url in url_list:
    try:
        dfs.append(pullTable(url))
    except Exception as e:
        # best effort: remember the url and the error it caused, then move on
        error_list.append([url, e])

# print errors
print(error_list)

if dfs:
    # merge data into a single dataframe
    combines_df = pd.concat(dfs, ignore_index=False, sort=False)
    # save as csv
    combines_df.to_csv('ncaa_player_ind_stats_draft.csv', index=False)
else:
    # pd.concat raises "No objects to concatenate" on an empty list; say what
    # actually happened and leave any existing csv untouched
    print('No player data was scraped; ncaa_player_ind_stats_draft.csv was not written.')


[['http://www.sports-reference.com/cfb/players/corey-moore-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/casey-hampton-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/victor-leyva-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/chukky-okobi-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/ellis-wyms-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/mike-williams-6.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/bryant-mckinnie-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/ben-hamilton-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/cedric-scott-1.html', ValueError('No tables found')], ['http://www.sports-reference.com/cfb/players/watts-sanderson-1.html', ValueError('No tables foun

### 4) Merge csv files

In [7]:
# Per-year scrape outputs for 2000-2020, followed by the re-scraped leftovers.
# The original cell read the 2000 file but never appended it, so that season
# was missing from the merged output; it is included here.
csv_files = ['ncaa_player_ind_stats_{}.csv'.format(season) for season in range(2000, 2021)]
csv_files += [
    'ncaa_player_ind_stats_cnc_error.csv',
    'ncaa_player_ind_stats_draft_leftover.csv',
    'ncaa_player_ind_stats_draft_leftover_2.csv',
]

dfs = [pd.read_csv(csv_file) for csv_file in csv_files]

# stack everything and save the combined file
combines_df = pd.concat(dfs, ignore_index=False, sort=False)
combines_df.to_csv('ncaa_all_player_ind_stats_extended_v2.csv', index=False)



### URLs with errors (ConnectionError or ValueError: table not found)

In [None]:
# Disabled cell: everything below is one string literal, so none of these
# append calls run. Per the section heading, these are player pages that failed
# with a ConnectionError or a ValueError (table not found).
'''
url_list.append('https://www.sports-reference.com/cfb/players/david-greene-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/matt-grootegoed-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/gino-guidugli-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/tyjuan-hagler-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/walter-thurmond-iii-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/jj-watt-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/roy-helu-jr-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/donta-hightower-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/keshawn-martin-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/doug-martin-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/jr-sweezy-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/louis-nix-iii-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/boom-williams-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/dimitri-flowers-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/byron-cowart-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/jakobi-meyers-1.html')
url_list.append('https://www.sports-reference.com/cfb/players/derrek-thomas-1.html')
'''

### Obsolete code

In [3]:
# Obsolete, disabled code: the function below lives inside a string literal and
# is never defined. It fetched the table(s) matching a single `tableID` per
# request and returned only the first one with its top header level dropped;
# the `header` argument is not used anywhere in its body.
'''
def PullTAble(url, tableID, header = False):
    delays = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]


    headers = {
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'referer': 'http://www.google.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }
    
    delay = np.random.choice(delays)
    time.sleep(delay)
    res = requests.get(url, headers = headers)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.findAll('table', id = tableID)
    df = pd.read_html(str(tables))[0]

    # remove multilevel headers
    df.columns = df.columns.droplevel(0)
    
    return(df)
'''

In [9]:
# OBSOLETE - kept for reference only. The whole cell is one triple-quoted
# string, so nothing below is executed.
#
# What the disabled code does:
#   * overwrites url_list with two hard-coded player pages;
#   * for each URL, calls PullTAble once per table id in `tables`, renames the
#     columns to a fixed, table-specific list, and collects the frames in
#     stat_list; a ValueError from any table skips just that table;
#   * merges the collected frames for that player on
#     ['Year','School','Conf','Class','Pos','G'], prepends an NCAA_Link column
#     holding the URL, and stores the result in final2;
#   * concatenates all players and writes 'ncaa_player_stats_test15.csv'.
#
# NOTE(review): PullTAble is itself only present inside a disabled string in
#   the "Obsolete code" section, so both cells must be restored together.
# NOTE(review): if every table raises ValueError for a URL, stat_list is empty
#   and reduce(...) without an initial value raises TypeError.
# NOTE(review): pd.merge defaults to an inner join, so a season missing from
#   any one table is dropped from the merged row set - confirm that is intended.
# NOTE(review): rushing/receiving and kick_ret/punt_ret share column names, so
#   merging both of a pair presumably yields _x/_y suffixed duplicates - verify.
'''
## Pull individual player statistics

#url_list = pd.read_csv('temp_url_list.csv')
#url_list = listVals(url_list, 'cfb_reference')
url_list = ['https://www.sports-reference.com/cfb/players/ceedee-lamb-1.html','https://www.sports-reference.com/cfb/players/joe-burrow-1.html']
#url_list = ['https://www.sports-reference.com/cfb/players/ceedee-lamb-1.html']
#stat_list = []
tables = ['defense','rushing','receiving','kick_ret','punt_ret','passing']
#tables = ['defense', 'punt_ret']
#tables = ['defense', 'punt_ret', 'passing']
final2=[]

player_list = []
for url in url_list:
    stat_list = []
    for table in tables:
        try:
            table_data = PullTAble(url, table)
            if table == 'defense':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Def_Solo_Tackles','Def_Ast_Tackles','Def_Tot_Tackles','Def_Loss_Tackles','Def_Sk','Def_Int','Def_Int_Yds','Def_Int_Avg','Def_Int_TD','Def_Int_PD','Def_FR','Def_Yds_Fumbles','Def_TD_Fumbles','Def_FF']
            elif table == 'punt_ret':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Punt_Ret','Punt_ret_Yds','Punt_ret_Avg','Punt_ret_TD','Kick_Ret','Kick_ret_Yds','Kick_ret_Avg','Kick_ret_TD']
            elif table == 'kick_ret':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Kick_Ret','Kick_ret_Yds','Kick_ret_Avg','Kick_ret_TD','Punt_Ret','Punt_ret_Yds','Punt_ret_Avg','Punt_ret_TD']
            elif table == 'passing':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Pass_Cmp','Pass_Att','Pass_Pct','Pass_Yds','Pass_Y/A','Pass_AY/A','Pass_TD','Pass_Int','Pass_Rate']
            elif table == 'receiving':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Rec','Rec_Yds','Rec_Avg','Rec_TD','Rush_Att','Rush_Yds','Rush_Avg','Rush_TD','Scrim_Plays','Scrim_Yds','Scrim_Avg','Scrim_TD']
            elif table == 'rushing':
                table_data.columns = ['Year','School','Conf','Class','Pos','G','Rush_Att','Rush_Yds','Rush_Avg','Rush_TD','Rec','Rec_Yds','Rec_Avg','Rec_TD','Scrim_Plays','Scrim_Yds','Scrim_Avg','Scrim_TD']
        except ValueError:
            continue
        
        #print(table_data)
        #stat_list = pd.concat([table_data], axis=1)
        stat_list.append(table_data)
        #stat_list.insert(0, 'NCAA_Link', url)
        #stat_list = stat_list.loc[:,~stat_list.columns.duplicated()]
        #player_list.append(stat_list)
        
#final = pd.concat(player_list, ignore_index=True, sort=False)
    final = reduce(lambda x, y: pd.merge(x, y, on = ['Year','School','Conf','Class','Pos','G']), stat_list)
    final.insert(0, 'NCAA_Link', url)
    final2.append(final)
        
#print(table_data)
#print(playerstats)
#print(stat_list)
final_df = pd.concat(final2, ignore_index=True, sort=False)
final_df.to_csv('ncaa_player_stats_test15.csv', index=False)
'''