# Scraping College Stats from Pro Football Reference (defense)
we are scraping defense only stats right now from profootball reference college stats player pages.

Issues:
- when searching for tables, only 1 comes up.  defense
- this would be fine, but some defensive players return punts/kicks too
- the punts/kicks stats are commented out
- We have to decide what stats to keep from each player... do we append all years to our final dataset? just career stats?

In [None]:
import requests
import re
from bs4 import BeautifulSoup, Comment
import pandas as pd
import numpy as np

# Function: take url and return soup page

In [27]:
def take_url_return_page(url):
  # this function takes a url and returns the soup content for it
  r = requests.get(url)
  page = BeautifulSoup(r.content, 'html.parser')
  return page

# Function: take soup page return defense & punt tables

In [None]:
def take_page_return_def_and_punts(page):
  # this function takes a soup page and looks for the table id defense and returns it.  same for the punt_ret table (which is hidden in the comments)
  def_table = page.find('table', {'id': 'defense'})
  # finding where punt return info is located
  all_punt_ret = page.find(id="all_punt_ret")
  # getting the info inside the comments
  punt_ret = all_punt_ret(text=lambda text: isinstance(text, Comment))[0]
  # making the data parsable by BS
  punt_soup=BeautifulSoup(punt_ret, 'html.parser')
  punt_table = punt_soup.find('table', {'id': 'punt_ret'})
  return def_table, punt_table

# Function: grab stats from table return dataframe
- takes table and columns as input

In [None]:
def get_stats(table, columns):
  # this function takes the stats table and returns a dataframe of the stats
  
  # define the DataFrame
  df = pd.DataFrame(columns=columns)  

  # finds all rows in the table
  rows = table.find_all('tr')

  # this counter will be used along with total_games to get the total games played in the final row of the dataframe.
  row_counter = 3
  total_games = 0

  # loop over each row in the table, skipping over header rows (0,1)
  for row in rows[2:]:
    # creating a list where we will store the stats for each row.  We will then append this list to our df
    stat_list = []
    # this part is for the first rows of our table.  these rows contain the year in an 'a' tag
    if row_counter<(len(rows)):
      # creating cells which contains all the relevant stats inside td tags, which we pull using text.
      cells = row.find_all('td')
      # this variable will be used to find cell 5.  cell 5 contains the total games.
      cell_counter=0
      # appending the year to our list... the year is contained in an 'a' tag
      stat_list.append(row.find('a').text)
      # loop for pulling all the relevant stats from cells
      for cell in cells:
        # when the cell count is 4, that cell contains the games played... we want to total this for use in the last column 
        if cell_counter == 4:
          games = cell.text
          # try except for catch nulls error
          try:
            games = int(games)
            total_games += games
          except ValueError:
            total_games += 0
          stat_list.append(games)
          cell_counter +=1
        # this is for all the other cells
        else:
          stat_list.append(cell.text)
          cell_counter +=1
        
    # this part is for the last row of our table we are scrapping. it contains career stats
    else:
      cells = row.find_all('td')
      cell_counter=0
      # this last column does not contain year.
      stat_list.append('Career')
      for cell in cells:
        # cell 4 contains total games
        if cell_counter == 4:
          stat_list.append(total_games)
          cell_counter +=1
        else:
          stat_list.append(cell.text)
          cell_counter +=1
    df = df.append(pd.DataFrame([stat_list], columns=columns))
    row_counter+=1
  return df


# Column Definitions

In [None]:
def_columns = ['year', 'school', 'conference', 'class', 'position', 'games', 'solo_tackles', 'assisted_tackles', 'total_tackles', 'tackles_loss', 'sacks', 'interceptions', 'int_yards', 'int_yards_per_int', 'int_td', 'passes_defended', 'fumbles_recovered', 'fumble_yards', 'fumble_td', 'forced_fumbles']
punt_columns = ['year','school','conference','class','position','games','punt_returns','punt_ret_yds','punt_ret_avg','punt_td','kick_returns','kick_ret_yds','kick_ret_avg','kick_ret_td']

# Testing work so far

## testing take url return page

In [None]:
url = 'https://www.sports-reference.com/cfb/players/derek-stingley-jr-1.html'

In [None]:
# sending url and getting page
page = take_url_return_page(url)

In [None]:
print(page.prettify())

<!DOCTYPE html>
<html class="no-js" data-root="/home/cfb/build" data-version="klecko-" lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <meta content="width=device-width, initial-scale=1.0, maximum-scale=2.0" name="viewport">
   <link href="https://cdn.ssref.net/req/202303021" rel="dns-prefetch"/>
   <!-- Quantcast Choice. Consent Manager Tag v2.0 (for TCF 2.0) -->
   <script async="true" type="text/javascript">
    (function() {
	var host = window.location.hostname;
	var element = document.createElement('script');
	var firstScript = document.getElementsByTagName('script')[0];
	var url = 'https://cmp.quantcast.com'
	    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, 
		    '/choice.js?tag_version=V2');
	var uspTries = 0;
	var uspTriesLimit = 3;
	element.async = true;
	element.type = 'text/javascript';
	element.src = url;
	
	firstScript.parentNode.insertBefore(element, firstScript);
	
	function makeStub() {
	    var TCF_LOCATOR_NAM

## testing take soup page get def/punt tables

In [None]:
def_table, punt_table = take_page_return_def_and_punts(page)

In [None]:
def_table

<table class="sortable stats_table" data-cols-to-freeze=",1" id="defense">
<caption>Defense &amp; Fumbles Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="5" data-stat=""></th><th></th>
<th aria-label="" class="over_header center" colspan="5" data-stat="header_tackles">Tackles</th>
<th aria-label="" class="over_header center" colspan="5" data-stat="header_def_int">Def Int</th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_fumbles">Fumbles</th>
</tr>
<tr>
<th aria-label="Year" class="poptip sort_default_asc right" data-stat="year_id" scope="col">Year</th>
<th aria-label="School" class="poptip sort_default_asc left" data-stat="school_name" data-tip="School Name" scope="col">School</th>
<th aria-label="Conf" class="poptip sort_default_asc left" data-stat="conf_abbr" s

In [None]:
punt_table

<table class="sortable stats_table" data-cols-to-freeze=",1" id="punt_ret">
<caption>Punt &amp; Kick Returns Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="5" data-stat=""></th><th></th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_punt_returns">Punt Ret</th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_kick_returns">Kick Ret</th>
</tr>
<tr>
<th aria-label="Year" class="poptip sort_default_asc right" data-stat="year_id" scope="col">Year</th>
<th aria-label="School" class="poptip sort_default_asc left" data-stat="school_name" data-tip="School Name" scope="col">School</th>
<th aria-label="Conf" class="poptip sort_default_asc left" data-stat="conf_abbr" scope="col">Conf</th>
<th aria-label="Class" class="poptip sort_default_asc center" data-stat="class" scope="col">Class

## testing get_stats function

In [None]:
def_df = get_stats(def_table, def_columns)

  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))


In [None]:
def_df

Unnamed: 0,year,school,conference,class,position,games,solo_tackles,assisted_tackles,total_tackles,tackles_loss,sacks,interceptions,int_yards,int_yards_per_int,int_td,passes_defended,fumbles_recovered,fumble_yards,fumble_td,forced_fumbles
0,2019,LSU,SEC,FR,CB,15.0,31.0,7.0,38.0,1.0,0.0,6.0,17.0,2.8,0.0,15.0,1.0,,,0.0
0,2020,LSU,SEC,SO,CB,7.0,19.0,8.0,27.0,2.5,0.0,0.0,0.0,,0.0,5.0,1.0,,,1.0
0,2021,LSU,SEC,JR,CB,3.0,6.0,2.0,8.0,3.5,0.0,0.0,0.0,,0.0,0.0,0.0,,,1.0
0,2022,LSU,SEC,JR,DB,,,,,,,,,,,,,,,
0,Career,LSU,,,,25.0,56.0,17.0,73.0,7.0,0.0,6.0,17.0,2.8,0.0,20.0,2.0,,,2.0


In [None]:
punt_df = get_stats(punt_table, punt_columns)

  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))


In [None]:
punt_df

Unnamed: 0,year,school,conference,class,position,games,punt_returns,punt_ret_yds,punt_ret_avg,punt_td,kick_returns,kick_ret_yds,kick_ret_avg,kick_ret_td
0,2019,LSU,SEC,FR,CB,15.0,17.0,163.0,9.6,0.0,0.0,0.0,,0.0
0,2020,LSU,SEC,SO,CB,7.0,6.0,97.0,16.2,0.0,0.0,0.0,,0.0
0,2021,LSU,SEC,JR,CB,3.0,0.0,0.0,,0.0,0.0,0.0,,0.0
0,2022,LSU,SEC,JR,DB,,,,,,,,,
0,Career,LSU,,,,25.0,23.0,260.0,11.3,0.0,0.0,0.0,,0.0


## Testing for a player without Return Stats

In [None]:
url='https://www.sports-reference.com/cfb/players/patrick-surtain-ii-1.html'

In [None]:
page = take_url_return_page(url)
def_table, punt_table = take_page_return_def_and_punts(page)

TypeError: ignored

### Result & potential solution:
So we get a TypeError in our take_page_return_def_and_punts function if they don't have punts.  I believe the solution will be making this function return a 0 if they don't have return stats.  Then we will check if punt_table == 0, we won't run the get_stats function on it.

# Fixing take_page_return_def_and_punts
- we are going to change the function so that now if punt_returns DNE, we will return punt_table == 0

In [None]:
def take_page_return_def_and_punts(page):
  # this function takes a soup page and looks for the table id defense and returns it.  same for the punt_ret table (which is hidden in the comments)
  def_table = page.find('table', {'id': 'defense'})
  # finding where punt return info is located
  all_punt_ret = page.find(id="all_punt_ret")
  # adding an exception.  If all_punt_ret is empty, we return 0
  try:
    # getting the info inside the comments
    punt_ret = all_punt_ret(text=lambda text: isinstance(text, Comment))[0]
    # making the data parsable by BS
    punt_soup=BeautifulSoup(punt_ret, 'html.parser')
    punt_table = punt_soup.find('table', {'id': 'punt_ret'})
    return def_table, punt_table
  except TypeError:
    return def_table, 0

## Testing new version

In [None]:
def_table, punt_table = take_page_return_def_and_punts(page)

In [None]:
def_table

<table class="sortable stats_table" data-cols-to-freeze=",1" id="defense">
<caption>Defense &amp; Fumbles Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="5" data-stat=""></th><th></th>
<th aria-label="" class="over_header center" colspan="5" data-stat="header_tackles">Tackles</th>
<th aria-label="" class="over_header center" colspan="5" data-stat="header_def_int">Def Int</th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_fumbles">Fumbles</th>
</tr>
<tr>
<th aria-label="Year" class="poptip sort_default_asc right" data-stat="year_id" scope="col">Year</th>
<th aria-label="School" class="poptip sort_default_asc left" data-stat="school_name" data-tip="School Name" scope="col">School</th>
<th aria-label="Conf" class="poptip sort_default_asc left" data-stat="conf_abbr" s

In [None]:
punt_table

0

# Redefining Goal
after exploring the offensive stat pages, I've realized that I can scrape all the pages using the same means, so I'm going to adjust the code below to accomplish that

# Scraping all Pages Offense and Defense

The notebook should be able to be run from this point on:

In [3]:
import requests
import re
from bs4 import BeautifulSoup, Comment
import pandas as pd
import numpy as np

# Defining new Columns

In [4]:
def_columns = ['year', 'school', 'conference', 'class', 'position', 'games', 'solo_tackles', 'assisted_tackles', 'total_tackles', 'tackles_loss', 'sacks', 'interceptions', 'int_yards', 'int_yards_per_int', 'int_td', 'passes_defended', 'fumbles_recovered', 'fumble_yards', 'fumble_td', 'forced_fumbles']
punt_columns = ['year','school','conference','class','position','games','punt_returns','punt_ret_yds','punt_ret_avg','punt_td','kick_returns','kick_ret_yds','kick_ret_avg','kick_ret_td']
pass_columns = ['year','school','conference','class','position','games','completions','attempts','comp_pct','passing_yds','yds_per_att','adj_yds_per_att','passing_td','interceptions','pass_eff_rate']
rush_columns = ['year','school','conference','class','position','games','rush_attempts','rush_yds','rush_avg','rush_td','receptions','rec_yds','rec_avg','rec_td','total_plays','total_yds','total_avg','total_td']
rec_columns = ['year','school','conference','class','position','games','receptions','rec_yds','rec_avg','rec_td','rush_attempts','rush_yds','rush_avg','rush_td','total_plays','total_yds','total_avg','total_td']

In [5]:
all_columns=['year', 'school', 'conference', 'class', 'position', 'games',
             'completions','attempts','comp_pct','passing_yds','yds_per_att','adj_yds_per_att','passing_td','interceptions','pass_eff_rate',
             'rush_attempts','rush_yds','rush_avg','rush_td','receptions','rec_yds','rec_avg','rec_td','total_plays','total_yds','total_avg','total_td',
             'solo_tackles', 'assisted_tackles', 'total_tackles', 'tackles_loss', 'sacks', 'interceptions', 'int_yards', 'int_yards_per_int', 'int_td', 'passes_defended', 'fumbles_recovered', 'fumble_yards', 'fumble_td', 'forced_fumbles'
             'punt_returns','punt_ret_yds','punt_ret_avg','punt_td','kick_returns','kick_ret_yds','kick_ret_avg','kick_ret_td']

# Function: take url and return soup page
same code as before

In [6]:
def take_url_return_page(url):
  # this function takes a url and returns the soup content for it
  r = requests.get(url)
  page = BeautifulSoup(r.content, 'html.parser')
  return page

# Function: take soup return tables

this function will be an adjusted version of the return defense & punts

In [None]:
def take_page_return_tables(page):
  # this function takes a soup page and looks for all the tables
  # we will look for all the tables in every page
  # passing 
  pass_table = page.find('table', {'id':'passing'})
  # rushing
  rush_table = page.find('table', {'id':'rushing'})
  # receiving
  rec_table = page.find('table', {'id':'receiving'})
  # defense
  def_table = page.find('table', {'id': 'defense'})
  
  # punting
  # finding where punt return info is located
  all_punt_ret = page.find(id="all_punt_ret")
  try:
    # getting the info inside the comments
    punt_ret = all_punt_ret(text=lambda text: isinstance(text, Comment))[0]
    # making the data parsable by BS
    punt_soup=BeautifulSoup(punt_ret, 'html.parser')
    punt_table = punt_soup.find('table', {'id': 'punt_ret'})
  except TypeError:
    punt_table = None
  
  # qb rushing
  all_rush = page.find(id="all_rushing")
  try:
    # getting the info inside the comments
    qb_rush = all_rush(text=lambda text: isinstance(text, Comment))[0]
    # making the data parsable by BS
    qb_rush_soup=BeautifulSoup(qb_rush, 'html.parser')
    qb_rush_table = qb_rush_soup.find('table', {'id': 'rushing'})
  except TypeError:
    qb_rush_table = None
  return pass_table, rush_table, rec_table, def_table, punt_table, qb_rush_table
# if the table exists, it should return the table
# if the table does not it will return an null variable

# Function: get stats from tables

So we will be return a mix of tables that either are null or have soup content in them.

we need to:
- check if the table is null or not
-- if null create a dataframe of nulls
-- not null we have the code to return the populated dataframe
- the end goal is dataframes for all different scenarios
-- I think the easiest thing is to always send the same dataframes back
-- and then we can always put them together in the same way, null or not

## updated get stats Function


In [None]:
def get_stats(table, columns):
  # this function takes the stats table and returns a dataframe of the stats
  
  # define the DataFrame
  df = pd.DataFrame(columns=columns)  

  # finds all rows in the table
  rows = table[0].find_all('tr')

  # this counter will be used along with total_games to get the total games played in the final row of the dataframe.
  row_counter = 3
  total_games = 0

  # loop over each row in the table, skipping over header rows (0,1)
  for row in rows[2:]:
    # creating a list where we will store the stats for each row.  We will then append this list to our df
    stat_list = []
    # this part is for the first rows of our table.  these rows contain the year in an 'a' tag
    if row_counter<(len(rows)):
      # creating cells which contains all the relevant stats inside td tags, which we pull using text.
      cells = row.find_all('td')
      # this variable will be used to find cell 5.  cell 5 contains the total games.
      cell_counter=0
      # appending the year to our list... the year is contained in an 'a' tag
      stat_list.append(row.find('a').text)
      # loop for pulling all the relevant stats from cells
      for cell in cells:
        # adding a section so the last line, career will also have the conf, class and pos
        if cell_counter == 1:
          stat_list.append(cell.text)
          conf = cell.text
          cell_counter +=1
        elif cell_counter == 2:
          stat_list.append(cell.text)
          clas = cell.text
          cell_counter +=1
        elif cell_counter == 3:
          stat_list.append(cell.text)
          position = cell.text
          cell_counter +=1

        # when the cell count is 4, that cell contains the games played... we want to total this for use in the last column 
        elif cell_counter == 4:
          games = cell.text
          # try except for catch nulls error
          try:
            games = int(games)
            total_games += games
          except ValueError:
            total_games += 0
          stat_list.append(games)
          cell_counter +=1
        # this is for all the other cells
        else:
          stat_list.append(cell.text)
          cell_counter +=1
        
    # this part is for the last row of our table we are scrapping. it contains career stats
    else:
      cells = row.find_all('td')
      cell_counter=0
      # this last column does not contain year.
      stat_list.append('Career')
      for cell in cells:
        if cell_counter == 1:
          stat_list.append(conf)
          cell_counter +=1
        elif cell_counter == 2:
          stat_list.append(clas)
          cell_counter +=1
        elif cell_counter == 3:
          stat_list.append(position)
          cell_counter +=1
        # cell 4 contains total games
        elif cell_counter == 4:
          stat_list.append(total_games)
          cell_counter +=1
        else:
          stat_list.append(cell.text)
          cell_counter +=1
    df = df.append(pd.DataFrame([stat_list], columns=columns))
    row_counter+=1
  return df

## Function: test_tables then get stats

In [None]:
def test_tables_then_get_stats(pass_table, rush_table, rec_table, def_table, punt_table, qb_rush_table, pass_columns, rush_columns, rec_columns, def_columns, punt_columns):
  if pass_table != None:
    print('Dave')
    pass_df = get_stats(pass_table, pass_columns)
    print('Mike')
    rush_df = get_stats(qb_rush_table, rush_columns)
    rec_df= None
    def_df = None
    punt_df = None
  elif rush_table != None:
    pass_df = None
    rush_df = get_stats(rush_table, rush_columns)
    rec_df = None
    def_df = None
    if punt_table != None:
      punt_df = get_stats(punt_table, punt_columns)
    else:
      punt_df = None
  elif rec_table != None:
    pass_df = None
    rush_df = None
    rec_df = get_stats(rec_table, rec_columns)
    def_df = None
    if punt_table != None:
      punt_df = get_stats(punt_table, punt_columns)
    else:
      punt_df = None
  else:
    pass_df = None
    rush_df = None
    rec_df = None
    def_df = get_stats(def_table, def_columns)
    if punt_table != None:
      punt_df = get_stats(punt_table, punt_columns)
    else:
      punt_df = None
  return pass_df, rush_df, rec_df, def_df, punt_df


## Working version Get_Stats

In [29]:
def get_stats(table, columns):
  # this function takes the stats table and returns a dataframe of the stats
  
  # define the DataFrame
  df = pd.DataFrame(columns=columns)  

  # finds all rows in the table
  rows = table[0].find_all('tr')

  # this counter will be used along with total_games to get the total games played in the final row of the dataframe and to index the dataframe to exit the loop.
  row_counter = 0
  total_games = 0

  # loop over each row in the table, skipping over header rows (0,1)
  for row in rows[2:]:
    # creating a list where we will store the stats for each row.  We will then append this list to our df
    stat_list = []
    
    if row.find('th').text == 'Career':
      stat_list.append(row.find('th').text)
    else:
    # this part is for the first rows of our table.  these rows contain the year in an 'a' tag
      try: 
        stat_list.append(row.find('a').text)
      except AttributeError:
        # after year values, we find the text 'Career' inside a th tag.
        try:
          stat_list.append(row.find('th').text)
        # if neither 'a' nor 'th' tags are present
        except AttributeError:
          break
        
    # cells will contain all the data inside our table td tags
    cells = row.find_all('td')
    
    # counter to be used for total games
    cell_counter=0
      
    # loop for pulling all the relevant stats from cells
    for cell in cells:
      # when the cell count is 4, that cell contains the games played... we want to total this for use in the last row
      if cell_counter == 4:
        games = cell.text
        # try/except for catch nulls error
        try:
          games = int(games)
          total_games += games
        except ValueError:
          total_games += 0
        stat_list.append(games)
        cell_counter +=1
      
      # this is for all the other cells
      else:
        stat_list.append(cell.text)
        cell_counter +=1
    
    df = df.append(pd.DataFrame([stat_list], columns=columns))
    # this is our break to stop the issue when a player has multiple colleges and they are tallied AFTER career
    if df.iloc[row_counter,0]=='Career':
      for i in range(2,5):
        df.iloc[row_counter,i]= df.iloc[row_counter-1,i]
      df.iloc[row_counter,5] = total_games
      break
    row_counter+=1
  return df

# Testing Functions

In [None]:
url = 'https://www.sports-reference.com/cfb/players/trevor-lawrence-1.html'

In [None]:
page = take_url_return_page(url)
pass_table, rush_table, rec_table, def_table, punt_table, qb_rush_table = take_page_return_tables(page)
#pass_df, rush_df, rec_df, def_df, punt_df = test_tables_then_get_stats(pass_table, rush_table, rec_table, def_table, punt_table, qb_rush_table, pass_columns, rush_columns, rec_columns, def_columns, punt_columns)


In [None]:
pass_table

<table class="sortable stats_table" data-cols-to-freeze=",1" id="passing">
<caption>Passing Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="5" data-stat=""></th><th></th>
<th aria-label="" class="over_header center" colspan="9" data-stat="header_passing">Passing</th>
</tr>
<tr>
<th aria-label="Year" class="poptip sort_default_asc right" data-stat="year_id" scope="col">Year</th>
<th aria-label="School" class="poptip sort_default_asc left" data-stat="school_name" data-tip="School Name" scope="col">School</th>
<th aria-label="Conf" class="poptip sort_default_asc left" data-stat="conf_abbr" scope="col">Conf</th>
<th aria-label="Class" class="poptip sort_default_asc center" data-stat="class" scope="col">Class</th>
<th aria-label="Pos" class="poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</t

In [None]:
def_df

Unnamed: 0,year,school,conference,class,position,games,solo_tackles,assisted_tackles,total_tackles,tackles_loss,sacks,interceptions,int_yards,int_yards_per_int,int_td,passes_defended,fumbles_recovered,fumble_yards,fumble_td,forced_fumbles
0,2019,LSU,SEC,FR,CB,15.0,31.0,7.0,38.0,1.0,0.0,6.0,17.0,2.8,0.0,15.0,1.0,,,0.0
0,2020,LSU,SEC,SO,CB,7.0,19.0,8.0,27.0,2.5,0.0,0.0,0.0,,0.0,5.0,1.0,,,1.0
0,2021,LSU,SEC,JR,CB,3.0,6.0,2.0,8.0,3.5,0.0,0.0,0.0,,0.0,0.0,0.0,,,1.0
0,2022,LSU,SEC,JR,DB,,,,,,,,,,,,,,,
0,Career,LSU,,,,25.0,56.0,17.0,73.0,7.0,0.0,6.0,17.0,2.8,0.0,20.0,2.0,,,2.0


In [None]:
punt_df

Unnamed: 0,year,school,conference,class,position,games,punt_returns,punt_ret_yds,punt_ret_avg,punt_td,kick_returns,kick_ret_yds,kick_ret_avg,kick_ret_td
0,2019,LSU,SEC,FR,CB,15.0,17.0,163.0,9.6,0.0,0.0,0.0,,0.0
0,2020,LSU,SEC,SO,CB,7.0,6.0,97.0,16.2,0.0,0.0,0.0,,0.0
0,2021,LSU,SEC,JR,CB,3.0,0.0,0.0,,0.0,0.0,0.0,,0.0
0,2022,LSU,SEC,JR,DB,,,,,,,,,
0,Career,LSU,,,,25.0,23.0,260.0,11.3,0.0,0.0,0.0,,0.0


In [None]:
pass_df = get_stats(pass_table, pass_columns)

  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))
  df = df.append(pd.DataFrame([stat_list], columns=columns))


AttributeError: ignored

In [None]:
pass_table

<table class="sortable stats_table" data-cols-to-freeze=",1" id="passing">
<caption>Passing Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="5" data-stat=""></th><th></th>
<th aria-label="" class="over_header center" colspan="9" data-stat="header_passing">Passing</th>
</tr>
<tr>
<th aria-label="Year" class="poptip sort_default_asc right" data-stat="year_id" scope="col">Year</th>
<th aria-label="School" class="poptip sort_default_asc left" data-stat="school_name" data-tip="School Name" scope="col">School</th>
<th aria-label="Conf" class="poptip sort_default_asc left" data-stat="conf_abbr" scope="col">Conf</th>
<th aria-label="Class" class="poptip sort_default_asc center" data-stat="class" scope="col">Class</th>
<th aria-label="Pos" class="poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</t

# New Approach

In [23]:
df_org = pd.read_csv('https://raw.githubusercontent.com/michaelseman/Portfolio/main/PythonML_NFL_Draft/BioPlusConfBMI.csv')

In [24]:
df_org.head()

Unnamed: 0,year,round,draft_pick,draft_team,player,player_url,position,age,college,college_stats_url,...,bday,hs,height,weight,height_decimal,birthplace_town,birthplace_state,home_state,conference,bmi
0,2022,1,1,JAX,Travon Walker,https://www.pro-football-reference.com/players...,DE,21.0,Georgia,http://www.sports-reference.com/cfb/players/tr...,...,12/18/2000,"Upson,GA",6'5,275.0,6.42,Thomaston,"GA,",GA,SEC,32.6
1,2022,1,2,DET,Aidan Hutchinson,https://www.pro-football-reference.com/players...,DE,22.0,Michigan,http://www.sports-reference.com/cfb/players/ai...,...,8/9/2000,"Divine Child,MI",6'7,265.0,6.58,MI,,MI,BIG10,29.9
2,2022,1,3,HOU,Derek Stingley Jr.,https://www.pro-football-reference.com/players...,CB,21.0,LSU,http://www.sports-reference.com/cfb/players/de...,...,6/20/2001,"Dunham,LA",6'1,195.0,6.08,Baton Rouge,"LA,",LA,SEC,25.8
3,2022,1,4,NYJ,Sauce Gardner,https://www.pro-football-reference.com/players...,CB,22.0,Cincinnati,http://www.sports-reference.com/cfb/players/ah...,...,8/31/2000,"Martin Luther King,MI",6'3,200.0,6.25,Detroit,"MI,",MI,AAC,25.0
4,2022,1,5,NYG,Kayvon Thibodeaux,https://www.pro-football-reference.com/players...,DE,21.0,Oregon,http://www.sports-reference.com/cfb/players/ka...,...,12/15/2000,"Susan Miller Dorsey,CA,Oaks Christian,CA",6'5,250.0,6.42,CA,,CA,PAC12,29.6


In [25]:
df_test = df_org.head(5).copy()

In [12]:
df_test.head()

Unnamed: 0,year,round,draft_pick,draft_team,player,player_url,position,age,college,college_stats_url,...,bday,hs,height,weight,height_decimal,birthplace_town,birthplace_state,home_state,conference,bmi
0,2022,1,1,JAX,Travon Walker,https://www.pro-football-reference.com/players...,DE,21.0,Georgia,http://www.sports-reference.com/cfb/players/tr...,...,12/18/2000,"Upson,GA",6'5,275.0,6.42,Thomaston,"GA,",GA,SEC,32.6
1,2022,1,2,DET,Aidan Hutchinson,https://www.pro-football-reference.com/players...,DE,22.0,Michigan,http://www.sports-reference.com/cfb/players/ai...,...,8/9/2000,"Divine Child,MI",6'7,265.0,6.58,MI,,MI,BIG10,29.9
2,2022,1,3,HOU,Derek Stingley Jr.,https://www.pro-football-reference.com/players...,CB,21.0,LSU,http://www.sports-reference.com/cfb/players/de...,...,6/20/2001,"Dunham,LA",6'1,195.0,6.08,Baton Rouge,"LA,",LA,SEC,25.8
3,2022,1,4,NYJ,Sauce Gardner,https://www.pro-football-reference.com/players...,CB,22.0,Cincinnati,http://www.sports-reference.com/cfb/players/ah...,...,8/31/2000,"Martin Luther King,MI",6'3,200.0,6.25,Detroit,"MI,",MI,AAC,25.0
4,2022,1,5,NYG,Kayvon Thibodeaux,https://www.pro-football-reference.com/players...,DE,21.0,Oregon,http://www.sports-reference.com/cfb/players/ka...,...,12/15/2000,"Susan Miller Dorsey,CA,Oaks Christian,CA",6'5,250.0,6.42,CA,,CA,PAC12,29.6


In [22]:
df_org['position'].unique()

array(['DE', 'CB', 'T', 'OL', 'WR', 'DT', 'S', 'G', 'QB', 'LB', 'DB',
       'RB', 'TE', 'OLB', 'K', 'P', 'DL', 'C', 'FB', 'LS', 'ILB'],
      dtype=object)

In [None]:
defense = ['DE','CB','DT','LB','DB','OLB','DL','ILB']
offense = ['WR','RB','TE','FB']

In [20]:
for index, row in df_test.iterrows():
  url, position = row['college_stats_url'], row['position']
  

http://www.sports-reference.com/cfb/players/travon-walker-1.html DE
http://www.sports-reference.com/cfb/players/aidan-hutchinson-1.html DE
http://www.sports-reference.com/cfb/players/derek-stingley-jr-1.html CB
http://www.sports-reference.com/cfb/players/ahmad-gardner-1.html CB
http://www.sports-reference.com/cfb/players/kayvon-thibodeaux-1.html DE
