In [140]:
#imports

from bs4 import BeautifulSoup
import urllib
import re
import pandas as pd
import numpy as np

In [141]:
#this block of code extracts team names that will be used for urls

teams_url = 'http://www.footballdb.com/teams/index.html'
teams_page = urllib.urlopen(teams_url)

In [142]:
#preview of the html

teams_soup = BeautifulSoup(teams_page, 'html.parser')
print(teams_soup.prettify()[0:1000])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="initial-scale=1.0, width=device-width" name="viewport"/>
  <title>
   Teams | The Football Database
  </title>
  <meta content="View stats and statistics, rosters, results, draft results and transactions for current and defunct NFL teams" name="description"/>
  <meta content="index,follow" name="robots"/>
  <meta content="FootballDB.com" property="og:site_name">
   <meta content="Teams | The Football Database" property="og:title"/>
   <meta content="website" property="og:type"/>
   <meta content="View stats and statistics, rosters, results, draft results and transactions for current and defunct NFL teams" property="og:description"/>
   <meta content="http://www.footballdb.com/images/footballdb_200x200.png" property="og:image"/>
   <meta content="http://www.footballdb.com/teams/index.html" property="og:url"/>
   <meta content="summary" name="twitte

In [143]:
#list with html tags

teams_list = teams_soup.find_all('a', attrs = {'href': re.compile('/teams/nfl/')})[0:31]
teams_list

[<a href="/teams/nfl/buffalo-bills">Buffalo Bills</a>,
 <a href="/teams/nfl/miami-dolphins">Miami Dolphins</a>,
 <a href="/teams/nfl/new-england-patriots">New England Patriots</a>,
 <a href="/teams/nfl/new-york-jets">New York Jets</a>,
 <a href="/teams/nfl/baltimore-ravens">Baltimore Ravens</a>,
 <a href="/teams/nfl/cincinnati-bengals">Cincinnati Bengals</a>,
 <a href="/teams/nfl/cleveland-browns">Cleveland Browns</a>,
 <a href="/teams/nfl/pittsburgh-steelers">Pittsburgh Steelers</a>,
 <a href="/teams/nfl/houston-texans">Houston Texans</a>,
 <a href="/teams/nfl/indianapolis-colts">Indianapolis Colts</a>,
 <a href="/teams/nfl/jacksonville-jaguars">Jacksonville Jaguars</a>,
 <a href="/teams/nfl/tennessee-titans">Tennessee Titans</a>,
 <a href="/teams/nfl/denver-broncos">Denver Broncos</a>,
 <a href="/teams/nfl/kansas-city-chiefs">Kansas City Chiefs</a>,
 <a href="/teams/nfl/los-angeles-chargers">Los Angeles Chargers</a>,
 <a href="/teams/nfl/oakland-raiders">Oakland Raiders</a>,
 <a href

In [144]:
#list wihtout html

teams_list_clean = []

for i in range(0, len(teams_list)):
    teams_list_clean.append(re.sub('<[^<]+?>', '', str(teams_list[i])).lower().replace(' ', '-'))
    
teams_list_clean

['buffalo-bills',
 'miami-dolphins',
 'new-england-patriots',
 'new-york-jets',
 'baltimore-ravens',
 'cincinnati-bengals',
 'cleveland-browns',
 'pittsburgh-steelers',
 'houston-texans',
 'indianapolis-colts',
 'jacksonville-jaguars',
 'tennessee-titans',
 'denver-broncos',
 'kansas-city-chiefs',
 'los-angeles-chargers',
 'oakland-raiders',
 'dallas-cowboys',
 'new-york-giants',
 'philadelphia-eagles',
 'washington-redskins',
 'chicago-bears',
 'detroit-lions',
 'green-bay-packers',
 'minnesota-vikings',
 'atlanta-falcons',
 'carolina-panthers',
 'new-orleans-saints',
 'tampa-bay-buccaneers',
 'arizona-cardinals',
 'los-angeles-rams',
 'san-francisco-49ers']

In [145]:
#instantiate dataframes. the columns are what is available on the website of interest

passing_cols = ['player', 'pass_attempts', 'completions', 'compl_percent', 'passing_yards', 'yards_per_pass', 'passing_touchdowns', 'touchdown_percentage', 'interceptions', 'int_percentage', 'long', 'sack', 'loss', 'passer_rating']
rushing_cols = ['player', 'games_played', 'rush_attempts', 'rush_yards', 'yards_per_rush', 'yards_per_game', 'long', 'rush_touchdowns', 'first_downs']
receiving_cols = ['player', 'games', 'receptions', 'receiving_yards', 'yards_per_catch', 'yards_per_game', 'long', 'receiving_touchdowns', 'first_downs', 'targets', 'yards_after_catch']
kick_returns_cols = ['player', 'kick_returns', 'kick_return_yards', 'kick_return_average', 'fair_catches', 'long', 'touchdowns']
punt_returns_cols = ['player', 'punt_returns', 'punt_return_yards', 'punt_return_average', 'fair_catches', 'long', 'touchdowns']
punts_cols = ['player', 'punts', 'yards', 'average', 'long', 'touchbacks', 'inside_twenty', 'out_of_bounds', 'fair_catches', 'downed', 'blocked', 'net', 'return', 'return_yards', 'touchdowns']
kicking_cols = ['player', 'pat', 'field_goals', '0_19', '20_29', '30-39', '40-49', '50_plus', 'long', 'points']
kickoffs_cols = ['player', 'number', 'yards', 'average', 'long', 'touchbacks', 'out_of_bounds', 'returns', 'return_yards', 'touchdowns', 'osk', 'osr']
defense_cols = ['player', 'interceptions', 'yards', 'average', 'long', 'touchdowns', 'solo_tackles', 'assists',  'total_tackles', 'sacks', 'yards_for_loss']

passing_df = pd.DataFrame(columns = passing_cols)
rushing_df = pd.DataFrame(columns = rushing_cols)
receiving_df = pd.DataFrame(columns = receiving_cols)
kick_returns_df = pd.DataFrame(columns = kick_returns_cols)
punt_returns_df = pd.DataFrame(columns = punt_returns_cols)
punts_df = pd.DataFrame(columns = punts_cols)
kicking_df = pd.DataFrame(columns = kicking_cols)
kickoffs_df = pd.DataFrame(columns = kickoff_cols)
defense_df = pd.DataFrame(columns = defense_cols)

In [146]:
#all stats

for i in range(0, len(teams_list_clean)):
    url = 'http://www.footballdb.com/teams/nfl/' + teams_list_clean[i] + '/stats'
    page = urllib.urlopen(url)
    soup = BeautifulSoup(page, 'html.parser')
    all = soup.find_all('td')
    
    #passing
    
    passing_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_P'})
    passing_players = str(passing_html).count('/players/') / 2
    passing_count = int(14 * ((str(passing_html).count('/players/') / 2) + 2))
    
    passing = all[0:passing_count]
    passing_clean = []
    
    for x in range(0, len(passing)):
        passing_clean.append(re.sub('<[^<]+?>', '', str(passing[x])))
    
    passing_df2 = pd.DataFrame(np.array(passing_clean).reshape(passing_players + 2, len(passing_cols)), columns = passing_cols)
    passing_df2['team'] = teams_list_clean[i]
    passing_df = passing_df.append(passing_df2)
    
    print('successfuly imported passing stats for the ' + teams_list_clean[i])
    
    #rushing
    
    rushing_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_R'})
    rushing_players = str(rushing_html).count('/players/') / 2
    rushing_count = int(9 * ((str(rushing_html).count('/players/') / 2) + 2))
    
    rushing = all[passing_count:rushing_count + passing_count] #update this
    rushing_clean = []
    
    for x in range(0, len(rushing)):
        rushing_clean.append(re.sub('<[^<]+?>', '', str(rushing[x])))
        
    rushing_df2 = pd.DataFrame(np.array(rushing_clean).reshape(rushing_players + 2, len(rushing_cols)), columns = rushing_cols)
    rushing_df2['team'] = teams_list_clean[i]
    rushing_df = rushing_df.append(rushing_df2)
    
    print('successfuly imported rushing stats for the ' + teams_list_clean[i])
    
    #receiving
    
    receiving_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_C'})
    receiving_players = str(receiving_html).count('/players/') / 2
    receiving_count = int(11 * ((str(receiving_html).count('/players/') / 2) + 2))
    
    receiving = all[passing_count + rushing_count:receiving_count + rushing_count + passing_count]
    receiving_clean = []
    
    for x in range(0, len(receiving)):
        receiving_clean.append(re.sub('<[^<]+?>', '', str(receiving[x])))

    receiving_df2 = pd.DataFrame(np.array(receiving_clean).reshape(receiving_players + 2, len(receiving_cols)), columns = receiving_cols)
    receiving_df2['team'] = teams_list_clean[i]
    receiving_df = receiving_df.append(receiving_df2)
    
    print('successfuly imported receiving stats for the ' + teams_list_clean[i])
    
    #kick returns
    
    kick_returns_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_KR'})
    kick_returns_players = str(kick_returns_html).count('/players/') / 2
    kick_returns_count = int(7 * ((str(kick_returns_html).count('/players/') / 2) + 2))
    
    kick_returns = all[passing_count + rushing_count + receiving_count:kick_returns_count + receiving_count + rushing_count + passing_count]
    kick_returns_clean = []
    
    for x in range(0, len(kick_returns)):
        kick_returns_clean.append(re.sub('<[^<]+?>', '', str(kick_returns[x])))

    kick_returns_df2 = pd.DataFrame(np.array(kick_returns_clean).reshape(kick_returns_players + 2, len(kick_returns_cols)), columns = kick_returns_cols)
    kick_returns_df2['team'] = teams_list_clean[i]
    kick_returns_df = kick_returns_df.append(kick_returns_df2)
    
    print('successfuly imported kick return stats for the ' + teams_list_clean[i])
    
    #punt returns
    
    punt_returns_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_PR'})
    punt_returns_players = str(punt_returns_html).count('/players/') / 2
    punt_returns_count = int(7 * ((str(punt_returns_html).count('/players/') / 2) + 2))
    
    punt_returns = all[passing_count + rushing_count + receiving_count + kick_returns_count:punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
    punt_returns_clean = []
    
    for x in range(0, len(punt_returns)):
        punt_returns_clean.append(re.sub('<[^<]+?>', '', str(punt_returns[x])))
        
    punt_returns_df2 = pd.DataFrame(np.array(punt_returns_clean).reshape(punt_returns_players + 2, len(punt_returns_cols)), columns = punt_returns_cols)
    punt_returns_df2['team'] = teams_list_clean[i]
    punt_returns_df = punt_returns_df.append(punt_returns_df2)
    
    print('successfuly imported punt return stats for the ' + teams_list_clean[i])
    
    #punting
    
    punts_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_PR'})
    punts_players = str(punt_returns_html).count('/players/') / 2
    punts_count = int(15 * ((str(punt_returns_html).count('/players/') / 2) + 2))
    
    punts = all[passing_count + rushing_count + receiving_count + kick_returns_count + punt_returns_count:punts_count + punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
    punts_clean = []
    
    for x in range(0, len(punts)):
        punts_clean.append(re.sub('<[^<]+?>', '', str(punts[x])))
        
    punts_df2 = pd.DataFrame(np.array(punts_clean).reshape(punts_players + 2, len(punts_cols)), columns = punts_cols)
    punts_df2['team'] = teams_list_clean[i]
    punts_df = punts_df.append(punts_df2)
    
    print('successfuly imported punting stats for the ' + teams_list_clean[i])
    
    #kicking
    
    kicking_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_U'})
    kicking_players = str(kicking_html).count('/players/') / 2
    kicking_count = int(10 * ((str(kicking_html).count('/players/') / 2) + 2))
    
    kicking = all[passing_count + rushing_count + receiving_count + kick_returns_count + punt_returns_count + punts_count:kicking_count + punts_count + punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
    kicking_clean = []
    
    for x in range(0, len(kicking)):
        kicking_clean.append(re.sub('<[^<]+?>', '', str(kicking[x])))
        
    kicking_df2 = pd.DataFrame(np.array(kicking_clean).reshape(kicking_players + 2, len(kicking_cols)), columns = kicking_cols)
    kicking_df2['team'] = teams_list_clean[i]
    kicking_df = kicking_df.append(kicking_df2)
    
    print('successfuly imported kicking stats for the ' + teams_list_clean[i])
    
    #kickoffs
    
    kickoffs_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_O'})
    kickoffs_players = str(kickoffs_html).count('/players/') / 2
    kickoffs_count = int(12 * ((str(kickoffs_html).count('/players/') / 2) + 2))
    
    kickoffs = all[passing_count + rushing_count + receiving_count + kick_returns_count + punt_returns_count + punts_count + kicking_count:kickoffs_count + kicking_count + punts_count + punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
    kickoffs_clean = []
    
    for x in range(0, len(kickoffs)):
        kickoffs_clean.append(re.sub('<[^<]+?>', '', str(kickoffs[x])))
        
    kickoffs_df2 = pd.DataFrame(np.array(kickoffs_clean).reshape(kickoffs_players + 2, len(kickoffs_cols)), columns = kickoffs_cols)
    kickoffs_df2['team'] = teams_list_clean[i]
    kickoffs_df = kickoffs_df.append(kickoffs_df2)
    
    print('successfuly imported kickoff stats for the ' + teams_list_clean[i])
    
    #defense
    
    defense_html = soup.find('div', attrs = {'class': 'divToggle_defense hidden-xs', 'id': 'divToggle_D'})
    defense_players = str(defense_html).count('/players/') / 2
    defense_count = int(11 * ((str(defense_html).count('/players/') / 2) + 2))
    
    defense = all[passing_count + rushing_count + receiving_count + kick_returns_count + punt_returns_count + punts_count + kicking_count + kickoffs_count:defense_count + kickoffs_count + kicking_count + punts_count + punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
    defense_clean = []
    
    for x in range(0, len(defense)):
        defense_clean.append(re.sub('<[^<]+?>', '', str(defense[x])))
        
    defense_df2 = pd.DataFrame(np.array(defense_clean).reshape(defense_players + 2, len(defense_cols)), columns = defense_cols)
    defense_df2['team'] = teams_list_clean[i]
    defense_df = defense_df.append(defense_df2)
    
    print('successfuly imported defense stats for the ' + teams_list_clean[i])

successfuly imported passing stats for the buffalo-bills
successfuly imported rushing stats for the buffalo-bills
successfuly imported receiving stats for the buffalo-bills
successfuly imported kick return stats for the buffalo-bills
successfuly imported punt return stats for the buffalo-bills
successfuly imported punting stats for the buffalo-bills
successfuly imported kicking stats for the buffalo-bills
successfuly imported kickoff stats for the buffalo-bills
successfuly imported defense stats for the buffalo-bills
successfuly imported passing stats for the miami-dolphins
successfuly imported rushing stats for the miami-dolphins
successfuly imported receiving stats for the miami-dolphins
successfuly imported kick return stats for the miami-dolphins
successfuly imported punt return stats for the miami-dolphins
successfuly imported punting stats for the miami-dolphins
successfuly imported kicking stats for the miami-dolphins
successfuly imported kickoff stats for the miami-dolphins
suc

In [None]:
#output to csv files

passing_df.to_csv('nfl_passing.csv')
rushing_df.to_csv('nfl.rushing.csv')
receiving_df.to_csv('nfl_receiving.csv')
kick_returns_df.to_csv('nfl_kick_returns.csv')
punt_returns_df.to_csv('nfl_punt_returns.csv')
kicking_df.to_csv('nfl_kicking.csv')
kickoffs_df.to_csv('nfl_kickoff.csv')
defense_df.to_csv('nfl_defense.csv')

In [104]:
#all code below was used for testing (more output to understand what is going on)

url = 'http://www.footballdb.com/teams/nfl/san-francisco-49ers/stats'

In [105]:
page = urllib.urlopen(url)

In [106]:
soup = BeautifulSoup(page, 'html.parser')
print(soup.prettify()[0:1000])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="initial-scale=1.0, width=device-width" name="viewport"/>
  <title>
   San Francisco 49ers Statistics | The Football Database
  </title>
  <meta content="San Francisco 49ers stats and statistics for the 2017 NFL season, including rushing, passing, receiving, kickoff returns, punt returns, punting, kicking and defense" name="description"/>
  <meta content="index,follow" name="robots"/>
  <meta content="FootballDB.com" property="og:site_name">
   <meta content="San Francisco 49ers Statistics | The Football Database" property="og:title"/>
   <meta content="website" property="og:type"/>
   <meta content="San Francisco 49ers stats and statistics for the 2017 NFL season, including rushing, passing, receiving, kickoff returns, punt returns, punting, kicking and defense" property="og:description"/>
   <meta content="http://www.footballdb.com/images/footbal

In [107]:
passing_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_P'})
passing_html

<div class="divToggle_offense" id="divToggle_P">\n<a name="p"></a>\n<h2>Passing</h2>\n<table class="statistics scrollable">\n<thead>\n<tr class="header right">\n<th class="left" width="120">Player</th>\n<th>Att</th>\n<th>Cmp</th>\n<th>Pct</th>\n<th>Yds</th>\n<th>YPA</th>\n<th>TD</th>\n<th>TD%</th>\n<th>Int</th>\n<th>Int%</th>\n<th>Lg</th>\n<th>Sack</th>\n<th>Loss</th>\n<th>Rate</th>\n</tr>\n</thead>\n<tbody>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/cj-beathard-beathcj01">C.J. Beathard</a></span><span class="visible-xs"><a href="/players/cj-beathard-beathcj01">C.\xa0Beathard</a></span></td><td>224</td><td>123</td><td>54.9</td><td>1,430</td><td>6.4</td><td>4</td><td>1.8</td><td>6</td><td>2.7</td><td>83t</td><td>19</td><td>141</td><td>69.2</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/jimmy-garoppolo-garopji01">Jimmy Garoppolo</a></span><span class="visible-xs"><a href="/players/jimmy-garoppolo-ga

In [108]:
#calculate number of players and total values

passing_players = str(passing_html).count('/players/') / 2
passing_count = int(14 * ((str(passing_html).count('/players/') / 2) + 2))
passing_count

70

In [109]:
print(type(passing_count))

<type 'int'>


In [110]:
all = soup.find_all('td')
passing = all[0:passing_count]
passing

[<td class="left"><span class="hidden-xs"><a href="/players/cj-beathard-beathcj01">C.J. Beathard</a></span><span class="visible-xs"><a href="/players/cj-beathard-beathcj01">C.\xa0Beathard</a></span></td>,
 <td>224</td>,
 <td>123</td>,
 <td>54.9</td>,
 <td>1,430</td>,
 <td>6.4</td>,
 <td>4</td>,
 <td>1.8</td>,
 <td>6</td>,
 <td>2.7</td>,
 <td>83t</td>,
 <td>19</td>,
 <td>141</td>,
 <td>69.2</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/jimmy-garoppolo-garopji01">Jimmy Garoppolo</a></span><span class="visible-xs"><a href="/players/jimmy-garoppolo-garopji01">J.\xa0Garoppolo</a></span></td>,
 <td>145</td>,
 <td>100</td>,
 <td>69.0</td>,
 <td>1,268</td>,
 <td>8.7</td>,
 <td>5</td>,
 <td>3.4</td>,
 <td>3</td>,
 <td>2.1</td>,
 <td>61</td>,
 <td>8</td>,
 <td>57</td>,
 <td>98.9</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/brian-hoyer-hoyerbr01">Brian Hoyer</a></span><span class="visible-xs"><a href="/players/brian-hoyer-hoyerbr01">B.\xa0Hoyer</a></span><

In [111]:
rushing_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_R'})
rushing_html

<div class="divToggle_offense" id="divToggle_R">\n<a name="r"></a>\n<h2>Rushing</h2>\n<table class="statistics scrollable">\n<thead>\n<tr class="header right"><th class="left" width="120">Player</th><th>Gms</th><th>Att</th><th>Yds</th><th>Avg</th><th>YPG</th><th>Lg</th><th>TD</th><th>FD</th></tr>\n</thead>\n<tbody>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/carlos-hyde-hydeca01">Carlos Hyde</a></span><span class="visible-xs"><a href="/players/carlos-hyde-hydeca01">C.\xa0Hyde</a></span></td><td>15</td><td>225</td><td>850</td><td>3.78</td><td>56.7</td><td>61</td><td>6</td><td>39</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/matt-breida-breidma01">Matt Breida</a></span><span class="visible-xs"><a href="/players/matt-breida-breidma01">M.\xa0Breida</a></span></td><td>15</td><td>93</td><td>393</td><td>4.23</td><td>26.2</td><td>33t</td><td>2</td><td>18</td></tr>\n<tr class="row0 right">\n<td class="left

In [112]:
rushing_players = str(rushing_html).count('/players/') / 2
rushing_count = int(9 * ((str(rushing_html).count('/players/') / 2) + 2))
rushing_count

90

In [113]:
rushing = all[passing_count:rushing_count + passing_count]
rushing

[<td class="left"><span class="hidden-xs"><a href="/players/carlos-hyde-hydeca01">Carlos Hyde</a></span><span class="visible-xs"><a href="/players/carlos-hyde-hydeca01">C.\xa0Hyde</a></span></td>,
 <td>15</td>,
 <td>225</td>,
 <td>850</td>,
 <td>3.78</td>,
 <td>56.7</td>,
 <td>61</td>,
 <td>6</td>,
 <td>39</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/matt-breida-breidma01">Matt Breida</a></span><span class="visible-xs"><a href="/players/matt-breida-breidma01">M.\xa0Breida</a></span></td>,
 <td>15</td>,
 <td>93</td>,
 <td>393</td>,
 <td>4.23</td>,
 <td>26.2</td>,
 <td>33t</td>,
 <td>2</td>,
 <td>18</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/cj-beathard-beathcj01">C.J. Beathard</a></span><span class="visible-xs"><a href="/players/cj-beathard-beathcj01">C.\xa0Beathard</a></span></td>,
 <td>7</td>,
 <td>26</td>,
 <td>136</td>,
 <td>5.23</td>,
 <td>19.4</td>,
 <td>16</td>,
 <td>3</td>,
 <td>9</td>,
 <td class="left"><span class="hidden-xs"><a href

In [114]:
receiving_html = soup.find('div', attrs = {'class': 'divToggle_offense', 'id': 'divToggle_C'})
receiving_html

<div class="divToggle_offense" id="divToggle_C">\n<a name="c"></a>\n<h2>Receiving</h2>\n<table class="statistics scrollable">\n<thead>\n<tr class="header right"><th class="left" width="120">Player</th><th>Gms</th><th>Rec</th><th>Yds</th><th>Avg</th><th>YPG</th><th>Lg</th><th>TD</th><th>FD</th><th>Tar</th><th>YAC</th></tr>\n</thead>\n<tbody>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/carlos-hyde-hydeca01">Carlos Hyde</a></span><span class="visible-xs"><a href="/players/carlos-hyde-hydeca01">C.\xa0Hyde</a></span></td><td>15</td><td>57</td><td>340</td><td>5.96</td><td>22.7</td><td>18</td><td>0</td><td>17</td><td>84</td><td>304</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/marquise-goodwin-goodwma02">Marquise Goodwin</a></span><span class="visible-xs"><a href="/players/marquise-goodwin-goodwma02">M.\xa0Goodwin</a></span></td><td>15</td><td>54</td><td>934</td><td>17.30</td><td>62.3</td><td>83t</td><td

In [115]:
receiving_players = str(receiving_html).count('/players/') / 2
receiving_count = int(11 * ((str(receiving_html).count('/players/') / 2) + 2))
receiving_count

176

In [116]:
receiving = all[passing_count + rushing_count:receiving_count + rushing_count + passing_count]
receiving

[<td class="left"><span class="hidden-xs"><a href="/players/carlos-hyde-hydeca01">Carlos Hyde</a></span><span class="visible-xs"><a href="/players/carlos-hyde-hydeca01">C.\xa0Hyde</a></span></td>,
 <td>15</td>,
 <td>57</td>,
 <td>340</td>,
 <td>5.96</td>,
 <td>22.7</td>,
 <td>18</td>,
 <td>0</td>,
 <td>17</td>,
 <td>84</td>,
 <td>304</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/marquise-goodwin-goodwma02">Marquise Goodwin</a></span><span class="visible-xs"><a href="/players/marquise-goodwin-goodwma02">M.\xa0Goodwin</a></span></td>,
 <td>15</td>,
 <td>54</td>,
 <td>934</td>,
 <td>17.30</td>,
 <td>62.3</td>,
 <td>83t</td>,
 <td>1</td>,
 <td>44</td>,
 <td>101</td>,
 <td>185</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/trent-taylor-taylotr04">Trent Taylor</a></span><span class="visible-xs"><a href="/players/trent-taylor-taylotr04">T.\xa0Taylor</a></span></td>,
 <td>14</td>,
 <td>41</td>,
 <td>401</td>,
 <td>9.78</td>,
 <td>28.6</td>,
 <td>33</td>,


In [117]:
kick_returns_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_KR'})
kick_returns_html

<div class="divToggle_special hidden-xs" id="divToggle_KR">\n<a name="kr"></a>\n<h2>Kickoff Returns</h2>\n<table class="statistics scrollable">\n<thead>\n<tr class="header right"><th class="left" width="120">Player</th><th>Num</th><th>Yds</th><th>Avg</th><th>FC</th><th>Lg</th><th>TD</th></tr>\n</thead>\n<tbody>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/victor-bolden-boldevi01">Victor Bolden</a></span><span class="visible-xs"><a href="/players/victor-bolden-boldevi01">V.\xa0Bolden</a></span></td><td>19</td><td>396</td><td>20.84</td><td>0</td><td>34</td><td>0</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/raheem-mostert-mostera01">Raheem Mostert</a></span><span class="visible-xs"><a href="/players/raheem-mostert-mostera01">R.\xa0Mostert</a></span></td><td>5</td><td>83</td><td>16.60</td><td>0</td><td>21</td><td>0</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/

In [118]:
kick_returns_players = str(kick_returns_html).count('/players/') / 2
kick_returns_count = int(7 * ((str(kick_returns_html).count('/players/') / 2) + 2))
kick_returns_count

49

In [119]:
kick_returns = all[passing_count + rushing_count + receiving_count:kick_returns_count + receiving_count + rushing_count + passing_count]
kick_returns

[<td class="left"><span class="hidden-xs"><a href="/players/victor-bolden-boldevi01">Victor Bolden</a></span><span class="visible-xs"><a href="/players/victor-bolden-boldevi01">V.\xa0Bolden</a></span></td>,
 <td>19</td>,
 <td>396</td>,
 <td>20.84</td>,
 <td>0</td>,
 <td>34</td>,
 <td>0</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/raheem-mostert-mostera01">Raheem Mostert</a></span><span class="visible-xs"><a href="/players/raheem-mostert-mostera01">R.\xa0Mostert</a></span></td>,
 <td>5</td>,
 <td>83</td>,
 <td>16.60</td>,
 <td>0</td>,
 <td>21</td>,
 <td>0</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/matt-breida-breidma01">Matt Breida</a></span><span class="visible-xs"><a href="/players/matt-breida-breidma01">M.\xa0Breida</a></span></td>,
 <td>5</td>,
 <td>83</td>,
 <td>16.60</td>,
 <td>0</td>,
 <td>28</td>,
 <td>0</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/trent-taylor-taylotr04">Trent Taylor</a></span><span class="visible

In [120]:
punt_returns_html = soup.find('div', attrs = {'class': 'divToggle_special hidden-xs', 'id': 'divToggle_PR'})
punt_returns_html

<div class="divToggle_special hidden-xs" id="divToggle_PR">\n<a name="pr"></a>\n<h2>Punt Returns</h2>\n<table class="statistics scrollable">\n<thead>\n<tr class="header right"><th class="left" width="120">Player</th><th>Num</th><th>Yds</th><th>Avg</th><th>FC</th><th>Lg</th><th>TD</th></tr>\n</thead>\n<tbody>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/trent-taylor-taylotr04">Trent Taylor</a></span><span class="visible-xs"><a href="/players/trent-taylor-taylotr04">T.\xa0Taylor</a></span></td><td>27</td><td>249</td><td>9.22</td><td>15</td><td>39</td><td>0</td></tr>\n<tr class="row0 right">\n<td class="left"><span class="hidden-xs"><a href="/players/victor-bolden-boldevi01">Victor Bolden</a></span><span class="visible-xs"><a href="/players/victor-bolden-boldevi01">V.\xa0Bolden</a></span></td><td>4</td><td>23</td><td>5.75</td><td>1</td><td>16</td><td>0</td></tr>\n<tr class="header right">\n<td class="left">San Francisco</td><td>31</td><td>272</td><t

In [121]:
punt_returns_players = str(punt_returns_html).count('/players/') / 2
punt_returns_count = int(7 * ((str(punt_returns_html).count('/players/') / 2) + 2))
punt_returns_count

28

In [122]:
punt_returns = all[passing_count + rushing_count + receiving_count + kick_returns_count:punt_returns_count + kick_returns_count + receiving_count + rushing_count + passing_count]
punt_returns

[<td class="left"><span class="hidden-xs"><a href="/players/trent-taylor-taylotr04">Trent Taylor</a></span><span class="visible-xs"><a href="/players/trent-taylor-taylotr04">T.\xa0Taylor</a></span></td>,
 <td>27</td>,
 <td>249</td>,
 <td>9.22</td>,
 <td>15</td>,
 <td>39</td>,
 <td>0</td>,
 <td class="left"><span class="hidden-xs"><a href="/players/victor-bolden-boldevi01">Victor Bolden</a></span><span class="visible-xs"><a href="/players/victor-bolden-boldevi01">V.\xa0Bolden</a></span></td>,
 <td>4</td>,
 <td>23</td>,
 <td>5.75</td>,
 <td>1</td>,
 <td>16</td>,
 <td>0</td>,
 <td class="left">San Francisco</td>,
 <td>31</td>,
 <td>272</td>,
 <td>8.77</td>,
 <td>16</td>,
 <td>39</td>,
 <td>0</td>,
 <td class="left">Opponents</td>,
 <td>31</td>,
 <td>132</td>,
 <td>4.26</td>,
 <td>23</td>,
 <td>61t</td>,
 <td>1</td>]

In [123]:
passing_clean = []
rushing_clean = []
receiving_clean = []
kick_returns_clean = []
punt_returns_clean = []

for i in range(0, len(passing)):
    passing_clean.append(re.sub('<[^<]+?>', '', str(passing[i])))
    
for i in range(0, len(rushing)):
    rushing_clean.append(re.sub('<[^<]+?>', '', str(rushing[i])))
    
for i in range(0, len(receiving)):
    receiving_clean.append(re.sub('<[^<]+?>', '', str(receiving[i])))
    
for i in range(0, len(kick_returns)):
    kick_returns_clean.append(re.sub('<[^<]+?>', '', str(kick_returns[i])))

for i in range(0, len(punt_returns)):
    punt_returns_clean.append(re.sub('<[^<]+?>', '', str(punt_returns[i])))

In [124]:
passing_clean[0:10]

['C.J. BeathardC.\xc2\xa0Beathard',
 '224',
 '123',
 '54.9',
 '1,430',
 '6.4',
 '4',
 '1.8',
 '6',
 '2.7']

In [125]:
rushing_clean[0:10]

['Carlos HydeC.\xc2\xa0Hyde',
 '15',
 '225',
 '850',
 '3.78',
 '56.7',
 '61',
 '6',
 '39',
 'Matt BreidaM.\xc2\xa0Breida']

In [126]:
receiving_clean[0:10]

['Carlos HydeC.\xc2\xa0Hyde',
 '15',
 '57',
 '340',
 '5.96',
 '22.7',
 '18',
 '0',
 '17',
 '84']

In [127]:
kick_returns_clean[0:10]

['Victor BoldenV.\xc2\xa0Bolden',
 '19',
 '396',
 '20.84',
 '0',
 '34',
 '0',
 'Raheem MostertR.\xc2\xa0Mostert',
 '5',
 '83']

In [128]:
punt_returns_clean[0:10]

['Trent TaylorT.\xc2\xa0Taylor',
 '27',
 '249',
 '9.22',
 '15',
 '39',
 '0',
 'Victor BoldenV.\xc2\xa0Bolden',
 '4',
 '23']

In [129]:
passing_cols = ['player', 'pass_attempts', 'completions', 'compl_percent', 'passing_yards', 'yards_per_pass', 'passing_touchdowns', 'touchdown_percentage', 'interceptions', 'int_percentage', 'long', 'sack', 'loss', 'passer_rating']
rushing_cols = ['player', 'games_played', 'rush_attempts', 'rush_yards', 'yards_per_rush', 'yards_per_game', 'long', 'rush_touchdowns', 'first_downs']
receiving_cols = ['player', 'games', 'receptions', 'receiving_yards', 'yards_per_catch', 'yards_per_game', 'long', 'receiving_touchdowns', 'first_downs', 'targets', 'yards_after_catch']
kick_returns_cols = ['player', 'kick_returns', 'kick_return_yards', 'kick_return_average', 'fair_catches', 'long', 'touchdowns']
punt_returns_cols = ['player', 'punt_returns', 'punt_return_yards', 'punt_return_average', 'fair_catches', 'long', 'touchdowns']

In [130]:
passing_df = pd.DataFrame(np.array(passing_clean).reshape(passing_players + 2, len(passing_cols)), columns = passing_cols)
rushing_df = pd.DataFrame(np.array(rushing_clean).reshape(rushing_players + 2, len(rushing_cols)), columns = rushing_cols)
receiving_df = pd.DataFrame(np.array(receiving_clean).reshape(receiving_players + 2, len(receiving_cols)), columns = receiving_cols)
kick_returns_df = pd.DataFrame(np.array(kick_returns_clean).reshape(kick_returns_players + 2, len(kick_returns_cols)), columns = kick_returns_cols)
punt_returns_df = pd.DataFrame(np.array(punt_returns_clean).reshape(punt_returns_players + 2, len(punt_returns_cols)), columns = punt_returns_cols)

In [131]:
passing_df.head()

Unnamed: 0,player,pass_attempts,completions,compl_percent,passing_yards,yards_per_pass,passing_touchdowns,touchdown_percentage,interceptions,int_percentage,long,sack,loss,passer_rating
0,C.J. BeathardC. Beathard,224,123,54.9,1430,6.4,4,1.8,6,2.7,83t,19,141,69.2
1,Jimmy GaroppoloJ. Garoppolo,145,100,69.0,1268,8.7,5,3.4,3,2.1,61,8,57,98.9
2,Brian HoyerB. Hoyer,205,119,58.0,1245,6.1,4,2.0,4,2.0,59,16,112,74.1
3,San Francisco,574,342,59.6,3943,6.9,13,2.3,13,2.3,83t,43,310,78.5
4,Opponents,510,324,63.5,3774,7.4,27,5.3,10,2.0,72t,27,159,95.3


In [132]:
rushing_df.head()

Unnamed: 0,player,games_played,rush_attempts,rush_yards,yards_per_rush,yards_per_game,long,rush_touchdowns,first_downs
0,Carlos HydeC. Hyde,15,225,850,3.78,56.7,61,6,39
1,Matt BreidaM. Breida,15,93,393,4.23,26.2,33t,2,18
2,C.J. BeathardC. Beathard,7,26,136,5.23,19.4,16,3,9
3,Marquise GoodwinM. Goodwin,15,3,34,11.33,2.3,18,0,2
4,Kyle JuszczykK. Juszczyk,13,7,31,4.43,2.4,12,0,4


In [133]:
receiving_df.head()

Unnamed: 0,player,games,receptions,receiving_yards,yards_per_catch,yards_per_game,long,receiving_touchdowns,first_downs,targets,yards_after_catch
0,Carlos HydeC. Hyde,15,57,340,5.96,22.7,18,0,17,84,304
1,Marquise GoodwinM. Goodwin,15,54,934,17.3,62.3,83t,1,44,101,185
2,Trent TaylorT. Taylor,14,41,401,9.78,28.6,33,2,25,58,179
3,Pierre GarconP. Garcon,8,40,500,12.5,62.5,59,0,25,67,190
4,George KittleG. Kittle,14,39,415,10.64,29.6,31,2,19,57,191


In [134]:
kick_returns_df.head()

Unnamed: 0,player,kick_returns,kick_return_yards,kick_return_average,fair_catches,long,touchdowns
0,Victor BoldenV. Bolden,19,396,20.84,0,34,0
1,Raheem MostertR. Mostert,5,83,16.6,0,21,0
2,Matt BreidaM. Breida,5,83,16.6,0,28,0
3,Trent TaylorT. Taylor,1,8,8.0,0,8,0
4,Ronald BlairR. Blair,1,0,0.0,0,0,0


In [135]:
punt_returns_df.head()

Unnamed: 0,player,punt_returns,punt_return_yards,punt_return_average,fair_catches,long,touchdowns
0,Trent TaylorT. Taylor,27,249,9.22,15,39,0
1,Victor BoldenV. Bolden,4,23,5.75,1,16,0
2,San Francisco,31,272,8.77,16,39,0
3,Opponents,31,132,4.26,23,61t,1
