In [1]:
# import Dependencies
import os
import re
from io import StringIO

import numpy as np
import pandas as pd
import psycopg2
import requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine

In [2]:
# reusable helper for scraping: download a page and parse it
def get_html_data(url, timeout=30):
    """Fetch ``url`` over HTTP and return its body parsed as a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block indefinitely on an unresponsive host.

    Returns
    -------
    bs4.BeautifulSoup
        The response text parsed with the ``html5lib`` parser.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page is
        never silently parsed as if it were the requested data.
    requests.RequestException
        On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # fail loudly on HTTP errors instead of scraping an error page
    response.raise_for_status()
    return BeautifulSoup(response.text, "html5lib")

In [3]:
hall_fo_fame_url = 'https://en.wikipedia.org/wiki/List_of_Pro_Football_Hall_of_Fame_inductees'

# download and parse the Hall of Fame inductee list page
hf_response = get_html_data(hall_fo_fame_url)
# preview only the beginning of the document; printing the whole page
# floods the notebook with output and bloats the saved file
print(hf_response.prettify()[:2000])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of Pro Football Hall of Fame inductees - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"6c4e2823-410c-4abf-8348-ad7fe82fcd71","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_Pro_Football_Hall_of_Fame_inductees","wgTitle":"List of Pro Football Hall of Fame inductees","wgCurRevisionId":965559266,"wgRevisionId":965559266,"wgArticleId":8802088,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Articles with hCards","Pro 

In [4]:
# find_all returns a list-like collection of every <table> carrying the "wikitable" class
hf_results = hf_response.find_all('table', class_="wikitable")

# the table at index 1 is the one parsed in the following cells; preview only
# its beginning rather than dumping the full markup into the notebook
print(hf_results[1].prettify()[:2000])

<table class="wikitable sortable">
 <caption>
  Biographies and statistics can be Referenced at the Hall of Fame web site.
  <sup class="reference" id="cite_ref-Members_by_team_18-0">
   <a href="#cite_note-Members_by_team-18">
    [18]
   </a>
  </sup>
 </caption>
 <tbody>
  <tr>
   <th>
    Inductee
   </th>
   <th>
    Class
   </th>
   <th>
    Position
   </th>
   <th>
    Team(s)
   </th>
   <th>
    Years
   </th>
  </tr>
  <tr style="border-top: 2px solid gray">
   <td rowspan="2">
    <span data-sort-value="Adderley, Herb">
     <span class="vcard">
      <span class="fn">
       <a href="/wiki/Herb_Adderley" title="Herb Adderley">
        Herb Adderley
       </a>
      </span>
     </span>
    </span>
   </td>
   <td rowspan="2">
    1980
   </td>
   <td data-sort-value="Cornerback 1980" rowspan="2">
    Cornerback
   </td>
   <td>
    <a href="/wiki/List_of_Green_Bay_Packers_in_the_Pro_Football_Hall_of_Fame" title="List of Green Bay Packers in the Pro Football Hall of Fame"

In [10]:
# Parse the selected <table> element into a DataFrame.
# The markup is wrapped in StringIO because passing literal HTML text to
# read_html is deprecated in recent pandas releases. read_html returns a list
# of frames, so take the first one directly instead of binding hf_df to a
# list and then rebinding it to a DataFrame.
hf_df = pd.read_html(StringIO(str(hf_results[1])))[0]
hf_df

Unnamed: 0,Inductee,Class,Position,Team(s),Years
0,Herb Adderley,1980,Cornerback,Green Bay Packers,1961–1969
1,Herb Adderley,1980,Cornerback,Dallas Cowboys,1970–1972
2,Troy Aikman**,2006,Quarterback,Dallas Cowboys,1989–2000
3,George Allen[3],2002,Coach,Los Angeles Rams,1966–1970
4,George Allen[3],2002,Coach,Washington Redskins,1971–1977
...,...,...,...,...,...
697,Steve Young**,2005,Quarterback,Tampa Bay Buccaneers,1985–1986
698,Steve Young**,2005,Quarterback,San Francisco 49ers,1987–1999
699,Jack Youngblood,2001,Defensive end,Los Angeles Rams,1971–1984
700,Gary Zimmerman,2008,Offensive tackle,Minnesota Vikings,1986–1992


In [11]:
# Strip footnote markers such as "**" and "[3]" from the Inductee column.
# Every run of non-letter characters is collapsed to a single space.
# regex=True is required: since pandas 2.0 str.replace treats the pattern as a
# literal string by default, which would leave the column unchanged.
# The final strip() removes the trailing space left where a marker was at the
# end of the name (e.g. "Troy Aikman**" -> "Troy Aikman").
# NOTE(review): the pattern also replaces periods, hyphens, apostrophes and
# digits inside names with a space -- confirm that is acceptable downstream.
hf_df['Inductee'] = (
    hf_df['Inductee']
    .str.replace(r"[^A-Za-z]+", ' ', regex=True)
    .str.strip()
)
hf_df

Unnamed: 0,Inductee,Class,Position,Team(s),Years
0,Herb Adderley,1980,Cornerback,Green Bay Packers,1961–1969
1,Herb Adderley,1980,Cornerback,Dallas Cowboys,1970–1972
2,Troy Aikman,2006,Quarterback,Dallas Cowboys,1989–2000
3,George Allen,2002,Coach,Los Angeles Rams,1966–1970
4,George Allen,2002,Coach,Washington Redskins,1971–1977
...,...,...,...,...,...
697,Steve Young,2005,Quarterback,Tampa Bay Buccaneers,1985–1986
698,Steve Young,2005,Quarterback,San Francisco 49ers,1987–1999
699,Jack Youngblood,2001,Defensive end,Los Angeles Rams,1971–1984
700,Gary Zimmerman,2008,Offensive tackle,Minnesota Vikings,1986–1992


In [20]:
# Give the scraped columns short snake_case names; columns that are not
# listed in the mapping keep their current name.
qb_df = qb_df.rename(
    mapper={
        'Season': 'year',
        'Super Bowl': 'sb_no',
        'Winning QB': 'winner_qb',
        'Team': 'winner_team',
        'Losing QB': 'loser_qb',
        'Team.1': 'loser_team',
    },
    axis='columns',
)
qb_df.head()

Unnamed: 0,year,sb_no,winner_qb,winner_team,loser_qb,loser_team,also_mvp
0,1966,I,Bart Starr,Green Bay Packersn,Len Dawson,Kansas City Chiefsa,MVP
1,1967,II,Bart Starr,Green Bay Packersn,Daryle Lamonica,Oakland Raidersa,MVP
2,1968,III,Joe Namath,New York Jetsa,Earl Morrall,Baltimore Coltsn,MVP
3,1969,IV,Len Dawson,Kansas City Chiefsa,Joe Kapp,Minnesota Vikingsn,MVP
4,1970,V,Johnny Unitas,Baltimore ColtsA,Craig Morton,Dallas CowboysN,


In [21]:
# The last character of each raw team name is a one-letter marker:
# 'a'/'A' is mapped to 'AFC', anything else (in the data shown, 'n'/'N') to 'NFC'.
# Derive the conference from the marker first, then remove the marker from
# the team name.
# NOTE: this cell is not idempotent -- running it twice strips a second
# character from every team name. Re-run from the cell that builds qb_df.
for side in ('winner', 'loser'):
    team_col = f'{side}_team'
    marker = qb_df[team_col].str[-1].str.lower()
    qb_df[f'{side}_conf'] = np.where(marker == 'a', 'AFC', 'NFC')
    # vectorized slice: unlike a Python-level list comprehension it does not
    # raise on missing (NaN) values
    qb_df[team_col] = qb_df[team_col].str[:-1]

# Update the super bowl number 50 to its roman numeral L
qb_df['sb_no'] = qb_df['sb_no'].replace('50', 'L', regex=True)

qb_df.head()

Unnamed: 0,year,sb_no,winner_qb,winner_team,loser_qb,loser_team,also_mvp,winner_conf,loser_conf
0,1966,I,Bart Starr,Green Bay Packers,Len Dawson,Kansas City Chiefs,MVP,NFC,AFC
1,1967,II,Bart Starr,Green Bay Packers,Daryle Lamonica,Oakland Raiders,MVP,NFC,AFC
2,1968,III,Joe Namath,New York Jets,Earl Morrall,Baltimore Colts,MVP,AFC,NFC
3,1969,IV,Len Dawson,Kansas City Chiefs,Joe Kapp,Minnesota Vikings,MVP,AFC,NFC
4,1970,V,Johnny Unitas,Baltimore Colts,Craig Morton,Dallas Cowboys,,AFC,NFC


In [22]:
# Reorder the columns: game identifiers first, then the winner's fields,
# then the loser's fields. Columns not listed here are dropped.
qb_df = qb_df.loc[:, [
    'sb_no',
    'year',
    'winner_conf',
    'winner_team',
    'winner_qb',
    'also_mvp',
    'loser_conf',
    'loser_team',
    'loser_qb',
]]

qb_df.head()

Unnamed: 0,sb_no,year,winner_conf,winner_team,winner_qb,also_mvp,loser_conf,loser_team,loser_qb
0,I,1966,NFC,Green Bay Packers,Bart Starr,MVP,AFC,Kansas City Chiefs,Len Dawson
1,II,1967,NFC,Green Bay Packers,Bart Starr,MVP,AFC,Oakland Raiders,Daryle Lamonica
2,III,1968,AFC,New York Jets,Joe Namath,MVP,NFC,Baltimore Colts,Earl Morrall
3,IV,1969,AFC,Kansas City Chiefs,Len Dawson,MVP,NFC,Minnesota Vikings,Joe Kapp
4,V,1970,AFC,Baltimore Colts,Johnny Unitas,,NFC,Dallas Cowboys,Craig Morton


In [23]:
# Use the Super Bowl number as the index.
# The guard makes the cell safe to re-run: once sb_no has become the index it
# is no longer a column, and calling set_index again would raise KeyError.
# Rebinding instead of inplace=True also avoids mutating a frame that was
# produced by column selection in an earlier cell.
if qb_df.index.name != "sb_no":
    qb_df = qb_df.set_index("sb_no")
qb_df.tail()

Unnamed: 0_level_0,year,winner_conf,winner_team,winner_qb,also_mvp,loser_conf,loser_team,loser_qb
sb_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LI,2016,AFC,New England Patriots,Tom Brady,MVP,NFC,Atlanta Falcons,Matt Ryan
LII,2017,NFC,Philadelphia Eagles,Nick Foles,MVP,AFC,New England Patriots,Tom Brady
LIII,2018,AFC,New England Patriots,Tom Brady,,NFC,Los Angeles Rams,Jared Goff
LIV,2019,AFC,Kansas City Chiefs,Patrick Mahomes,MVP,NFC,San Francisco 49ers,Jimmy Garoppolo
Super Bowl,Seas,NFC,Tea,Winning QB,,NFC,Tea,Losing QB


In [24]:
# Remove the row whose index label is 'Super Bowl' (a repeated header row that
# was scraped as data). errors='ignore' keeps the cell re-runnable: a second
# run no longer raises KeyError once the row is gone.
qb_df = qb_df.drop(index=['Super Bowl'], errors='ignore')
# display the data
qb_df.tail()

Unnamed: 0_level_0,year,winner_conf,winner_team,winner_qb,also_mvp,loser_conf,loser_team,loser_qb
sb_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L,2015,AFC,Denver Broncos,Peyton Manning,,NFC,Carolina Panthers,Cam Newton
LI,2016,AFC,New England Patriots,Tom Brady,MVP,NFC,Atlanta Falcons,Matt Ryan
LII,2017,NFC,Philadelphia Eagles,Nick Foles,MVP,AFC,New England Patriots,Tom Brady
LIII,2018,AFC,New England Patriots,Tom Brady,,NFC,Los Angeles Rams,Jared Goff
LIV,2019,AFC,Kansas City Chiefs,Patrick Mahomes,MVP,NFC,San Francisco 49ers,Jimmy Garoppolo


### Create database connection

In [25]:
# Build the SQLAlchemy engine for the nfl_db Postgres database.
# The "user:password@host:port/database" part can be supplied through the
# NFL_DB_CONNECTION environment variable so real credentials never have to be
# committed with the notebook; the local development value is kept as the
# fallback so existing setups keep working unchanged.
connection_string = os.environ.get(
    "NFL_DB_CONNECTION",
    "postgres:postgres@localhost:5432/nfl_db",
)
engine = create_engine(f'postgresql://{connection_string}')

### Load DataFrames into database

In [26]:
# Write pp_df (index included) to the "player_position" table,
# replacing the table if it already exists.
pp_df.to_sql(
    'player_position',
    engine,
    index=True,
    if_exists='replace',
)

In [46]:
# Write sb_df (index included) to the "sb_winners" table,
# replacing the table if it already exists.
sb_df.to_sql(
    'sb_winners',
    engine,
    index=True,
    if_exists='replace',
)

In [28]:
# Write mvp_df (index included) to the "most_valuable_player" table,
# replacing the table if it already exists.
mvp_df.to_sql(
    'most_valuable_player',
    engine,
    index=True,
    if_exists='replace',
)

In [29]:
# Write qb_df (index included) to the "sb_quarterbacks" table,
# replacing the table if it already exists.
qb_df.to_sql(
    'sb_quarterbacks',
    engine,
    index=True,
    if_exists='replace',
)

In [30]:
# Recreate the engine with the psycopg2 driver named explicitly in the URL,
# then open a connection on it.
# NOTE(review): nothing visible in this part of the notebook closes
# `connection` -- confirm it is closed (or used as a context manager) later.
engine = create_engine(
    'postgresql+psycopg2://postgres:postgres@localhost:5432/nfl_db'
)
connection = engine.connect()

In [31]:
# Read every row of the mvp_position relation back into a DataFrame
# and show its last rows.
query = '''
    select *
      from mvp_position
      ;
'''
mvp_position_df = pd.read_sql_query(sql=query, con=engine)
mvp_position_df.tail()

Unnamed: 0,team,sb_no,player,position_abbr,position,highlights
49,Baltimore,XXXV,Ray Lewis,LB,Linebacker,Led a dominant Ravens defense
50,Dallas,V,Chuck Howley,LB,Linebacker,"Two interceptions, fumble recovery"
51,Tampa Bay,XXXVII,Dexter Jackson,CB,Cornerback,Two first-half interceptions
52,Dallas,XXX,Larry Brown,CB,Cornerback,Two interceptions
53,Miami,VII,Jake Scott,S,Safety,Two interceptions


In [34]:
# Read every row of the sb_divisions relation back into a DataFrame
# and show its first rows.
score_query = '''
    select *
      from sb_divisions
      ;
'''
sb_scores_df = pd.read_sql_query(sql=score_query, con=engine)
sb_scores_df.head()

Unnamed: 0,sb_no,year,winner_conf,winner_div,winner_team,winner_score,winner_qb,also_mvp,loser_conf,loser_div,loser_team,loser_score
0,LIV,2020,AFC,AFC West,Kansas City Chiefs,31,Patrick Mahomes,MVP,NFC,NFC West,San Francisco 49ers,20
1,LIII,2019,AFC,AFC East,New England Patriots,13,Tom Brady,,NFC,NFC West,Los Angeles Rams,3
2,LII,2018,NFC,NFC East,Philadelphia Eagles,41,Nick Foles,MVP,AFC,AFC East,New England Patriots,33
3,LI,2017,AFC,AFC East,New England Patriots,34,Tom Brady,MVP,NFC,NFC South,Atlanta Falcons,28
4,L,2016,AFC,AFC West,Denver Broncos,24,Peyton Manning,,NFC,NFC South,Carolina Panthers,10


In [None]:
sb_