In [34]:
import os
import time

import pandas as pd
import requests
import sqlalchemy
from bs4 import BeautifulSoup

In [29]:
def get_date_of_birth(player_id, timeout=10):
    """Scrape a player's date of birth from their hockey reference home page.

    Parameters
    ----------
    player_id : str
        Player id used in the page URL; its first character selects the
        directory segment of the URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` raises a
        timeout error. Defaults to 10.

    Returns
    -------
    str or None
        The value of the ``data-birth`` attribute on the ``necro-birth`` span,
        or None when ``player_id`` is empty or the page has no such
        span/attribute.
    """
    # An empty or missing id cannot be turned into a URL (player_id[0] would raise)
    if not player_id:
        return None

    # Go to hockey reference; the timeout keeps one stalled connection from
    # hanging an entire batch run
    response = requests.get(
        url=f'https://www.hockey-reference.com/players/{player_id[0]}/{player_id}.html',
        timeout=timeout,
    )
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find dob span element
    dob_html = soup.find('span', id='necro-birth')

    # If the span is missing or lacks the attribute, signal "not found" to the caller
    if not dob_html or not dob_html.has_attr('data-birth'):
        return None
    return dob_html['data-birth']

In [35]:
def update_dob(player_id, mysql_engine, sleep=False):
    """Scrape a player's date of birth and write it to the player_history table.

    Every row with the given ``player_id`` is updated (not only rows whose dob
    is NULL), so that all rows for a player end up with the same dob.

    Parameters
    ----------
    player_id : str
        Player id to look up and to match in ``player_history.player_id``.
    mysql_engine : sqlalchemy engine
        Engine used to open a transaction for the update.
    sleep : bool, optional
        When true, wait 4 seconds before scraping; useful when updating a
        large batch of players in a row.

    Returns
    -------
    None
        If no date of birth is found, a message is printed and no update runs.
    """
    # If we are updating a large batch at once, may be necessary to wait a few seconds
    if sleep:
        time.sleep(4)

    # Call webscrape to get DOB
    dob = get_date_of_birth(player_id=player_id)

    # Without date of birth, we cannot make an update
    if not dob:
        print(f'No date of birth was found for player id: {player_id}')
        return

    # Write update statement with bound parameters: dob comes from a scraped web
    # page, so it must never be interpolated into the SQL text directly.
    # Leaving 'AND dob IS NULL' out for now to ensure all dob's are the same within a player id
    update_query = sqlalchemy.text("""
        UPDATE player_history
        SET dob = :dob
        WHERE player_id = :player_id
    """)

    # Make the updates; begin() commits on success and rolls back on error
    with mysql_engine.begin() as conn:
        conn.execute(update_query, {'dob': dob, 'player_id': player_id})

In [31]:
# Create the engine to connect to the MySQL database.
# The connection URL embeds the password, so prefer supplying it through the
# NHL_DB_URL environment variable instead of committing it to the notebook.
# The literal fallback keeps existing local runs working unchanged.
# TODO: remove the fallback once NHL_DB_URL is set everywhere this notebook runs.
engine = sqlalchemy.create_engine(
    os.environ.get('NHL_DB_URL', 'mysql+mysqlconnector://root:rootdata@localhost/nhl')
)

In [36]:
# Collect each distinct player_id that has at least one player_history row
# with a missing dob
find_null_query = """
    SELECT DISTINCT player_id
    FROM player_history
    WHERE dob IS NULL;
"""

# Execute against the database and load the matching ids into a DataFrame
null_dob = pd.read_sql(sql=find_null_query, con=engine)

In [37]:
# Run updates one player at a time. A plain loop is used because update_dob is
# called only for its side effects (scrape + database write); an empty
# null_dob frame simply results in zero iterations.
for player_id in null_dob['player_id']:
    update_dob(player_id, mysql_engine=engine, sleep=True)

No date of birth was found for player id: andrean01
No date of birth was found for player id: angelan01
No date of birth was found for player id: anglety01


KeyboardInterrupt: 