This notebook prepares the database for the upcoming season: it scrapes that season's schedule and populates the nfl_data and results tables with the games to make predictions on.

In [10]:
import os
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout
import time
import psycopg2 # PostgreSQL database adapter

In [11]:
# Season (year) whose schedule is scraped and loaded by the cells below.
upcoming_season = 2023

In [12]:
async def get_html(url, selector, sleep = 5, retries = 3):
    """Fetch a page with Playwright and return the inner HTML of one element.

    Args:
        url: Address of the page to load.
        selector: CSS selector (typically an ID) locating a unique element
            within the page; its inner HTML is what gets returned.
        sleep: Base delay in seconds. Attempt ``i`` (1-based) waits
            ``sleep * i`` seconds before loading the page.
        retries: Maximum number of attempts.

    Returns:
        The inner HTML of the selected element, or ``None`` if every attempt
        timed out (or ``retries`` is less than 1).
    """
    # Local import so this cell stays runnable without touching the import cell.
    import asyncio

    html = None
    for attempt in range(1, retries + 1):
        # Wait longer on each attempt to avoid sending too many requests and
        # getting banned. Use asyncio.sleep rather than time.sleep so the
        # event loop is not blocked while waiting.
        await asyncio.sleep(sleep * attempt)
        # Logic to handle errors when web scraping
        try:
            async with async_playwright() as p:
                browser = await p.firefox.launch() # Can also use p.chromium.launch() if issues persist
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    print(await page.title())
                    html = await page.inner_html(selector)
                finally:
                    # Release the browser even when the page load times out.
                    await browser.close()
        except PlaywrightTimeout:
            print(f"Timeout error on {url}")
            continue
        else:
            break
    return html

In [13]:
async def scrape_season(season):
    """Download a season's schedule table and cache it in the working directory.

    The HTML of the ``#all_games`` element is written to
    ``<cwd>/<season>-games.htm``. If that file already exists, nothing is
    fetched or written.

    Args:
        season: Season year, used in both the URL and the output filename.

    Raises:
        RuntimeError: If the page could not be scraped (``get_html`` returned
            ``None``). No file is created in that case.
    """
    save_path = os.path.join(os.getcwd(), f"{season}-games.htm") # Designate the filename and path
    # Check the cache BEFORE hitting the site, so already-scraped seasons
    # cost no network request.
    if os.path.exists(save_path):
        return

    url = f"https://www.pro-football-reference.com/years/{season}/games.htm"
    html = await get_html(url, "#all_games")
    if html is None:
        # Fail before opening the file: an empty file left behind would make
        # every later run think this season was already scraped.
        raise RuntimeError(f"Could not scrape {url}; no file was written.")

    with open(save_path, "w") as f:
        f.write(html)

In [14]:
# Fetch and cache the schedule page for the upcoming season.
# NOTE: top-level `await` only works where an event loop is already running (e.g. a Jupyter/IPython cell).
await scrape_season(upcoming_season)

2023 NFL Weekly League Schedule | Pro-Football-Reference.com


In [None]:
try:
    # Establish a connection to the PostgreSQL database
    connection = psycopg2.connect(
        host = "localhost",
        dbname = "nfl",
        user = "postgres",
        # Prefer the PGPASSWORD environment variable so the secret does not
        # have to live in the notebook; the literal is kept only as a
        # backward-compatible fallback.
        # NOTE(review): remove the literal (and rotate the password) once the
        # environment variable is set everywhere this runs.
        password = os.environ.get("PGPASSWORD", "Plenoir2002!"),
        port = 5432
    )
    print("Connected to PostgreSQL database.")
except psycopg2.Error as error:
    print("Error connecting to PostgreSQL database: ", error)
    # Re-raise: later cells need `connection`, so continuing would only fail
    # further down with an unrelated NameError.
    raise

In [None]:
# Create a cursor object on the open connection to interact with the database.
# Requires the connection cell above to have succeeded (`connection` must be defined).
cursor = connection.cursor()