In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import psycopg2
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
# Remove the pandas display limits so printed frames show every row and column
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [3]:
# Automate the execution of this notebook to run every day until Feb 20th, 2024, when the season has finished.
# After the 2023 season has ended, change the year_list variable below to 2024.

# Seasons to scrape: the scoreboard loops further down iterate over every year in this list.
year_list = [2023]

In [4]:
# Step 1: Drop all records from 2023 season in SQL database (both raw, dbo)
# Step 2: Gather list of game ids from GAMECAST
# Step 3: Gather list of game ids from BOXSCORE
# Step 4: MODIFIED LIST = GAMECAST LIST - BOXSCORE LIST
# Step 5: Insert full data for game ids in BOXSCORE LIST
# Step 6: Insert partial data for game ids in MODIFIED LIST

In [5]:
# Retrieve postgres database credentials from json file
with open('nfl_project_postgres_info.json', 'r') as credentials_file:
    # json.load reads the file contents and parses them in a single step
    postgres_info = json.load(credentials_file)

In [6]:
# Connect to the PostgreSQL database
conn = psycopg2.connect(
    host="localhost",
    database=postgres_info['database_name'],
    user=postgres_info['nfl_project_username'],
    password=postgres_info['nfl_project_password']
)

# Delete statements for both schemas. The season is passed as a query
# parameter taken from year_list, so the rows removed here are always the
# same seasons that the rest of the notebook re-inserts.
delete_statements = (
    "DELETE FROM raw.nfl_game_data WHERE season = %s",
    "DELETE FROM dbo.nfl_game_data WHERE season = %s",
)

try:
    # Create a cursor
    cursor = conn.cursor()

    # Execute the delete statements for every season being refreshed
    for year in year_list:
        for statement in delete_statements:
            cursor.execute(statement, (year,))

    # Commit the changes to the database
    conn.commit()
    cursor.close()
finally:
    # Always release the connection; if a statement failed before the commit,
    # closing the connection discards the uncommitted deletes.
    conn.close()

In [7]:
# ESPN NFL Schedule URL
base_url = "https://www.espn.com/nfl/scoreboard/_/week/{}/year/{}/seasontype/{}"
season_type = [2,3]  # 2 for regular season, 3 for playoffs

# Create a game_id list of (game id, season type, week label, year) tuples
gamecast_game_ids = []

# Game ids already stored, so a game linked more than once is only added once
seen_gamecast_ids = set()

# Set up the Selenium WebDriver
driver = webdriver.Chrome()

try:
    for year in year_list:
        for season in season_type:
            # Weeks 1 through 19 are requested for each season type
            for week in range(1, 20):
                url = base_url.format(week, year, season)
                driver.get(url)

                # Parse the HTML content after the page is loaded
                soup = BeautifulSoup(driver.page_source, 'html.parser')

                # Find all anchor tags (a) with href attributes containing the gamecast path
                anchor_tags = soup.find_all('a', href=lambda href: href and 'game/_/gameId/' in href)

                # Retrieve the label of the active week; fall back to '' when either
                # the active-week container or its span is missing from the page
                try:
                    active_week = soup.find_all("div", class_="custom--week is-active")[0]
                    week_number = active_week.find('span', class_='week week-range').text.strip()
                except (IndexError, AttributeError):
                    week_number = ''

                for tag in anchor_tags:
                    href = tag['href']
                    start_index = href.find("gameId/") + len("gameId/")  # Index right after "gameId/"
                    extracted_string = href[start_index:start_index + 9]  # Extract the 9-digit gameID
                    if extracted_string in seen_gamecast_ids:
                        continue
                    seen_gamecast_ids.add(extracted_string)
                    gamecast_game_ids.append((extracted_string,season,week_number,year))
finally:
    # Close the browser even when a page load or parse step raised
    driver.quit()

In [8]:
# ESPN NFL Schedule URL
base_url = "https://www.espn.com/nfl/scoreboard/_/week/{}/year/{}/seasontype/{}"
season_type = [2,3]  # 2 for regular season, 3 for playoffs

# Create a game_id list of (game id, season type, week label, year) tuples
boxscore_game_ids = []

# Game ids already stored, so a game linked more than once is only added once
seen_boxscore_ids = set()

# Set up the Selenium WebDriver
driver = webdriver.Chrome()

try:
    for year in year_list:
        for season in season_type:
            # Weeks 1 through 19 are requested for each season type
            for week in range(1, 20):
                url = base_url.format(week, year, season)
                driver.get(url)

                # Parse the HTML content after the page is loaded
                soup = BeautifulSoup(driver.page_source, 'html.parser')

                # Find all anchor tags (a) with href attributes containing the boxscore path
                anchor_tags = soup.find_all('a', href=lambda href: href and 'boxscore/_/gameId/' in href)

                # Retrieve the label of the active week; fall back to '' when either
                # the active-week container or its span is missing from the page
                try:
                    active_week = soup.find_all("div", class_="custom--week is-active")[0]
                    week_number = active_week.find('span', class_='week week-range').text.strip()
                except (IndexError, AttributeError):
                    week_number = ''

                for tag in anchor_tags:
                    href = tag['href']
                    start_index = href.find("gameId/") + len("gameId/")  # Index right after "gameId/"
                    extracted_string = href[start_index:start_index + 9]  # Extract the 9-digit gameID
                    if extracted_string in seen_boxscore_ids:
                        continue
                    seen_boxscore_ids.add(extracted_string)
                    boxscore_game_ids.append((extracted_string,season,week_number,year))
finally:
    # Close the browser even when a page load or parse step raised
    driver.quit()

In [9]:
# Reduce the set of game_ids: keep the gamecast entries whose game id has no boxscore entry.
# The boxscore ids are collected into a set once so each membership check is a constant-time lookup.
boxscore_ids = {item[0] for item in boxscore_game_ids}
modified_game_id_list = [tup for tup in gamecast_game_ids if tup[0] not in boxscore_ids]

In [10]:
# Game data list of dictionaries (one dictionary per game that has a box score)
game_data = []

# Initial URL
base_url = "https://www.espn.com/nfl/boxscore/_/gameId/{}"

# ESPN season-type code -> value stored in the part_of_season column
season_labels = {2: 'regular_season', 3: 'playoffs'}

# Class attribute of the cells inside a totals row of the box score tables
totals_cell_class = "Boxscore__Totals_Items Table__TD"

# Column-name suffixes, listed in the order the values are collected below
line_score_fields = ['quarter_first', 'quarter_second', 'quarter_third',
                     'quarter_fourth', 'quarter_ot', 'final']
stat_fields = ['pass_comp_attempts', 'pass_yards', 'pass_tds', 'pass_ints',
               'sacks_allowed', 'pass_rating',
               'rush_attempts', 'rush_yards', 'rush_tds', 'rush_long',
               'rec_targets', 'rec_receptions', 'rec_yards', 'rec_tds', 'rec_long']

# (side, line-score row, passing totals row, rushing totals row, receiving totals row)
side_layout = [('away', 0, 1, 5, 9), ('home', 1, 3, 7, 11)]

for game_id in boxscore_game_ids:

    url = base_url.format(game_id[0])
    # A timeout keeps one unresponsive request from hanging the whole run
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    part_of_season = season_labels.get(game_id[1], '')

    # Team slugs come from the last path segment of each logo link
    # (first logo = away team, second logo = home team); '' when unavailable
    team_name_elements = soup.find_all("div", class_="Gamestrip__InfoLogo")
    team_slugs = []
    for position in (0, 1):
        try:
            link = team_name_elements[position].find('a')
            team_slugs.append(link['href'].split('/')[-1].replace('-', '_'))
        except (AttributeError, TypeError, IndexError):
            team_slugs.append('')
    away_team, home_team = team_slugs

    try:
        game_location = soup.find("span", class_="Location__Text").text.strip()
    except AttributeError:
        game_location = ''

    # The stadium name is rendered under one of two class names; use the first one present
    game_stadium = ''
    for stadium_class in ("n6 clr-gray-03 GameInfo__Location__Name",
                          "n6 clr-gray-03 GameInfo__Location__Name--noImg"):
        stadium_element = soup.find("div", class_=stadium_class)
        if stadium_element is not None:
            game_stadium = stadium_element.text.strip()
            break

    # Meta text is split at the first comma: time before it, date after it
    # (the date is cut at 'Coverage:'). Both stay '' when the element is missing.
    meta_element = soup.find("div", class_="n8 GameInfo__Meta")
    game_datetime_string = meta_element.text.strip() if meta_element is not None else ''
    game_time, comma, date_part = game_datetime_string.partition(',')
    game_date = date_part.split('Coverage:', 1)[0].strip() if comma else ''

    # Attendance figures: keep only the digits of each element's text; '' when the
    # element is missing or contains no digits
    attendance_values = []
    for attendance_class in ("Attendance__Numbers",
                             "Attendance__Capacity h10",
                             "n3 flex-expand Attendance__Percentage"):
        try:
            attendance_text = soup.find("div", class_=attendance_class).text.strip()
            attendance_values.append(int(''.join(filter(str.isdigit, attendance_text))))
        except (AttributeError, ValueError):
            attendance_values.append('')
    game_attendance, stadium_capacity, attendance_percent = attendance_values

    # Team records: element 0 belongs to the away team, element 1 to the home team.
    # The first whitespace-separated token is the overall record (trailing comma
    # removed), the second token is the away/home split. NaN when a piece is missing.
    team_records = soup.find_all("div", class_="Gamestrip__Record db n10 clr-gray-03")
    record_values = []
    for position in (0, 1):
        tokens = team_records[position].text.split() if position < len(team_records) else []
        record_values.append(tokens[0].strip(',') if len(tokens) > 0 else np.nan)
        record_values.append(tokens[1] if len(tokens) > 1 else np.nan)
    (away_team_total_record, away_team_away_record,
     home_team_total_record, home_team_home_record) = record_values

    row = {
        'espn_game_id': game_id[0],
        'season': game_id[3],
        'part_of_season': part_of_season,
        'week_number': game_id[2],
        'away_team': away_team,
        'home_team': home_team,
        'game_location': game_location,
        'game_stadium': game_stadium,
        'game_time': game_time,
        'game_date': game_date,
        'game_attendance': game_attendance,
        'stadium_capacity': stadium_capacity,
        'attendance_percent': attendance_percent,
        'weather': '',
        'away_team_total_record': away_team_total_record,
        'away_team_away_record': away_team_away_record,
        'home_team_total_record': home_team_total_record,
        'home_team_home_record': home_team_home_record,
    }

    # Line score rows: row 0 is the away team, row 1 the home team. Scores are only
    # read when both rows exist and the away row's second element is numeric.
    team_scores = soup.find_all("tr", class_="Table__TR Table__TR--sm Table__even")
    score_cells = [score_row.find_all() for score_row in team_scores[:2]]
    has_scores = (len(score_cells) == 2
                  and len(score_cells[0]) > 1
                  and score_cells[0][1].text.isdigit())
    # Seven elements in the away row means an overtime column sits before the final score
    has_overtime = has_scores and len(score_cells[0]) == 7

    team_stats = soup.find_all("tr", class_="Boxscore__Totals Table__TR Table__TR--sm Table__even")
    has_stats = len(team_stats) > 0

    for side, score_row, pass_row, rush_row, rec_row in side_layout:
        if has_scores:
            cells = score_cells[score_row]
            line_score = [int(cells[position].text) for position in range(1, 5)]
            if has_overtime:
                line_score += [int(cells[5].text), int(cells[6].text)]
            else:
                line_score += [np.nan, int(cells[5].text)]
        else:
            line_score = [np.nan] * len(line_score_fields)
        row.update(zip([side + '_team_' + field for field in line_score_fields], line_score))

        if has_stats:
            passing = [cell.text for cell in team_stats[pass_row].find_all("td", class_=totals_cell_class)]
            rushing = [cell.text for cell in team_stats[rush_row].find_all("td", class_=totals_cell_class)]
            receiving = [cell.text for cell in team_stats[rec_row].find_all("td", class_=totals_cell_class)]
            stats = [
                passing[0], passing[1], passing[3], passing[4], passing[5],
                # When cell 6 holds the '--' placeholder the rating is taken from cell 7
                passing[7] if passing[6] == '--' else passing[6],
                rushing[0], rushing[1], rushing[3], rushing[4],
                receiving[5], receiving[0], receiving[1], receiving[3], receiving[4],
            ]
        else:
            stats = [np.nan] * len(stat_fields)
        row.update(zip([side + '_' + field for field in stat_fields], stats))

    game_data.append(row)

In [11]:
# Convert the list of per-game dictionaries to a dataframe (one row per dictionary)
game_table = pd.DataFrame(game_data)

In [12]:
from psycopg2 import extras

# Convert the DataFrame to a list of tuples
data = [tuple(row) for row in game_table.values]

# Get the column names from the DataFrame
columns = list(game_table.columns)

# Prepare the SQL statement for batch insertion
sql = "INSERT INTO raw.nfl_game_data ({}) VALUES %s".format(", ".join(columns))  # Dynamically generate column names

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    host="localhost",
    database=postgres_info['database_name'],
    user=postgres_info['nfl_project_username'],
    password=postgres_info['nfl_project_password']
)

try:
    # Create a cursor
    cursor = conn.cursor()

    # Execute the batch insert
    extras.execute_values(cursor, sql, data)

    # Commit the changes to the database
    conn.commit()
    cursor.close()
finally:
    # Always release the connection; if the insert failed before the commit,
    # closing the connection discards the uncommitted rows.
    conn.close()

In [13]:
# Game data list of dictionaries (one dictionary per game that has no box score yet)
pre_game_data = []

# Initial URL
base_url = "https://www.espn.com/nfl/game/_/gameId/{}"

# ESPN season-type code -> value stored in the part_of_season column
season_labels = {2: 'regular_season', 3: 'playoffs'}

# Columns that are left as NaN for these games: team records, line scores and team stats
line_score_fields = ['quarter_first', 'quarter_second', 'quarter_third',
                     'quarter_fourth', 'quarter_ot', 'final']
stat_fields = ['pass_comp_attempts', 'pass_yards', 'pass_tds', 'pass_ints',
               'sacks_allowed', 'pass_rating',
               'rush_attempts', 'rush_yards', 'rush_tds', 'rush_long',
               'rec_targets', 'rec_receptions', 'rec_yards', 'rec_tds', 'rec_long']
pending_columns = ['away_team_total_record', 'away_team_away_record',
                   'home_team_total_record', 'home_team_home_record']
for side in ('away', 'home'):
    pending_columns += [side + '_team_' + field for field in line_score_fields]
    pending_columns += [side + '_' + field for field in stat_fields]

for game_id in modified_game_id_list:

    url = base_url.format(game_id[0])
    # A timeout keeps one unresponsive request from hanging the whole run
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    part_of_season = season_labels.get(game_id[1], '')

    # Team slugs come from the last path segment of each logo link
    # (first logo = away team, second logo = home team); '' when unavailable
    team_name_elements = soup.find_all("div", class_="Gamestrip__InfoLogo")
    team_slugs = []
    for position in (0, 1):
        try:
            link = team_name_elements[position].find('a')
            team_slugs.append(link['href'].split('/')[-1].replace('-', '_'))
        except (AttributeError, TypeError, IndexError):
            team_slugs.append('')
    away_team, home_team = team_slugs

    try:
        game_location = soup.find("span", class_="Location__Text").text.strip()
    except (AttributeError, TypeError):
        game_location = ''

    # The stadium name is rendered under one of two class names; use the first one present
    game_stadium = ''
    for stadium_class in ("n6 clr-gray-03 GameInfo__Location__Name",
                          "n6 clr-gray-03 GameInfo__Location__Name--noImg"):
        stadium_element = soup.find("div", class_=stadium_class)
        if stadium_element is not None:
            game_stadium = stadium_element.text.strip()
            break

    # Meta text is split at the first comma: time before it, date after it
    # (the date is cut at 'Coverage:'). Both stay '' when the element is missing.
    meta_element = soup.find("div", class_="n8 GameInfo__Meta")
    game_datetime_string = meta_element.text.strip() if meta_element is not None else ''
    game_time, comma, date_part = game_datetime_string.partition(',')
    game_date = date_part.split('Coverage:', 1)[0].strip() if comma else ''

    # Stadium capacity: keep only the digits of the element's text; '' when the
    # element is missing or contains no digits
    try:
        capacity = soup.find("div", class_="Attendance__Capacity h10").text.strip()
        stadium_capacity = int(''.join(filter(str.isdigit, capacity)))
    except (AttributeError, TypeError, ValueError):
        stadium_capacity = ''

    row = {
        'espn_game_id': game_id[0],
        'season': game_id[3],
        'part_of_season': part_of_season,
        'week_number': game_id[2],
        'away_team': away_team,
        'home_team': home_team,
        'game_location': game_location,
        'game_stadium': game_stadium,
        'game_time': game_time,
        'game_date': game_date,
        'game_attendance': np.nan,
        'stadium_capacity': stadium_capacity,
        'attendance_percent': np.nan,
        'weather': '',
    }
    # Every remaining column is filled with NaN
    row.update(dict.fromkeys(pending_columns, np.nan))

    pre_game_data.append(row)

In [14]:
# Convert the list of per-game dictionaries to a dataframe (one row per dictionary)
pre_game_table = pd.DataFrame(pre_game_data)

In [15]:
from psycopg2 import extras

# Convert the DataFrame to a list of tuples
data = [tuple(row) for row in pre_game_table.values]

# Get the column names from the DataFrame
columns = list(pre_game_table.columns)

# Prepare the SQL statement for batch insertion
sql = "INSERT INTO raw.nfl_game_data ({}) VALUES %s".format(", ".join(columns))  # Dynamically generate column names

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    host="localhost",
    database=postgres_info['database_name'],
    user=postgres_info['nfl_project_username'],
    password=postgres_info['nfl_project_password']
)

try:
    # Create a cursor
    cursor = conn.cursor()

    # Execute the batch insert
    extras.execute_values(cursor, sql, data)

    # Commit the changes to the database
    conn.commit()
    cursor.close()
finally:
    # Always release the connection; if the insert failed before the commit,
    # closing the connection discards the uncommitted rows.
    conn.close()