In [96]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as BeautifulSoup
import pandas as pd
import yagmail
from datetime import datetime, timedelta, timezone
import pytz
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from io import StringIO
import re
import numpy as np
import requests
import time
import warnings

# Suppress the specific warnings
warnings.filterwarnings("ignore")

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

gameDate = '2023-01-09'
awayTeamID = 0
awayTeamName = 'AWAY'
homeTeamID = 0
homeTeamName = 'HOME'

browser = Browser('chrome')

allowed_events = ['SHOT', 'FAC', 'HIT', 'BLOCK', 'MISS', 'GIVE', 'TAKE', 'GOAL', 'PENL', 'DELPEN']

In [97]:
# url = "https://www.nhl.com/scores/htmlreports/20232024/PL020175.HTM"

In [98]:
homeOnlySked = pd.read_csv('data/sked.csv', index_col=False)

homeOnlySked['gameDT'] = pd.to_datetime(homeOnlySked['gameDT'])
# homeOnlySked.info()

completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow() - timedelta(days=0.5))]

# # completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow())]
completedSked.sort_values('gameDate')

game_list = completedSked['gameID'].astype(str).str[-6:].tolist()


file_name = f"data/summary_statsS-2023-12-17.csv"
player_biosS = pd.read_csv(file_name, index_col=False)
file_name = f"data/summary_statsG-2023-12-17.csv"
player_biosG = pd.read_csv(file_name, index_col=False)
for index, row in player_biosG.iterrows():
    player_biosG.at[index, 'tertiaryPosition'] = 'G'
player_bios = pd.concat([player_biosS, player_biosG], axis=0)
player_bios

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain,saves,shutout,creaseShare
0,8476453,N. Kucherov,TBL,R,40470,31,20.0,32.0,24.0,146,...,W,F,9.44,3.42,0,50,171.00,,,
1,8479318,A. Matthews,TOR,C,35008,27,23.0,12.0,11.0,121,...,C,F,9.86,3.55,0,54,191.70,,,
2,8476468,J. Miller,VAN,C,37062,31,15.0,28.0,20.0,71,...,C,F,9.23,3.06,0,51,156.06,,,
3,8477492,N. MacKinnon,COL,C,40820,30,12.0,31.0,14.0,133,...,C,F,8.10,3.06,0,52,159.12,,,
4,8480865,N. Dobson,NYI,D,45996,30,5.0,27.0,13.0,72,...,D,D,6.99,2.98,0,52,154.96,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,8481692,D. Wolf,CGY,G,16643,5,,,,149,...,,G,-0.09,-0.08,0,7,-0.65,133.0,0.0,14.83
76,8482221,D. Levi,BUF,G,44616,13,,,,372,...,,G,1.36,1.29,0,19,26.41,334.0,0.0,38.97
77,8482411,H. Shepard,WSH,G,7200,2,,,,59,...,,G,4.40,4.40,0,4,17.76,54.0,0.0,7.34
78,8482821,A. Soderblom,CHI,G,45909,14,,,,398,...,,G,-1.85,-1.69,0,23,-43.29,347.0,0.0,44.13


In [99]:
# game_list = ['020175', '020176', '020177']

In [100]:
def extract_numbers(player_info):
    return [int(num) for num in player_info.split() if num.isdigit()]

In [103]:
def get_play_by_play(tempURL2):
    
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode (without GUI)

    # Set up the webdriver with Chrome
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Open the webpage with Selenium
        driver.get(tempURL2)

        # Get the HTML content after the dynamic content has loaded
        html_content = driver.page_source

        # Parse the HTML with BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')

        # Now you can work with the soup object to extract data from the HTML.
        tables = soup.find_all('table', class_='tablewidth')

        dataframes = [pd.read_html(str(table))[0] for table in tables]

    finally:
        # Close the webdriver
        driver.quit()
        
    time.sleep(2)
    
    num_tables = len(dataframes)
    temp_df = pd.DataFrame()

    for frame in range(num_tables):
#         print(frame)
        try:
            temporary =  dataframes[frame]
            temporary = temporary[temporary[4].isin(allowed_events)]

            
#             temporary = temporary.loc[temporary.iloc[:, 5].notna()]
#             temporary = temporary.iloc[1:]
#             new_headers = temporary.iloc[0]
#             temporary = temporary[1:].set_axis(new_headers, axis=1)
#             temporary = temporary.iloc[:-1]
#             temporary.set_index('#', inplace=True)
            temp_df = pd.concat([temp_df, temporary], axis=0, ignore_index=True)
#             temporary.to_excel('fixed.xlsx')
        except:
            temporary.to_excel('broken.xlsx')
            temp_df = temp_df
        
    
    
    return temp_df

In [102]:
def get_game_details(tempURL):
    global gameDate, awayTeamID, awayTeamName, homeTeamID, homeTeamName
    
    test = requests.get(tempURL).json()
    
    gameDate = test['gameDate']
    awayTeamID = test['awayTeam']['id']
    awayTeamName = test['awayTeam']['abbrev']
    homeTeamID = test['homeTeam']['id']
    homeTeamName = test['homeTeam']['abbrev']

    jerseys = pd.DataFrame()

    for spot in test['rosterSpots']:
        teamId = spot.get('teamId')
        sweaterNumber = spot.get('sweaterNumber')
        playerId = spot.get('playerId')

        temp = pd.DataFrame([{'teamId': teamId, 'sweaterNumber': sweaterNumber, 'playerId': playerId}])

        jerseys = pd.concat([jerseys, temp], ignore_index=True)

    return jerseys

In [104]:
corsi_df = pd.DataFrame()

for game in game_list:
    
    gameURL = 'https://www.nhl.com/scores/htmlreports/20232024/PL' + game + '.HTM'
    tables = get_play_by_play(gameURL)
    
    playURL = 'https://api-web.nhle.com/v1/gamecenter/2023' + game + '/play-by-play'
    players = get_game_details(playURL)
    
    game_id = '2023' + game
    print(game_id)
    events_df = tables
    
#     column_names = ['#', 'period', 'strength', 'times', 'event', 'description',  awayTeamName + ' On Ice', homeTeamName + ' On Ice']
#     events_df.columns = column_names
#     events_df.set_index('#', inplace=True)

        
    for index, row in events_df.iterrows():
        events_df.at[index, 'Corsi'] = row[5][:3]
        events_df.at[index, 'gameID'] = game_id
        

    
    events_df = events_df.loc[(events_df['Corsi'] == homeTeamName) | (events_df['Corsi'] == awayTeamName)]
    
#     awayColumnName = awayTeamName + ' On Ice'
#     homeColumnName = homeTeamName + ' On Ice'
    events_df['awayNumbers'] = events_df[6].apply(extract_numbers)
    events_df['homeNumbers'] = events_df[7].apply(extract_numbers)
    
    home = players.loc[players['teamId'] == homeTeamID]
    away = players.loc[players['teamId'] == awayTeamID]
    
    sweater_number_mapping_home = dict(zip(home['sweaterNumber'], home['playerId']))
    sweater_number_mapping_away = dict(zip(away['sweaterNumber'], away['playerId']))
    events_df['homeNumbers'] = events_df['homeNumbers'].apply(lambda arr: [sweater_number_mapping_home.get(num, num) for num in arr])
    events_df['awayNumbers'] = events_df['awayNumbers'].apply(lambda arr: [sweater_number_mapping_away.get(num, num) for num in arr])
    
#     columns_to_drop = [6, 7, 8, 9, 10]
    
#     events_df = events_df.drop(columns=events_df.columns[columns_to_drop])
    
    corsi_df = pd.concat([corsi_df, events_df], axis=0, ignore_index=True)
    
    

#     valid_types = ['SHOT', 'BLOCK', 'MISS', 'GOAL']
#     events_df = events_df[events_df['Event'].isin(valid_types)]
    

    

    

    
#     for player in home['sweaterNumber'].tolist():
#         temp = events_df.copy()
#         temp['contains_player'] = events_df['homeNumbers'].apply(lambda x: player in x)
#         temp = temp[temp['contains_player'] == True]
#         CorsiFor = len(temp.loc[temp['Corsi'] == homeTeamName])
#         CorsiAgainst = len(temp.loc[temp['Corsi'] == awayTeamName])
#         PID = home.loc[home['sweaterNumber'] == player]['playerId'].iloc[0]
#         name = player_bios.loc[player_bios['playerId'] == PID]['name'].iloc[0]
#         print(f"{name}: For = {CorsiFor}; Against = {CorsiAgainst}")
    
    
    
corsi_df

2023020003
2023020031
2023020048
2023020098
2023020111
2023020138
2023020155
2023020173
2023020192
2023020209
2023020315
2023020368
2023020379
2023020427
2023020440
2023020460
2023020037
2023020053
2023020148
2023020183
2023020196
2023020225
2023020308
2023020322
2023020338
2023020370
2023020424
2023020432
2023020456
2023020006
2023020021
2023020100
2023020119
2023020130
2023020145
2023020193
2023020258
2023020293
2023020340
2023020371
2023020392
2023020408
2023020462
2023020009
2023020116
2023020124
2023020140
2023020172
2023020179
2023020241
2023020265
2023020278
2023020336
2023020353
2023020387
2023020404
2023020419
2023020439
2023020454
2023020008
2023020032
2023020079
2023020097
2023020125
2023020202
2023020221
2023020249
2023020266
2023020309
2023020339
2023020373
2023020446
2023020005
2023020026
2023020038
2023020139
2023020165
2023020177
2023020190
2023020207
2023020215
2023020329
2023020346
2023020364
2023020413
2023020449
2023020466
2023020020
2023020045
2023020058
2023020093

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,Corsi,gameID,awayNumbers,homeNumbers,11,12,13,14
0,5,1,EV,0:00 20:00,FAC,SEA won Neu. Zone - SEA #37 GOURDE vs VGK #10 ROY,37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
1,7,1,EV,0:12 19:48,FAC,SEA won Off. Zone - SEA #37 GOURDE vs VGK #10 ROY,37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
2,8,1,EV,0:14 19:46,MISS,"SEA #24 OLEKSIAK, Slap, High and Wide Right, O...",37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
3,9,1,EV,0:28 19:32,HIT,"VGK #28 CARRIER HIT SEA #24 OLEKSIAK, Off. Zone",37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,VGK,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
4,10,1,EV,0:42 19:18,GIVE,"VGK GIVEAWAY - #14 HAGUE, Def. Zone",37 C 22 R 19 L 3 D 6 D 31 G,9 C 49 C 81 R 7 D 14 D 33 G,,,,VGK,2023020003,"[8476826, 8477416, 8477955, 8478840, 8476457, ...","[8478403, 8477964, 8476539, 8474565, 8479980, ...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120345,297,3,EV,18:45 1:15,FAC,WSH won Def. Zone - WSH #26 DOWD vs CHI #90 JO...,26 C 47 L 3 D 74 D 35 G,23 C 90 C 98 C 11 R 72 D,,,,WSH,2023020422,"[8475343, 8479359, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8481568]",,,,
120346,298,3,PP,19:12 0:48,MISS,"CHI #98 BEDARD, Wrist, High and Wide Left, Off...",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,
120347,299,3,PP,19:18 0:42,SHOT,"CHI ONGOAL - #98 BEDARD, Backhand , Off. Zone,...",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,
120348,300,3,PP,19:25 0:35,TAKE,"CHI TAKEAWAY - #98 BEDARD, Off. Zone",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,


In [105]:
corsi_df.to_excel('corsi.xlsx')

In [110]:
file_name = f"corsi.xlsx"
Corsi = pd.read_excel(file_name, index_col=False)
Corsi

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,Corsi,gameID,awayNumbers,homeNumbers,11,12,13,14
0,0,5,1,EV,0:00 20:00,FAC,SEA won Neu. Zone - SEA #37 GOURDE vs VGK #10 ROY,37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
1,1,7,1,EV,0:12 19:48,FAC,SEA won Off. Zone - SEA #37 GOURDE vs VGK #10 ROY,37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
2,2,8,1,EV,0:14 19:46,MISS,"SEA #24 OLEKSIAK, Slap, High and Wide Right, O...",37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
3,3,9,1,EV,0:28 19:32,HIT,"VGK #28 CARRIER HIT SEA #24 OLEKSIAK, Off. Zone",37 C 20 R 22 R 3 D 24 D 31 G,10 C 55 R 28 L 7 D 14 D 33 G,,,,VGK,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ...",,,,
4,4,10,1,EV,0:42 19:18,GIVE,"VGK GIVEAWAY - #14 HAGUE, Def. Zone",37 C 22 R 19 L 3 D 6 D 31 G,9 C 49 C 81 R 7 D 14 D 33 G,,,,VGK,2023020003,"[8476826, 8477416, 8477955, 8478840, 8476457, ...","[8478403, 8477964, 8476539, 8474565, 8479980, ...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120345,120345,297,3,EV,18:45 1:15,FAC,WSH won Def. Zone - WSH #26 DOWD vs CHI #90 JO...,26 C 47 L 3 D 74 D 35 G,23 C 90 C 98 C 11 R 72 D,,,,WSH,2023020422,"[8475343, 8479359, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8481568]",,,,
120346,120346,298,3,PP,19:12 0:48,MISS,"CHI #98 BEDARD, Wrist, High and Wide Left, Off...",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,
120347,120347,299,3,PP,19:18 0:42,SHOT,"CHI ONGOAL - #98 BEDARD, Backhand , Off. Zone,...",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,
120348,120348,300,3,PP,19:25 0:35,TAKE,"CHI TAKEAWAY - #98 BEDARD, Off. Zone",24 C 92 C 3 D 74 D 35 G,23 C 90 C 98 C 11 R 17 L 72 D,,,,CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ...",,,,


In [113]:
column_names = ['#', 'period', 'strength', 'times', 'event', 'description',  'Corsi', 'gameID', 'awayNumbers', 'homeNumbers']
Corsi.columns = column_names
# Corsi.set_index('#', inplace=True)
Corsi

Unnamed: 0_level_0,period,strength,times,event,description,Corsi,gameID,awayNumbers,homeNumbers
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
5,1,EV,0:00 20:00,FAC,SEA won Neu. Zone - SEA #37 GOURDE vs VGK #10 ROY,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
7,1,EV,0:12 19:48,FAC,SEA won Off. Zone - SEA #37 GOURDE vs VGK #10 ROY,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
8,1,EV,0:14 19:46,MISS,"SEA #24 OLEKSIAK, Slap, High and Wide Right, O...",SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
9,1,EV,0:28 19:32,HIT,"VGK #28 CARRIER HIT SEA #24 OLEKSIAK, Off. Zone",VGK,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
10,1,EV,0:42 19:18,GIVE,"VGK GIVEAWAY - #14 HAGUE, Def. Zone",VGK,2023020003,"[8476826, 8477416, 8477955, 8478840, 8476457, ...","[8478403, 8477964, 8476539, 8474565, 8479980, ..."
...,...,...,...,...,...,...,...,...,...
297,3,EV,18:45 1:15,FAC,WSH won Def. Zone - WSH #26 DOWD vs CHI #90 JO...,WSH,2023020422,"[8475343, 8479359, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8481568]"
298,3,PP,19:12 0:48,MISS,"CHI #98 BEDARD, Wrist, High and Wide Left, Off...",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."
299,3,PP,19:18 0:42,SHOT,"CHI ONGOAL - #98 BEDARD, Backhand , Off. Zone,...",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."
300,3,PP,19:25 0:35,TAKE,"CHI TAKEAWAY - #98 BEDARD, Off. Zone",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."


In [114]:
Corsi.reset_index(inplace=True)  
Corsi

Unnamed: 0,#,period,strength,times,event,description,Corsi,gameID,awayNumbers,homeNumbers
0,5,1,EV,0:00 20:00,FAC,SEA won Neu. Zone - SEA #37 GOURDE vs VGK #10 ROY,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
1,7,1,EV,0:12 19:48,FAC,SEA won Off. Zone - SEA #37 GOURDE vs VGK #10 ROY,SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
2,8,1,EV,0:14 19:46,MISS,"SEA #24 OLEKSIAK, Slap, High and Wide Right, O...",SEA,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
3,9,1,EV,0:28 19:32,HIT,"VGK #28 CARRIER HIT SEA #24 OLEKSIAK, Off. Zone",VGK,2023020003,"[8476826, 8480009, 8477416, 8478840, 8476467, ...","[8478462, 8478434, 8477478, 8474565, 8479980, ..."
4,10,1,EV,0:42 19:18,GIVE,"VGK GIVEAWAY - #14 HAGUE, Def. Zone",VGK,2023020003,"[8476826, 8477416, 8477955, 8478840, 8476457, ...","[8478403, 8477964, 8476539, 8474565, 8479980, ..."
...,...,...,...,...,...,...,...,...,...,...
120345,297,3,EV,18:45 1:15,FAC,WSH won Def. Zone - WSH #26 DOWD vs CHI #90 JO...,WSH,2023020422,"[8475343, 8479359, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8481568]"
120346,298,3,PP,19:12 0:48,MISS,"CHI #98 BEDARD, Wrist, High and Wide Left, Off...",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."
120347,299,3,PP,19:18 0:42,SHOT,"CHI ONGOAL - #98 BEDARD, Backhand , Off. Zone,...",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."
120348,300,3,PP,19:25 0:35,TAKE,"CHI TAKEAWAY - #98 BEDARD, Off. Zone",CHI,2023020422,"[8481580, 8475744, 8475324, 8474590, 8475311]","[8480798, 8474870, 8484144, 8479390, 8473422, ..."


In [None]:
# corsi_df.to_excel('corsi.xlsx')

In [116]:
file_name = f"data/corsi-{today.strftime('%Y-%m-%d')}.csv"
Corsi.to_csv(file_name, index=False)

In [None]:


try:
    # Open the webpage with Selenium
    driver.get(url)

    # Get the HTML content after the dynamic content has loaded
    html_content = driver.page_source

    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')

    # Now you can work with the soup object to extract data from the HTML.
    tables = soup.find_all('table', class_='tablewidth')

    dataframes = [pd.read_html(str(table))[0] for table in tables]

    # Print the extracted DataFrames
    for index, df in enumerate(dataframes, 1):
        print(f"DataFrame {index}:\n{df}\n{'='*50}\n")

finally:
    # Close the webdriver
    driver.quit()