In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import pickle
import json
pd.options.mode.chained_assignment = None 
import re
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, RFE, mutual_info_classif
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import asyncio
import aiohttp
import nest_asyncio

In [5]:
#Grabbing game info from the 2023-2024 season

# Initialize the starting and ending game IDs
start_game_id = 2023020001
end_game_id = 2023021307

# Base URL for the API
base_url = "https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId="

# Create an empty list to store the game IDs and URLs
game_data = []

# Loop through the range of game IDs
for game_id in range(start_game_id, end_game_id + 1):
    # Create the full API URL
    api_url = f"{base_url}{game_id}"
    # Append the game ID and URL to the list
    game_data.append({"game_id": game_id, "url": api_url})

# Convert the list to a DataFrame
game_df = pd.DataFrame(game_data)
game_df


Unnamed: 0,game_id,url
0,2023020001,https://api.nhle.com/stats/rest/en/shiftcharts...
1,2023020002,https://api.nhle.com/stats/rest/en/shiftcharts...
2,2023020003,https://api.nhle.com/stats/rest/en/shiftcharts...
3,2023020004,https://api.nhle.com/stats/rest/en/shiftcharts...
4,2023020005,https://api.nhle.com/stats/rest/en/shiftcharts...
...,...,...
1302,2023021303,https://api.nhle.com/stats/rest/en/shiftcharts...
1303,2023021304,https://api.nhle.com/stats/rest/en/shiftcharts...
1304,2023021305,https://api.nhle.com/stats/rest/en/shiftcharts...
1305,2023021306,https://api.nhle.com/stats/rest/en/shiftcharts...


In [6]:
#Expanding on one game to see what the info looks like


url = "https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId=2023020274"
#url = "https://api-web.nhle.com/v1/gamecenter/2023020260/boxscore"
#game_id = 2023021272
response = requests.get(url )
content = json.loads(response.content)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # The response content can be accessed using response.text
    response_text = response.text
    #pprint(response_text)
else:
    print(f"Request failed with status code {response.status_code}")

json_data = json.loads(response_text)

#This is a look at the goals scored
details = pd.DataFrame(json_data['data'])
goals = details.query('typeCode == 505')
goals

Unnamed: 0,id,detailCode,duration,endTime,eventDescription,eventDetails,eventNumber,firstName,gameId,hexValue,lastName,period,playerId,shiftNumber,startTime,teamAbbrev,teamId,teamName,typeCode
39,13455214,802,,01:01,EVG,"Miro Heiskanen, Roope Hintz",749,Joe,2023020274,#006341,Pavelski,3,8470794,0,01:01,DAL,25,Dallas Stars,505
75,13455215,802,,14:23,EVG,"Joe Pavelski, Roope Hintz",686,Jamie,2023020274,#006341,Benn,2,8473994,0,14:23,DAL,25,Dallas Stars,505
216,13455216,803,,08:24,EVG,Mason Marchment,269,Tyler,2023020274,#006341,Seguin,3,8475794,0,08:24,DAL,25,Dallas Stars,505
256,13455217,804,,16:25,PPG,"Chris Kreider, Erik Gustafsson",253,Vincent,2023020274,#0033A0,Trocheck,1,8476389,0,16:25,NYR,3,New York Rangers,505
319,13455222,806,,19:44,EVG,"Braden Schneider, Jacob Trouba",1103,Barclay,2023020274,#0033A0,Goodrow,3,8476624,0,19:44,NYR,3,New York Rangers,505
475,13455218,803,,17:04,EN,Tyler Seguin,277,Roope,2023020274,#006341,Hintz,3,8478449,0,17:04,DAL,25,Dallas Stars,505
519,13455219,803,,07:06,PPG,"Wyatt Johnston, Matt Duchene",268,Mason,2023020274,#006341,Marchment,3,8478975,0,07:06,DAL,25,Dallas Stars,505
563,13455220,803,,16:30,EN,,275,Sam,2023020274,#006341,Steel,3,8479351,0,16:30,DAL,25,Dallas Stars,505
670,13455221,803,,13:38,EVG,"Nick Bonino, Jacob Trouba",258,Kaapo,2023020274,#0033A0,Kakko,2,8481554,0,13:38,NYR,3,New York Rangers,505


In [7]:
goals['detailCode'].value_counts()

#801 = slap shot
#802 = snap shot
#803 = wrist shot
#804 = wrap-around
#805= tip-in
#806 = backhanded shot
#807 = deflected in
#808 = bat shot
#809 = cradle/Michigan
#810 = poke
#811 = between the legs

detailCode
803    5
802    2
804    1
806    1
Name: count, dtype: int64

In [8]:
#Streamline and re-organize the goals df
goals = goals[['gameId', 'playerId', 'firstName', 'lastName', 'teamName', 'eventDetails', 'eventDescription']]
goals[['assist_1', 'assist_2']] = goals['eventDetails'].str.split(', ', expand=True)



In [9]:
# Apply nest_asyncio to allow nested event loops in Jupyter notebooks
nest_asyncio.apply()

#Loop through all of the games to get all of the game summary data
# Initialize the starting and ending game IDs
start_game_id = 2023020001
end_game_id = 2023021307

# Base URL for the API
base_url = "https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId="

# Create an empty list to store the game IDs and URLs
game_data = [{"game_id": game_id, "url": f"{base_url}{game_id}"} for game_id in range(start_game_id, end_game_id + 1)]

# Convert the list to a DataFrame
game_df = pd.DataFrame(game_data)

async def fetch_shift_data(session, url, game_id):
    try:
        async with session.get(url) as response:
            if response.status == 200:
                json_data = await response.json()
                details = pd.DataFrame(json_data['data'])
                if not details.empty:
                    details['player_name'] = details['firstName'] + " " + details['lastName']

                    # Handle empty or null eventDetails only for typeCode 505
                    mask_505 = details['typeCode'] == 505
                    if 'typeCode' in details.columns and mask_505.any():
                        details.loc[mask_505, 'eventDetails'] = details.loc[mask_505, 'eventDetails'].fillna('unassisted')
                        details.loc[mask_505, 'eventDetails'] = details.loc[mask_505, 'eventDetails'].replace(r'^\s*$', 'unassisted', regex=True)

                        # Handle splitting of eventDetails
                        assists = details.loc[mask_505, 'eventDetails'].str.split(', ', expand=True)

                        details.loc[mask_505, 'assist_1'] = assists[0]
                        details.loc[mask_505, 'assist_2'] = assists[1] if assists.shape[1] > 1 else None

                    return details
            else:
                print(f"Request failed with status code {response.status}")
                return pd.DataFrame()
    except Exception as e:
        print(f"An error occurred: {e}")
        return pd.DataFrame()

async def fetch_all_shift_data(game_df):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for _, row in game_df.iterrows():
            url = row['url']
            game_id = row['game_id']
            tasks.append(fetch_shift_data(session, url, game_id))
        
        results = await asyncio.gather(*tasks)
        return results

# Run the async tasks and collect the results
all_shifts_data = asyncio.run(fetch_all_shift_data(game_df))

# Concatenate all the shift data DataFrames into a single DataFrame
all_shifts = pd.concat(all_shifts_data, ignore_index=True)

all_shifts.head()

Unnamed: 0,id,detailCode,duration,endTime,eventDescription,eventDetails,eventNumber,firstName,gameId,hexValue,...,playerId,shiftNumber,startTime,teamAbbrev,teamId,teamName,typeCode,player_name,assist_1,assist_2
0,13244643,0.0,00:30,00:30,,,7.0,Ryan,2023020001,#FFB81C,...,8474151,1,00:00,NSH,18,Nashville Predators,517,Ryan McDonagh,,
1,13244644,0.0,00:51,03:02,,,74.0,Ryan,2023020001,#FFB81C,...,8474151,2,02:11,NSH,18,Nashville Predators,517,Ryan McDonagh,,
2,13244645,0.0,00:44,04:34,,,92.0,Ryan,2023020001,#FFB81C,...,8474151,3,03:50,NSH,18,Nashville Predators,517,Ryan McDonagh,,
3,13244646,0.0,00:47,06:45,,,222.0,Ryan,2023020001,#FFB81C,...,8474151,4,05:58,NSH,18,Nashville Predators,517,Ryan McDonagh,,
4,13244647,0.0,00:41,09:00,,,250.0,Ryan,2023020001,#FFB81C,...,8474151,5,08:19,NSH,18,Nashville Predators,517,Ryan McDonagh,,


In [10]:
goals_df = all_shifts.query('typeCode == 505')
shifts_df = all_shifts.query('typeCode !=505')

In [11]:
# Fill NA values with 0 (or any other integer you prefer)
goals_df['detailCode'] = goals_df['detailCode'].fillna(0).astype(int)

In [12]:
goals_df=goals_df[['gameId',  'playerId', 'player_name','detailCode', 'eventDescription', 'assist_1', 'assist_2' ]]


# Define the mapping dictionary
detail_code_to_shot_type = {
    801: 'slap shot',
    802: 'snap shot',
    803: 'wrist shot',
    804: 'wrap-around',
    805: 'tip-in',
    806: 'backhanded shot',
    807: 'deflected in',
    808: 'bat shot',
    809: 'cradle/Michigan',
    810: 'poke',
    811: 'between the legs'
}

# Assuming goals_df is your DataFrame and detailCode is the column with shot types
goals_df['shot_type'] = goals_df['detailCode'].map(detail_code_to_shot_type)

# Display the first few rows of the DataFrame to verify
goals_df.head()


Unnamed: 0,gameId,playerId,player_name,detailCode,eventDescription,assist_1,assist_2,shot_type
127,2023020001,8475158,Ryan O'Reilly,805,EVG,Filip Forsberg,Ryan McDonagh,tip-in
210,2023020001,8476453,Nikita Kucherov,801,EVG,Victor Hedman,Brayden Point,slap shot
225,2023020001,8476453,Nikita Kucherov,803,EN,Nicholas Paul,,wrist shot
332,2023020001,8477426,Nicholas Paul,803,PPG,Brayden Point,Steven Stamkos,wrist shot
333,2023020001,8477426,Nicholas Paul,803,PPG,Mikhail Sergachev,Brayden Point,wrist shot


In [13]:
pasta_goals = goals_df.query('playerId == 8477956')
pasta_goals.head()

Unnamed: 0,gameId,playerId,player_name,detailCode,eventDescription,assist_1,assist_2,shot_type
4230,2023020006,8477956,David Pastrnak,803,EVG,Milan Lucic,,wrist shot
4238,2023020006,8477956,David Pastrnak,803,EN,Brad Marchand,Charlie Coyle,wrist shot
15956,2023020021,8477956,David Pastrnak,803,Penalty Shot,unassisted,,wrist shot
47288,2023020062,8477956,David Pastrnak,803,EVG,Brandon Carlo,,wrist shot
60354,2023020079,8477956,David Pastrnak,803,PPG,Brad Marchand,James van Riemsdyk,wrist shot


In [14]:
assist_1 = pasta_goals.groupby('assist_1')['gameId'].count()
assist_1 = pd.DataFrame(assist_1).reset_index()
assist_1=assist_1.rename(columns={'assist_1': 'player_name', 'gameId':'assists'})    
assist_1

Unnamed: 0,player_name,assists
0,Brad Marchand,2
1,Brandon Carlo,3
2,Charlie Coyle,2
3,Charlie McAvoy,5
4,Hampus Lindholm,1
5,Jake DeBrusk,4
6,John Beecher,1
7,Kevin Shattenkirk,5
8,Mason Lohrei,2
9,Matt Grzelcyk,1


In [15]:
assist_2 =pasta_goals.groupby('assist_2')['gameId'].count()
assist_2 = pd.DataFrame(assist_2).reset_index()
assist_2=assist_2.rename(columns={'assist_2': 'player_name', 'gameId':'assists'})   
assist_2

Unnamed: 0,player_name,assists
0,Brad Marchand,5
1,Charlie Coyle,2
2,Charlie McAvoy,7
3,Danton Heinen,3
4,Hampus Lindholm,2
5,Jake DeBrusk,1
6,James van Riemsdyk,2
7,Jesper Boqvist,1
8,Morgan Geekie,4
9,Pavel Zacha,3


In [16]:
points_w_pasta =pd.concat([assist_1, assist_2]).reset_index(drop=True)
points_w_pasta

Unnamed: 0,player_name,assists
0,Brad Marchand,2
1,Brandon Carlo,3
2,Charlie Coyle,2
3,Charlie McAvoy,5
4,Hampus Lindholm,1
5,Jake DeBrusk,4
6,John Beecher,1
7,Kevin Shattenkirk,5
8,Mason Lohrei,2
9,Matt Grzelcyk,1


In [17]:
# Group by 'B' column and count occurrences
points_w_pasta = points_w_pasta.groupby('player_name')['assists'].sum()
points_w_pasta = pd.DataFrame(points_w_pasta).reset_index()
points_w_pasta

Unnamed: 0,player_name,assists
0,Brad Marchand,7
1,Brandon Carlo,3
2,Charlie Coyle,4
3,Charlie McAvoy,12
4,Danton Heinen,3
5,Hampus Lindholm,3
6,Jake DeBrusk,5
7,James van Riemsdyk,2
8,Jesper Boqvist,1
9,John Beecher,1


In [18]:
if isinstance(points_w_pasta, pd.DataFrame) and 'player_name' in points_w_pasta.columns:
    # Filter the DataFrame to exclude rows where player_name is 'unassisted' using the ~ operator
    filtered_points_w_pasta = points_w_pasta[~points_w_pasta['player_name'].str.lower().eq('unassisted')]

    # Convert the filtered player_name column to a list
    new_players = filtered_points_w_pasta['player_name'].tolist()
new_players = ['Danton Heinen', 'James van Riemsdyk', 'Kevin Shatttenkirk', 'Pavel Zacha', 'Morgan Geekie']
new_players

['Danton Heinen',
 'James van Riemsdyk',
 'Kevin Shatttenkirk',
 'Pavel Zacha',
 'Morgan Geekie']

In [19]:
goals_df.head()

Unnamed: 0,gameId,playerId,player_name,detailCode,eventDescription,assist_1,assist_2,shot_type
127,2023020001,8475158,Ryan O'Reilly,805,EVG,Filip Forsberg,Ryan McDonagh,tip-in
210,2023020001,8476453,Nikita Kucherov,801,EVG,Victor Hedman,Brayden Point,slap shot
225,2023020001,8476453,Nikita Kucherov,803,EN,Nicholas Paul,,wrist shot
332,2023020001,8477426,Nicholas Paul,803,PPG,Brayden Point,Steven Stamkos,wrist shot
333,2023020001,8477426,Nicholas Paul,803,PPG,Mikhail Sergachev,Brayden Point,wrist shot


In [20]:
player_goals = goals_df['player_name'].value_counts().reset_index()
player_goals.columns = ['player_name', 'goal_count']
player_goals

Unnamed: 0,player_name,goal_count
0,Auston Matthews,71
1,Sam Reinhart,64
2,Zach Hyman,56
3,Artemi Panarin,53
4,Nathan MacKinnon,52
...,...,...
744,Jonas Siegenthaler,1
745,Adam Erne,1
746,Joel Edmundson,1
747,Haydn Fleury,1


In [21]:
assist_goals = points_w_pasta.merge(player_goals, on='player_name', how='left')

# Fill NaN values with 0 (for players with no appearances in final_goals_df)
assist_goals['goal_count'] = assist_goals['goal_count'].fillna(0).astype(int)
assist_goals

Unnamed: 0,player_name,assists,goal_count
0,Brad Marchand,7,31
1,Brandon Carlo,3,4
2,Charlie Coyle,4,27
3,Charlie McAvoy,12,13
4,Danton Heinen,3,17
5,Hampus Lindholm,3,3
6,Jake DeBrusk,5,22
7,James van Riemsdyk,2,11
8,Jesper Boqvist,1,6
9,John Beecher,1,7


In [22]:
#times when pasta assists

pasta_assist1 = goals_df.query('assist_1 == "David Pastrnak"')
pasta_assist2 = goals_df.query('assist_2 == "David Pastrnak"')
pasta_assist1.head()

Unnamed: 0,gameId,playerId,player_name,detailCode,eventDescription,assist_1,assist_2,shot_type
60076,2023020079,8473419,Brad Marchand,803,EVG,David Pastrnak,Derek Forbort,wrist shot
60088,2023020079,8473419,Brad Marchand,803,EVG,David Pastrnak,Pavel Zacha,wrist shot
76042,2023020100,8475745,Charlie Coyle,805,PPG,David Pastrnak,Charlie McAvoy,tip-in
99237,2023020130,8479325,Charlie McAvoy,803,EVG,David Pastrnak,Pavel Zacha,wrist shot
122205,2023020161,8474037,James van Riemsdyk,803,EVG,David Pastrnak,Pavel Zacha,wrist shot


In [23]:
pasta_assist1=pasta_assist1.groupby('player_name')['gameId'].count()
pasta_assist1 = pd.DataFrame(pasta_assist1).reset_index()
pasta_assist1=pasta_assist1.rename(columns={'gameId':'assists_from_pasta'})   

pasta_assist2=pasta_assist2.groupby('player_name')['gameId'].count()
pasta_assist2 = pd.DataFrame(pasta_assist2).reset_index()
pasta_assist2=pasta_assist2.rename(columns={'gameId':'assists_from_pasta'})   

pasta_points =pd.concat([pasta_assist1, pasta_assist2]).reset_index(drop=True)

pasta_points = pasta_points.groupby('player_name')['assists_from_pasta'].sum()
pasta_points = pd.DataFrame(pasta_points).reset_index()
pasta_points


Unnamed: 0,player_name,assists_from_pasta
0,Brad Marchand,10
1,Brandon Carlo,1
2,Charlie Coyle,9
3,Charlie McAvoy,4
4,Danton Heinen,3
5,Hampus Lindholm,2
6,Jake DeBrusk,3
7,James van Riemsdyk,7
8,Kevin Shattenkirk,4
9,Morgan Geekie,6


In [24]:
# #assists from other players not pasta

# not_pasta1 = goals_df.query('assist_1 != "David Pastrnak"')
# not_pasta2 = goals_df.query('assist_2 != "David Pastrnak"')


# not_pasta1=not_pasta1.groupby('player_name')['gameId'].count()
# not_pasta1 = pd.DataFrame(not_pasta1).reset_index()
# not_pasta1=not_pasta1.rename(columns={'gameId':'not_from_pasta'})   

# not_pasta2=not_pasta2.groupby('player_name')['gameId'].count()
# not_pasta2 = pd.DataFrame(not_pasta2).reset_index()
# not_pasta2=not_pasta2.rename(columns={'gameId':'not_from_pasta'})   

# not_from_pasta =pd.concat([not_pasta1, not_pasta2]).reset_index(drop=True)

# not_from_pasta = not_from_pasta.groupby('player_name')['not_from_pasta'].sum()
# not_from_pasta = pd.DataFrame(not_from_pasta).reset_index()
# not_from_pasta


In [25]:
players = pd.merge(pasta_points, assist_goals, on='player_name', how='right')
#players= pd.merge(players, not_from_pasta, on='player_name', how = 'left')

# Fill NA values with 0 (or any other integer you prefer)
players['assists_from_pasta'] = players['assists_from_pasta'].fillna(0).astype(int)

players

Unnamed: 0,player_name,assists_from_pasta,assists,goal_count
0,Brad Marchand,10,7,31
1,Brandon Carlo,1,3,4
2,Charlie Coyle,9,4,27
3,Charlie McAvoy,4,12,13
4,Danton Heinen,3,3,17
5,Hampus Lindholm,2,3,3
6,Jake DeBrusk,3,5,22
7,James van Riemsdyk,7,2,11
8,Jesper Boqvist,0,1,6
9,John Beecher,0,1,7


In [26]:
#all players who scored goals, had assists that came from pasta, assisted pasta, and percent of their goals that were assisted from pasta
players['percent_from'] = round((players['assists_from_pasta']/players['goal_count'])*100).fillna(0).astype(int)
players

Unnamed: 0,player_name,assists_from_pasta,assists,goal_count,percent_from
0,Brad Marchand,10,7,31,32
1,Brandon Carlo,1,3,4,25
2,Charlie Coyle,9,4,27,33
3,Charlie McAvoy,4,12,13,31
4,Danton Heinen,3,3,17,18
5,Hampus Lindholm,2,3,3,67
6,Jake DeBrusk,3,5,22,14
7,James van Riemsdyk,7,2,11,64
8,Jesper Boqvist,0,1,6,0
9,John Beecher,0,1,7,0


In [27]:
#stats before becoming bruins
#Grabbing game info from the 2023-2024 season

# Initialize the starting and ending game IDs
start_game_id = 2022020001
end_game_id = 2022021307

# Base URL for the API
base_url = "https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId="

# Create an empty list to store the game IDs and URLs
game_data = []

# Loop through the range of game IDs
for game_id in range(start_game_id, end_game_id + 1):
    # Create the full API URL
    api_url = f"{base_url}{game_id}"
    # Append the game ID and URL to the list
    game_data.append({"game_id": game_id, "url": api_url})

# Convert the list to a DataFrame
game_df = pd.DataFrame(game_data)
game_df


Unnamed: 0,game_id,url
0,2022020001,https://api.nhle.com/stats/rest/en/shiftcharts...
1,2022020002,https://api.nhle.com/stats/rest/en/shiftcharts...
2,2022020003,https://api.nhle.com/stats/rest/en/shiftcharts...
3,2022020004,https://api.nhle.com/stats/rest/en/shiftcharts...
4,2022020005,https://api.nhle.com/stats/rest/en/shiftcharts...
...,...,...
1302,2022021303,https://api.nhle.com/stats/rest/en/shiftcharts...
1303,2022021304,https://api.nhle.com/stats/rest/en/shiftcharts...
1304,2022021305,https://api.nhle.com/stats/rest/en/shiftcharts...
1305,2022021306,https://api.nhle.com/stats/rest/en/shiftcharts...


In [28]:
import nest_asyncio
import pandas as pd
import asyncio
import aiohttp

# Apply nest_asyncio to allow nested event loops in Jupyter notebooks
nest_asyncio.apply()

# Loop through all of the games to get all of the game summary data
# Initialize the starting and ending game IDs
start_game_id = 2019020001
end_game_id = 2023021307

# Base URL for the API
base_url = "https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId="

# Create an empty list to store the game IDs and URLs
game_data = [{"game_id": game_id, "url": f"{base_url}{game_id}"} for game_id in range(start_game_id, end_game_id + 1)]

# Convert the list to a DataFrame
game_df = pd.DataFrame(game_data)

async def fetch_shift_data(session, url, game_id):
    try:
        async with session.get(url) as response:
            if response.status == 200:
                json_data = await response.json()
                details = pd.DataFrame(json_data['data'])
                if not details.empty:
                    details['player_name'] = details['firstName'] + " " + details['lastName']

                    # Handle empty or null eventDetails only for typeCode 505
                    mask_505 = details['typeCode'] == 505
                    if 'typeCode' in details.columns and mask_505.any():
                        details.loc[mask_505, 'eventDetails'] = details.loc[mask_505, 'eventDetails'].fillna('unassisted')
                        details.loc[mask_505, 'eventDetails'] = details.loc[mask_505, 'eventDetails'].replace(r'^\s*$', 'unassisted', regex=True)

                        # Handle splitting of eventDetails
                        assists = details.loc[mask_505, 'eventDetails'].str.split(', ', expand=True)

                        details.loc[mask_505, 'assist_1'] = assists[0]
                        details.loc[mask_505, 'assist_2'] = assists[1] if assists.shape[1] > 1 else None

                    return details
            else:
                print(f"Request failed with status code {response.status}")
                return pd.DataFrame()
    except Exception as e:
        print(f"An error occurred: {e}")
        return pd.DataFrame()

async def fetch_prev_shift_data(game_df, chunk_size=100):
    async with aiohttp.ClientSession() as session:
        results = []
        for i in range(0, len(game_df), chunk_size):
            tasks = []
            chunk = game_df.iloc[i:i + chunk_size]
            for _, row in chunk.iterrows():
                url = row['url']
                game_id = row['game_id']
                tasks.append(fetch_shift_data(session, url, game_id))
            
            chunk_results = await asyncio.gather(*tasks)
            results.extend(chunk_results)
            print(f"Processed {i + chunk_size} of {len(game_df)} games")

        return results

# Run the async tasks and collect the results
previous_shifts_data = asyncio.run(fetch_prev_shift_data(game_df))

# Concatenate all the shift data DataFrames into a single DataFrame
previous_shifts = pd.concat(previous_shifts_data, ignore_index=True)

previous_shifts.head()


Processed 100 of 4001307 games
Processed 200 of 4001307 games
Processed 300 of 4001307 games
Processed 400 of 4001307 games
Processed 500 of 4001307 games
Processed 600 of 4001307 games
Processed 700 of 4001307 games
Processed 800 of 4001307 games
Processed 900 of 4001307 games
Processed 1000 of 4001307 games
Processed 1100 of 4001307 games
Processed 1200 of 4001307 games
Processed 1300 of 4001307 games
Processed 1400 of 4001307 games
Processed 1500 of 4001307 games
Processed 1600 of 4001307 games
Processed 1700 of 4001307 games
Processed 1800 of 4001307 games
Processed 1900 of 4001307 games
Processed 2000 of 4001307 games
Processed 2100 of 4001307 games
Processed 2200 of 4001307 games
Processed 2300 of 4001307 games
Processed 2400 of 4001307 games
Processed 2500 of 4001307 games
Processed 2600 of 4001307 games
Processed 2700 of 4001307 games
Processed 2800 of 4001307 games
Processed 2900 of 4001307 games
Processed 3000 of 4001307 games
Processed 3100 of 4001307 games
Processed 3200 of