In [2]:
import json
import time

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

from pydantic import BaseModel
from typing import List, Optional

from selenium import webdriver

from supabase import create_client, Client

In [3]:
class MatchEvent(BaseModel):
    id: int
    event_id: int
    minute: int
    second: Optional[float] = None
    team_id: int
    player_id: int
    x: float
    y: float
    end_x: Optional[float] = None
    end_y: Optional[float] = None
    qualifiers: List[dict]
    is_touch: bool
    blocked_x: Optional[float] = None
    blocked_y: Optional[float] = None
    goal_mouth_z: Optional[float] = None
    goal_mouth_y: Optional[float] = None
    is_shot: bool
    card_type: bool
    is_goal: bool
    type_display_name: str
    outcome_type_display_name: str
    period_display_name: str

In [4]:
def insert_match_events(df, supabase):
    events = [
        MatchEvent(**x).dict()
        for x in df.to_dict(orient='records')
    ]
    
    execution = supabase.table('match_event').upsert(events).execute()

In [5]:
class Player(BaseModel):
    player_id: int
    shirt_no: int
    name: str
    age: int
    position: str
    team_id: int

In [6]:
def insert_players(team_info, supabase):
    players = []
    for team in team_info:
        for player in team['players']:
            players.append({
                'player_id': player['playerId'],
                'team_id': team['team_id'],
                'shirt_no': player['shirtNo'],
                'name': player['name'],
                'position': player['position'],
                'age': player['age']
            })
            
    execution = supabase.table('players').upsert(players).execute()

In [8]:
from Database import Database
supabase_password = Database.get_password()
project_url = Database.get_project_url()
api_key = Database.get_api_key()

supabase = create_client(project_url, api_key)

In [9]:
driver = webdriver.Chrome()

In [10]:
def scrape_match_events(whoscored_url, driver):
    
    driver.get(whoscored_url)
    
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    element = soup.select_one('script:-soup-contains("matchCentreData")')
    if element is None:
        return None
    
    matchdict = json.loads(element.text.split("matchCentreData: ")[1].split(',\n')[0])
    
    match_events = matchdict['events']
    
    df = pd.DataFrame(match_events)
    
    df.dropna(subset='playerId', inplace=True)
    
    df = df.where(pd.notnull(df), None)
    
    df = df.rename(
    {
        'eventId': 'event_id',
        'expandedMinute': 'expanded_minute',
        'outcomeType': 'outcome_type',
        'isTouch': 'is_touch',
        'playerId': 'player_id',
        'teamId': 'team_id',
        'endX': 'end_x',
        'endY': 'end_y',
        'blockedX': 'blocked_x',
        'blockedY': 'blocked_y',
        'goalMouthZ': 'goal_mouth_z',
        'goalMouthY': 'goal_mouth_y',
        'isShot': 'is_shot',
        'cardType': 'card_type',
        'isGoal': 'is_goal'
    },
        axis=1
    )
    
    df['period_display_name'] = df['period'].apply(lambda x: x['displayName'])
    df['type_display_name'] = df['type'].apply(lambda x: x['displayName'])
    df['outcome_type_display_name'] = df['outcome_type'].apply(lambda x: x['displayName'])
    
    df.drop(columns=["period", "type", "outcome_type"], inplace=True)
    
    if 'is_goal' not in df.columns:
        df['is_goal'] = False
        
    if 'is_card' not in df.columns:
        df['is_card'] = False
        df['card_type'] = False
        
    df = df[~(df['type_display_name'] == "OffsideGiven")]
    
    df = df[[
        'id', 'event_id', 'minute', 'second', 'team_id', 'player_id', 'x', 'y', 'end_x', 'end_y',
        'qualifiers', 'is_touch', 'blocked_x', 'blocked_y', 'goal_mouth_z', 'goal_mouth_y', 'is_shot',
        'card_type', 'is_goal', 'type_display_name', 'outcome_type_display_name',
        'period_display_name'
    ]]
    
    df[['id', 'event_id', 'minute', 'team_id', 'player_id']] = df[['id', 'event_id', 'minute', 'team_id', 'player_id']].astype(np.int64)
    df[['second', 'x', 'y', 'end_x', 'end_y']] = df[['second', 'x', 'y', 'end_x', 'end_y']].astype(float)
    df[['is_shot', 'is_goal', 'card_type']] = df[['is_shot', 'is_goal', 'card_type']].astype(bool)
    
    df['is_goal'] = df['is_goal'].fillna(False)
    df['is_shot'] = df['is_shot'].fillna(False)
    
    for column in df.columns:
        if df[column].dtype == np.float64 or df[column].dtype == np.float32:
            df[column] = np.where(
                np.isnan(df[column]),
                None,
                df[column]
            )
            
            
    insert_match_events(df, supabase)
    
    
    team_info = []
    team_info.append({
        'team_id': matchdict['home']['teamId'],
        'name': matchdict['home']['name'],
        'country_name': matchdict['home']['countryName'],
        'manager_name': matchdict['home']['managerName'],
        'players': matchdict['home']['players'],
    })

    team_info.append({
        'team_id': matchdict['away']['teamId'],
        'name': matchdict['away']['name'],
        'country_name': matchdict['away']['countryName'],
        'manager_name': matchdict['away']['managerName'],
        'players': matchdict['away']['players'],
    })
    
    insert_players(team_info, supabase)
    
    return print('Success')

In [19]:
driver.get("https://www.whoscored.com/Teams/607/Fixtures/France-Lille")

In [20]:
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [21]:
all_urls = soup.select('a[href*="\/Live\/"]')

In [22]:
all_urls = list(set([
    'https://whoscored.com' + x.attrs['href']
    for x in all_urls
]))

In [23]:
all_urls 

['https://whoscored.com/Matches/1775883/Live/Europe-Conference-League-2023-2024-Lille-Olimpija-Ljubljana',
 'https://whoscored.com/Matches/1741100/Live/France-Ligue-1-2023-2024-Lille-Lorient',
 'https://whoscored.com/Matches/1741114/Live/France-Ligue-1-2023-2024-Montpellier-Lille',
 'https://whoscored.com/Matches/1741716/Live/France-Ligue-1-2023-2024-Lille-Metz',
 'https://whoscored.com/Matches/1787886/Live/France-Coupe-de-France-2023-2024-Lille-Golden-Lion-de-Saint-Joseph',
 'https://whoscored.com/Matches/1741735/Live/France-Ligue-1-2023-2024-Lille-Paris-Saint-Germain',
 'https://whoscored.com/Matches/1773567/Live/Europe-Conference-League-2023-2024-Rijeka-Lille',
 'https://whoscored.com/Matches/1741708/Live/France-Ligue-1-2023-2024-Lyon-Lille',
 'https://whoscored.com/Matches/1773544/Live/Europe-Conference-League-2023-2024-Lille-Rijeka',
 'https://whoscored.com/Matches/1775882/Live/Europe-Conference-League-2023-2024-Lille-Slovan-Bratislava',
 'https://whoscored.com/Matches/1795060/Liv

In [24]:
for url in all_urls:
    print(url)
    scrape_match_events(
            whoscored_url=url,
            driver=driver
    )
    time.sleep(2)

https://whoscored.com/Matches/1775883/Live/Europe-Conference-League-2023-2024-Lille-Olimpija-Ljubljana
https://whoscored.com/Matches/1741100/Live/France-Ligue-1-2023-2024-Lille-Lorient


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:08:42,303:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:08:42,554:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741114/Live/France-Ligue-1-2023-2024-Montpellier-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:08:46,887:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:08:47,170:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741716/Live/France-Ligue-1-2023-2024-Lille-Metz


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:08:51,476:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:08:51,689:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1787886/Live/France-Coupe-de-France-2023-2024-Lille-Golden-Lion-de-Saint-Joseph
https://whoscored.com/Matches/1741735/Live/France-Ligue-1-2023-2024-Lille-Paris-Saint-Germain


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:08:58,862:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:08:59,092:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1773567/Live/Europe-Conference-League-2023-2024-Rijeka-Lille
https://whoscored.com/Matches/1741708/Live/France-Ligue-1-2023-2024-Lyon-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:06,524:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:06,768:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1773544/Live/Europe-Conference-League-2023-2024-Lille-Rijeka
https://whoscored.com/Matches/1775882/Live/Europe-Conference-League-2023-2024-Lille-Slovan-Bratislava
https://whoscored.com/Matches/1795060/Live/France-Coupe-de-France-2023-2024-Racing-Club-de-France-Lille
https://whoscored.com/Matches/1741255/Live/France-Ligue-1-2023-2024-Lille-Reims


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:19,623:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:19,860:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741188/Live/France-Ligue-1-2023-2024-Lens-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:24,208:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:24,484:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741741/Live/France-Ligue-1-2023-2024-Strasbourg-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:28,721:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:28,915:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741230/Live/France-Ligue-1-2023-2024-Lorient-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:33,427:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:33,659:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741203/Live/France-Ligue-1-2023-2024-Lille-Monaco


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:38,160:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:38,358:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741213/Live/France-Ligue-1-2023-2024-Marseille-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:42,934:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:43,202:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741237/Live/France-Ligue-1-2023-2024-Lille-Montpellier


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:48,094:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:48,285:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1775875/Live/Europe-Conference-League-2023-2024-Lille-Klaksvik
https://whoscored.com/Matches/1775880/Live/Europe-Conference-League-2023-2024-Klaksvik-Lille
https://whoscored.com/Matches/1741246/Live/France-Ligue-1-2023-2024-Rennes-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:09:58,952:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:09:59,221:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741696/Live/France-Ligue-1-2023-2024-Lille-Toulouse


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:03,662:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:03,861:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741220/Live/France-Ligue-1-2023-2024-Lille-Nantes


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:08,228:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:08,442:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741075/Live/France-Ligue-1-2023-2024-Nice-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:13,099:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:13,409:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741728/Live/France-Ligue-1-2023-2024-Clermont-Foot-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:18,118:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:18,353:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1741182/Live/France-Ligue-1-2023-2024-Le-Havre-Lille


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:23,030:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:23,265:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1775814/Live/Europe-Conference-League-2023-2024-Olimpija-Ljubljana-Lille
https://whoscored.com/Matches/1741196/Live/France-Ligue-1-2023-2024-Lille-Brest


C:\Users\marvi\AppData\Local\Temp\ipykernel_27960\2685486566.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  MatchEvent(**x).dict()
2024-01-31 15:10:30,812:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/match_event "HTTP/1.1 201 Created"
2024-01-31 15:10:31,063:INFO - HTTP Request: POST https://ypljxhueshyvowbzpglx.supabase.co/rest/v1/players "HTTP/1.1 201 Created"


Success
https://whoscored.com/Matches/1775876/Live/Europe-Conference-League-2023-2024-Slovan-Bratislava-Lille
