In [40]:
import re
from datetime import datetime, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup

## Preliminary round

In [52]:
# Scrape the four preliminary-round group pages and turn every game box
# (a <div class="vevent">) into one calendar row.
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell
# Offset subtracted from each page's UTC offset before shifting kickoff times.
# NOTE(review): presumably the author's own time zone (UTC+2) -- confirm.
REFERENCE_UTC_OFFSET = 2
GAME_LENGTH = timedelta(hours=2)  # end time is kickoff plus two hours

pr_data = []
for group in ["A", "B", "C", "D"]:
  site = f"https://en.wikipedia.org/wiki/EuroBasket_2022_Group_{group}"
  response = requests.get(site, timeout=REQUEST_TIMEOUT)
  # Fail loudly on an HTTP error instead of parsing an error page into zero rows.
  response.raise_for_status()
  soup = BeautifulSoup(response.content, "html.parser")

  # The first run of digits inside the first <i> element is read as the venue's
  # UTC offset (assumes that element is the page's "all times are local" note
  # and that the offset is positive -- TODO confirm).
  tz_note = soup.find("i")
  offset_match = re.search(r'\d+', tz_note.text) if tz_note is not None else None
  if offset_match is None:
    raise ValueError(f"No UTC offset found in the first <i> element of {site}")
  hours_ahead = int(offset_match.group()) - REFERENCE_UTC_OFFSET

  for event in soup.find_all('div', {"class": "vevent"}):
    rows = event.find_all("tr")
    header_cells = [td.text.strip() for td in rows[0].find_all("td")]
    # Cell 2 is skipped on purpose: only cells 0, 1 and 3 are used.
    date, h_team, a_team = header_cells[0], header_cells[1], header_cells[3]
    arena = rows[0].find("th").text.strip()

    # The second table row holds the local kickoff time as "HH:MM".
    start_hour, start_minutes = rows[1].text.strip().split(":")

    # Shift with real datetimes so that crossing midnight rolls the date over
    # instead of producing hours below 0 or above 23. Only whole hours are
    # shifted, so the minutes string is carried through untouched.
    local_kickoff = datetime.strptime(date, '%d %B %Y').replace(hour=int(start_hour))
    kickoff = local_kickoff - timedelta(hours=hours_ahead)
    final_whistle = kickoff + GAME_LENGTH

    all_day_event = False
    pr_data.append([
        f"{h_team} vs. {a_team}",
        kickoff.strftime("%m/%d/%Y"),
        final_whistle.strftime("%m/%d/%Y"),
        f"{kickoff.hour}:{start_minutes}",
        f"{final_whistle.hour}:{start_minutes}",
        all_day_event,
        arena,
    ])

# Fixture entered by hand.
# NOTE(review): presumably the scrape above does not pick this game up -- confirm.
pr_data.append(["Great Britain vs. Italy", "09/08/2022", "09/08/2022", "21:00", "23:00",
                False, "Mediolanum Forum, Milan"])

df_pr = pd.DataFrame(data=pr_data, columns=["Subject", "Start Date", "End Date",
                                            "Start Time", "End Time",
                                            "All Day Event", "Location"])

# The DataFrame index is written as an unnamed first column (pandas default).
df_pr.to_csv("../eurobasket_preliminary.csv")

## Knockout stage

In [53]:
# Scrape the knockout-stage page and turn every game box
# (a <div class="vevent">) into one all-day calendar row.
ks_data = []
site = "https://en.wikipedia.org/wiki/EuroBasket_2022_knockout_stage"
# A timeout keeps a stalled connection from hanging the cell; 30 s is arbitrary.
response = requests.get(site, timeout=30)
# Fail loudly on an HTTP error instead of silently writing an empty CSV.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

events = soup.find_all('div', {"class": "vevent"})
for event in events:
  rows = event.find_all("tr")
  header_cells = [td.text.strip() for td in rows[0].find_all("td")]
  # Cell 2 is skipped on purpose: only cells 0, 1 and 3 are used.
  date, h_team, a_team = header_cells[0], header_cells[1], header_cells[3]
  arena = rows[0].find("th").text.strip()

  # Convert e.g. "10 September 2022" to the MM/DD/YYYY form used in the CSV.
  date = datetime.strptime(date, '%d %B %Y').strftime("%m/%d/%Y")

  # Knockout games are stored as all-day events with placeholder times;
  # no kickoff time is read from the page in this cell.
  all_day_event = True
  start_time = "00:00"
  end_time = "00:00"
  ks_data.append([f"{h_team} vs. {a_team}", date, date, start_time,
                  end_time, all_day_event, arena])

df_ks = pd.DataFrame(data=ks_data, columns=["Subject", "Start Date", "End Date",
                                            "Start Time", "End Time",
                                            "All Day Event", "Location"])

# The DataFrame index is written as an unnamed first column (pandas default).
df_ks.to_csv("../eurobasket_knockout.csv")

## Merge both files

In [57]:
# Stack the preliminary-round rows on top of the knockout-stage rows, renumber
# the index from zero, and write the combined schedule to a single CSV.
schedule_parts = [df_pr, df_ks]
df_full = pd.concat(schedule_parts, axis=0, ignore_index=True)
df_full.to_csv("../eurobasket_full.csv")