In [1]:
# Import libraries for web scraping, data handling and persistence
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import pickle
from string import ascii_uppercase as alphabet

# Path to the chromedriver executable
path = 'C:/Users/cagri/Desktop/euro_2024_cup/chromedriver.exe'  # Replace with your actual path

# Start Chrome through the chromedriver located at `path`. A WebDriverException
# raised while creating the service/driver is reported with a hint and then
# aborts execution with exit status 1.
try:
    service = Service(executable_path=path)
    driver = webdriver.Chrome(service=service)
except WebDriverException as e:
    print(f"Error: {e}")
    print("Please ensure the chromedriver executable path is correct and matches your Chrome version.")
    # `exit` is a convenience object for interactive shells and should not be
    # used in programs; raise SystemExit directly so the abort behaves the same
    # in a script and stops the current cell in a notebook.
    raise SystemExit(1) from e

# Tournament years to scrape: every fourth year from 1972 through 2020 (inclusive)
years = list(range(1972, 2021, 4))


def get_matches(year, timeout=30):
    """Scrape the match boxes from the Wikipedia page of one UEFA Euro tournament.

    Parameters
    ----------
    year : int
        Tournament year; interpolated into the ``UEFA_Euro_{year}`` page URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``div.footballbox`` element that contains all three header
        cells, with columns ``home``, ``score``, ``away`` (raw cell text) and
        ``year``. The frame is empty (but keeps these columns) when the page
        has no such elements.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    web = f'https://en.wikipedia.org/wiki/UEFA_Euro_{year}'
    # Without a timeout the request can block forever on a stalled connection.
    response = requests.get(web, timeout=timeout)
    # Fail loudly on an error page instead of silently returning an empty frame.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    rows = []
    for match in soup.find_all('div', class_='footballbox'):
        home_cell = match.find('th', class_='fhome')
        score_cell = match.find('th', class_='fscore')
        away_cell = match.find('th', class_='faway')
        # `find` returns None for a missing cell; skip incomplete boxes rather
        # than crash with AttributeError on `.get_text()`.
        if home_cell is None or score_cell is None or away_cell is None:
            continue
        rows.append({
            'home': home_cell.get_text(),
            'score': score_cell.get_text(),
            'away': away_cell.get_text(),
        })

    # Passing `columns` keeps the schema stable even when no rows were found.
    df_football = pd.DataFrame(rows, columns=['home', 'score', 'away'])
    df_football['year'] = year
    return df_football

# Scrape every past tournament (one frame per year) and keep the list in `fifa`
fifa = list(map(get_matches, years))

# Stack the per-year frames into a single table and persist it
df_euro = pd.concat(objs=fifa, ignore_index=True)
df_euro.to_csv(path_or_buf="uefa_euro_all_data.csv", index=False)

# The 2024 page is scraped the same way; its rows are stored separately as the fixture
df_fixture = get_matches(2024)
df_fixture.to_csv(path_or_buf='uefa_euro_fixture.csv', index=False)
df_fixture.head()

Unnamed: 0,home,score,away,year
0,Germany,Match 1,Scotland,2024
1,Hungary,Match 2,Switzerland,2024
2,Germany,Match 14,Hungary,2024
3,Scotland,Match 13,Switzerland,2024
4,Switzerland,Match 25,Germany,2024


In [2]:
"""Now, we have all historical datas but there are some problem in some of the matches, for example:
'Czechoslovakia 3–1 (a.e.t.) Netherlands'. Here "a.e.t." stands for "after extra time". We can remove
this suffix so that every value in the score column has the same format."""
# Preview the first rows of the combined historical table
df_euro.head()

Unnamed: 0,home,score,away,year
0,Hungary,0–1,Soviet Union,1972
1,Belgium,1–2,West Germany,1972
2,Hungary,1–2,Belgium,1972
3,West Germany,3–0,Soviet Union,1972
4,Czechoslovakia,3–1 (a.e.t.),Netherlands,1976


In [3]:
# Extract every HTML table from the Euro 2024 Wikipedia page into a list of DataFrames
all_tables = pd.read_html('https://en.wikipedia.org/wiki/UEFA_Euro_2024')

# Starting at index 18, every 7th table is the next group's standings table;
# index 53 is the last of them.
# NOTE(review): these positions presumably depend on the page layout at scrape time — confirm before re-running.
all_tables[53]

Unnamed: 0,Pos,Teamvte,Pld,W,D,L,GF,GA,GD,Pts,Qualification
0,1,Turkey,0,0,0,0,0,0,0,0,Advance to knockout stage
1,2,Georgia,0,0,0,0,0,0,0,0,Advance to knockout stage
2,3,Portugal,0,0,0,0,0,0,0,0,Possible knockout stage based on ranking
3,4,Czech Republic,0,0,0,0,0,0,0,0,


In [4]:
# Standings table positions: Group A = 18, B = 25, C = 32, D = 39, E = 46, F = 53
all_tables = pd.read_html('https://en.wikipedia.org/wiki/UEFA_Euro_2024')
for i in (18, 25, 32, 39, 46, 53):
    print(i)
    df = all_tables[i]
    # The second column holds the team names; give it a plain 'Team' label (in place)
    df.rename(columns={df.columns[1]: 'Team'}, inplace=True)
    # Remove the 'Qualification' column from the table (in place)
    del df['Qualification']

18
25
32
39
46
53


In [5]:
# Fetch the page tables again so this cell works from unmodified tables
all_tables = pd.read_html('https://en.wikipedia.org/wiki/UEFA_Euro_2024')

# Map 'Group A' .. 'Group F' to their standings tables.
# Table positions: A=18, B=25, C=32, D=39, E=46, F=53 (every 7th table from 18).
dict_table = {}
for letter, i in zip(alphabet, range(18, 54, 7)):
    df = all_tables[i]
    # Build a cleaned copy instead of mutating the entry of `all_tables` in place,
    # so re-running the loop on the same tables does not fail on a missing column.
    df = (
        df
        .rename(columns={df.columns[1]: 'Team'})  # second column holds the team names
        .drop(columns='Qualification')
    )
    dict_table[f'Group {letter}'] = df

In [6]:
# List the group labels stored in dict_table
dict_table.keys()

dict_keys(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F'])

In [7]:
# Serialize dict_table with pickle to the file 'uefa_euro_dict_table' (binary mode).
# NOTE: only load this file back with pickle if it comes from a trusted source.
with open('uefa_euro_dict_table', 'wb') as output:
    pickle.dump(dict_table, output)