In [1]:
# football_match_scraper.py

"""
Football Match Scraper - Web Scraping Script
----------------------------------------------
This script scrapes football match data from YallaKora.com based on a user-provided date.
It extracts championship name, teams, match result, and match time, then saves them to a CSV file.
"""

'\nFootball Match Scraper - Web Scraping Script\n----------------------------------------------\nThis script scrapes football match data from YallaKora.com based on a user-provided date.\nIt extracts championship name, teams, match result, and match time, then saves them to a CSV file.\n'

In [2]:
#import required libraries:
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime

In [5]:
# Seconds to wait for the site before giving up, so a stalled connection
# cannot hang the notebook cell forever.
REQUEST_TIMEOUT_SECONDS = 30

# Keep asking until the input parses as a real calendar date in MM/DD/YYYY form.
while True:
    # Strip surrounding whitespace so an accidental space is not reported as a
    # format error.
    date = input("Input the date of the match in the following format 'MM/DD/YYYY'\nEx: 6/20/2025: ").strip()
    try:
        parsed_date = datetime.strptime(date, "%m/%d/%Y")
        break
    except ValueError:
        print("Invalid date format. Please follow MM/DD/YYYY format.")

# Rebuild the date from the parsed value instead of reusing the raw input, so
# "06/05/2025" and "6/5/2025" both produce the same query string. Plain integer
# formatting is used rather than strftime("%-m"), which is not available on
# Windows.
formatted_date = f"{parsed_date.month}/{parsed_date.day}/{parsed_date.year}"
url = f"https://www.yallakora.com/match-center?date={formatted_date}#days"

# Fetch the normalised URL built above (previously the raw input string was
# sent and `url` was never used).
page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

Input the date of the match in the following format 'MM/DD/YYYY'
Ex: 6/20/2025:  2020


Invalid date format. Please follow MM/DD/YYYY format.


Input the date of the match in the following format 'MM/DD/YYYY'
Ex: 6/20/2025:  20/20/2025


Invalid date format. Please follow MM/DD/YYYY format.


Input the date of the match in the following format 'MM/DD/YYYY'
Ex: 6/20/2025:  6/12/2025


In [6]:
# Default CSV destination. A raw string keeps the backslashes literal without
# relying on "\m" being an unrecognised (and warned-about) escape sequence.
# NOTE(review): this is an absolute, machine-specific path; pass `output_path`
# to main() to write somewhere else.
DEFAULT_OUTPUT_PATH = r"F:\mycareerprojects\match_details.csv"


def _extract_match(match, champion_title):
    """Build the CSV row for a single match element.

    Args:
        match: BeautifulSoup tag of one match item.
        champion_title: Title of the championship card the match was found in.

    Returns:
        dict with the keys "Champion Title", "Team A", "Team B",
        "Match Result" and "Match Time".

    Raises:
        AttributeError: if one of the expected child elements is missing.
        IndexError: if fewer than two score spans are present.
    """
    team_a = match.find("div", {"class": "teams teamA"}).text.strip()
    team_b = match.find("div", {"class": "teams teamB"}).text.strip()

    # The result block holds one "score" span per team, in A-then-B order.
    scores = match.find("div", {"class": "MResult"}).find_all("span", {"class": "score"})
    # Score string looks like: "TeamA 1-2 TeamB"
    score = f"{team_a} {scores[0].text.strip()}-{scores[1].text.strip()} {team_b}"

    match_time = match.find("span", {"class": "time"}).text.strip()

    return {
        "Champion Title": champion_title,
        "Team A": team_a,
        "Team B": team_b,
        "Match Result": score,
        "Match Time": match_time,
    }


def main(page, output_path=DEFAULT_OUTPUT_PATH):
    """Parse a match-center page, print every match found and save them to CSV.

    Each match is reported exactly once and is labelled with the title of the
    championship card it belongs to.

    Args:
        page: Response-like object whose ``content`` attribute holds the HTML
            of the match-center page (e.g. the result of ``requests.get``).
        output_path: Where the CSV file is written. Defaults to
            ``DEFAULT_OUTPUT_PATH``.

    Returns:
        None. Results are printed and, when at least one match was found,
        written to ``output_path``.
    """
    # Parse the HTML using BeautifulSoup with the lxml parser.
    soup = BeautifulSoup(page.content, "lxml")

    # Each "matchCard" div groups the matches of one championship.
    championships = soup.find_all("div", {"class": "matchCard"})

    # Pair every match element with the title of its own championship so that
    # matches from different cards can never be mixed up or processed twice.
    titled_matches = []
    for championship in championships:
        champion_title = championship.find("h2").text.strip()

        # The match list is taken from the card's fourth child node; this is
        # positional and depends on the page's current markup.
        # A multi-word class_ string matches only elements whose class
        # attribute is exactly this string — presumably finished matches;
        # TODO confirm whether not-yet-played matches should be included.
        matches = championship.contents[3].find_all("div", class_="item finish liItem")
        titled_matches.extend((champion_title, match) for match in matches)

    print("number of matches on that day: ", len(titled_matches))
    print(" ")

    match_details = []
    for number, (champion_title, match) in enumerate(titled_matches, start=1):
        print(f"Match Number {number}")

        row = _extract_match(match, champion_title)
        print("team A: ", row["Team A"])
        print("team B: ", row["Team B"])
        print(f"Match Result: {row['Match Result']}")
        print(f"Match Time: {row['Match Time']}")
        print(" ")

        match_details.append(row)

    # Print the full extracted match details (for debugging).
    print(match_details)

    if not match_details:
        # Inform user if no matches found
        print("No matches found for the selected date or maybe you entered date in a wrong format. Make sure the date format like the following ex:6/20/2025")
        return

    # Column names come from the first row; every row has the same keys.
    fieldnames = list(match_details[0].keys())

    # utf-8-sig adds a BOM so spreadsheet tools detect the encoding of the
    # Arabic team names; newline='' lets the csv module control line endings.
    with open(output_path, "w", newline='', encoding='utf-8-sig') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(match_details)
    print("file created")


# Run the scraper on the response fetched earlier: prints each match found and,
# when there is at least one, writes the rows to the CSV file.
main(page)


number of matches on that day:  2
 
Match Number 1
team A:  الإسماعيلي
team B:  إنبي
Match Result: الإسماعيلي 2-0 إنبي
Match Time: 17:00
 
Match Number 2
team A:  سيراميكا كليوباترا
team B:  البنك الاهلي
Match Result: سيراميكا كليوباترا 2-0 البنك الاهلي
Match Time: 20:30
 
[{'Champion Title': 'كأس رابطة الأندية', 'Team A': 'الإسماعيلي', 'Team B': 'إنبي', 'Match Result': 'الإسماعيلي 2-0 إنبي', 'Match Time': '17:00'}, {'Champion Title': 'كأس رابطة الأندية', 'Team A': 'سيراميكا كليوباترا', 'Team B': 'البنك الاهلي', 'Match Result': 'سيراميكا كليوباترا 2-0 البنك الاهلي', 'Match Time': '20:30'}]
file created
