In [8]:
import hashlib

def generate_match_id(season, date, home_team, away_team):
    """Return a short, deterministic identifier for a single match.

    The four fields are rendered with default formatting and joined with no
    separator, the resulting text is hashed with SHA-256, and the first
    10 hexadecimal characters of the digest are returned.

    Args:
        season: Season label, e.g. "2023-2024".
        date: Match date, e.g. "2023-02-24".
        home_team: Name of the home team.
        away_team: Name of the away team.

    Returns:
        A 10-character lowercase hexadecimal string.

    Note:
        Because no separator is inserted between the fields, two different
        field tuples whose concatenations are equal yield the same ID.
    """
    key = "{}{}{}{}".format(season, date, home_team, away_team)
    digest = hashlib.sha256(key.encode())
    return digest.hexdigest()[:10]

# Sanity check: hash one sample fixture; as the cell's last expression, the 10-character ID is displayed.
generate_match_id("2023-2024", "2023-02-24", "Manchester United", "Liverpool")

'62eae47d25'

In [14]:
import pandas as pd
import os

def concat_csvs(folder_path, output_path=None):
    """Concatenate every CSV file in ``folder_path`` into one DataFrame and save it.

    When the combined frame has the "Season", "Date", "Home" and "Away"
    columns, a "match_id" column is computed with ``generate_match_id`` for
    each row. If a "match_id" column is present it is moved to the front.

    Args:
        folder_path: Directory whose ``*.csv`` files are read and stacked.
        output_path: Where to write the combined CSV. Defaults to
            ``combined.csv`` inside ``folder_path``.

    Returns:
        The combined ``pandas.DataFrame`` (also written to ``output_path``).

    Raises:
        ValueError: If ``folder_path`` contains no CSV files to combine.
    """
    if output_path is None:
        output_path = os.path.join(folder_path, "combined.csv")
    output_abs = os.path.normcase(os.path.abspath(output_path))

    # os.listdir returns names in arbitrary order; sort so the row order of the
    # result is reproducible from run to run.
    csv_paths = [
        os.path.join(folder_path, name)
        for name in sorted(os.listdir(folder_path))
        if name.endswith(".csv")
    ]
    # Skip a previously written output file: reading it back in would
    # duplicate every row each time the function is re-run.
    csv_paths = [
        path for path in csv_paths
        if os.path.normcase(os.path.abspath(path)) != output_abs
    ]

    if not csv_paths:
        raise ValueError(f"No CSV files to concatenate in {folder_path!r}")

    combined_df = pd.concat(
        [pd.read_csv(path) for path in csv_paths], ignore_index=True
    )

    # Only derive match_id when every field it is built from is available.
    required_cols = ["Season", "Date", "Home", "Away"]
    if all(col in combined_df.columns for col in required_cols):
        # Iterating the four columns in parallel avoids building a Series per
        # row (as apply(axis=1) does) and also works on an empty frame.
        combined_df["match_id"] = [
            generate_match_id(season, date, home, away)
            for season, date, home, away in zip(
                *(combined_df[col] for col in required_cols)
            )
        ]

    # Move "match_id" to the front, but only if it exists; indexing a missing
    # column would raise KeyError.
    if "match_id" in combined_df.columns:
        column_order = ["match_id"] + [
            col for col in combined_df.columns if col != "match_id"
        ]
        combined_df = combined_df[column_order]

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    combined_df.to_csv(output_path, index=False)

    return combined_df

# Example usage: combine every CSV found in the La-Liga scrape folder.
# The call is the cell's last expression, so the returned DataFrame is displayed.
folder_path = "scraped_data/La-Liga/"
concat_csvs(folder_path)


Unnamed: 0,match_id,Season,Wk,Date,Home,Away,Home_Goals,Away_Goals,Result
0,ce5654a75e,1988-1989,1,1988-09-03,Athletic Club,Sevilla,3.0,0.0,1.0
1,af4cd08e63,1988-1989,1,1988-09-03,Oviedo,Real Sociedad,1.0,0.0,1.0
2,8587af2cc5,1988-1989,1,1988-09-03,Logroñés,Atlético Madrid,1.0,0.0,1.0
3,ef85883484,1988-1989,1,1988-09-03,Barcelona,Espanyol,2.0,0.0,1.0
4,b196d440a4,1988-1989,1,1988-09-04,Zaragoza,Valencia,0.0,0.0,0.5
...,...,...,...,...,...,...,...,...,...
28443,e0d1e5bee6,2024-2025,38,2025-05-25,Betis,Valencia,,,0.5
28444,92fa70629b,2024-2025,38,2025-05-25,Villarreal,Sevilla,,,0.5
28445,58d88d84c7,2024-2025,38,2025-05-25,Girona,Atlético Madrid,,,0.5
28446,5f58f5cbc9,2024-2025,38,2025-05-25,Alavés,Osasuna,,,0.5
