In [8]:
import os
import pandas as pd

# Function to calculate expected points based on xG (proportional)
def calculate_expected_points(home_xg, away_xg):
    """Split three points between two sides in proportion to their xG.

    Args:
        home_xg: Expected-goals total of the home side.
        away_xg: Expected-goals total of the away side.

    Returns:
        A ``(home_points, away_points)`` tuple. Each value is that side's
        share of the combined xG multiplied by 3 and rounded to two
        decimals. If the combined xG equals zero, ``(0, 0)`` is returned.
    """
    combined = home_xg + away_xg

    # Without any xG there is nothing to divide by: nobody gets points.
    if combined == 0:
        return 0, 0

    # Same operation order for both sides: share first, then scale, then round.
    return tuple(
        round((side_xg / combined) * 3, 2) for side_xg in (home_xg, away_xg)
    )

# Function to process the folder of CSV files
def process_csv_folder(folder_path):
    """Build one expected-points row per fixture from a folder of CSV files.

    Every file in ``folder_path`` whose name ends in ``.csv`` is read with
    pandas. Files are visited in sorted name order so the order of the
    output rows is reproducible. A file is skipped when it lacks any of the
    columns ``HomeTeam``, ``AwayTeam``, ``TeamId`` or ``xG``.

    Within a file, rows are grouped by (``HomeTeam``, ``AwayTeam``). For
    each group, the ``xG`` of the rows whose ``TeamId`` equals the home
    team is summed, likewise for the away team, and both sums are passed to
    ``calculate_expected_points``.
    NOTE(review): presumably each row is one shot and ``TeamId`` is the
    team that took it -- confirm against the CSV export.

    Args:
        folder_path: Directory containing the CSV files.

    Returns:
        A DataFrame with the columns ``home_teamId``, ``away_teamId``,
        ``home_xG``, ``away_xG``, ``home_expected_points`` and
        ``away_expected_points``. The columns are present even when no
        fixture was found, so downstream group-bys keep working.
    """
    # 'TeamId' is read below, so it must be part of the column guard too.
    required_columns = {'HomeTeam', 'AwayTeam', 'TeamId', 'xG'}
    result_columns = [
        'home_teamId',
        'away_teamId',
        'home_xG',
        'away_xG',
        'home_expected_points',
        'away_expected_points',
    ]
    all_results = []

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(folder_path, filename))

        # Skip files that cannot be processed instead of raising KeyError.
        if not required_columns.issubset(df.columns):
            continue

        for (home_team, away_team), group in df.groupby(['HomeTeam', 'AwayTeam']):
            # xG credited to each side within this fixture
            home_xg = group.loc[group['TeamId'] == home_team, 'xG'].sum()
            away_xg = group.loc[group['TeamId'] == away_team, 'xG'].sum()

            home_points, away_points = calculate_expected_points(home_xg, away_xg)

            all_results.append({
                'home_teamId': home_team,
                'away_teamId': away_team,
                'home_xG': home_xg,
                'away_xG': away_xg,
                'home_expected_points': home_points,
                'away_expected_points': away_points,
            })

    # Passing the column list keeps the schema intact for an empty result.
    return pd.DataFrame(all_results, columns=result_columns)

# Function to calculate the total expected points and rank teams
def calculate_team_rankings(results_df):
    """Total each team's expected points and rank the teams.

    Args:
        results_df: DataFrame with the columns ``home_teamId``,
            ``away_teamId``, ``home_expected_points`` and
            ``away_expected_points``. An empty frame (with or without
            those columns) is accepted.

    Returns:
        A DataFrame with the columns ``teamId``, ``total_expected_points``
        and ``rank``. Rank 1 is the highest total; tied teams share the
        lowest rank of their group (``method='min'``). The rows are not
        sorted by rank. For empty input an empty frame with these three
        columns is returned.
    """
    ranking_columns = ['teamId', 'total_expected_points', 'rank']

    # No fixtures at all: return an empty table instead of failing on the
    # group-by with a KeyError when the frame has no columns.
    if results_df.empty:
        return pd.DataFrame(columns=ranking_columns)

    # Points collected at home and away, each indexed by team id
    home_points = results_df.groupby('home_teamId')['home_expected_points'].sum()
    away_points = results_df.groupby('away_teamId')['away_expected_points'].sum()

    # fill_value=0 keeps teams that only appear on one side of the fixtures
    total_points = home_points.add(away_points, fill_value=0)

    ranked_teams = total_points.reset_index()
    ranked_teams.columns = ranking_columns[:2]

    # Highest total gets rank 1
    ranked_teams['rank'] = ranked_teams['total_expected_points'].rank(
        method='min', ascending=False
    )

    return ranked_teams

# Function to save the results to an Excel file
def save_to_excel(results_df, output_file, ranked_teams, rank_file):
    """Write the match results and the team rankings to Excel files.

    Args:
        results_df: DataFrame written to the 'Match Results' sheet.
        output_file: Path of the workbook that receives both sheets
            ('Match Results' and 'Team Rankings').
        ranked_teams: DataFrame written to the 'Team Rankings' sheet and,
            on its own, to ``rank_file``.
        rank_file: Path of the separate rankings workbook; it is always
            written.
    """
    sheets = (
        ('Match Results', results_df),
        ('Team Rankings', ranked_teams),
    )

    # Combined workbook: one sheet per table, without the index column
    with pd.ExcelWriter(output_file) as workbook:
        for sheet_name, frame in sheets:
            frame.to_excel(workbook, sheet_name=sheet_name, index=False)

    # Stand-alone copy of the rankings
    ranked_teams.to_excel(rank_file, index=False)

# Main execution
# Output locations
output_file = 'expected_points.xlsx'  # Workbook with match results and rankings
rank_file = 'team_rankings.xlsx'  # Workbook with the rankings only

# Input location
folder_path = '/Users/marclambertes/Python/Eredivisie 2014-2015/xgCSV/'  # Update this to your folder path

# Pipeline: read the CSVs, rank the teams, write both workbooks
results_df = process_csv_folder(folder_path)
ranked_teams = calculate_team_rankings(results_df)
save_to_excel(
    results_df=results_df,
    output_file=output_file,
    ranked_teams=ranked_teams,
    rank_file=rank_file,
)

print(f"Expected points and rankings have been saved to {output_file} and {rank_file}")


Expected points and rankings have been saved to expected_points.xlsx and team_rankings.xlsx
