In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Define the folder containing CSV files
folder_path = "Premier League 2024-2025/xgCSV"  # Replace with your folder path

# Check if CSV files exist in the folder
csv_files = list(Path(folder_path).glob("*.csv"))
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in the folder: {folder_path}")

# Initialize empty lists to store DataFrames
all_totals_dataframes = []
all_per_date_dataframes = []

# Loop through all CSV files in the folder
for file in csv_files:
    print(f"\nProcessing file: {file.name}")
    
    # Load the CSV file into a DataFrame
    df = pd.read_csv(file)
    
    if df.empty:
        print(f"Skipping empty file: {file.name}")
        continue
    
    # Compute total xG and PsxG per Player (sum from the original file)
    xg_psxg_per_player = df.groupby(["PlayerId", "Date"]).agg(
        xG=("xG", "sum"),
        PsxG=("PsxG", "sum")
    ).reset_index()
    
    # Compute xAG - sum of xG where PlayerId appears as relatedPlayerId
    xag_per_related = df.groupby(["relatedPlayerId", "Date"], as_index=False)["xG"].sum().rename(columns={"relatedPlayerId": "PlayerId", "xG": "xAG"})
    
    # Merge xAG with the main DataFrame
    xg_psxg_per_player = pd.merge(xg_psxg_per_player, xag_per_related, on=["PlayerId", "Date"], how="left").fillna(0)
    
    # Add TeamId to the DataFrame
    team_mapping = df[["PlayerId", "TeamId", "Date"]].drop_duplicates()
    merged_totals_df = pd.merge(xg_psxg_per_player, team_mapping, on=["PlayerId", "Date"], how="left")
    
    # Compute MESH per Player (sum of xG from filtered data)
    even_strength_df = df[(df["Gamestate"] == "Draw") & (df["Type_of_play"] == "RegularPlay")]
    
    if even_strength_df.empty:
        print("No rows match Gamestate == 'Draw' and Type_of_play == 'RegularPlay'. Skipping these filters.")
        even_strength_df = df  # Use the entire DataFrame
    
    mesh_per_player = even_strength_df.groupby(["PlayerId", "Date"]).agg(
        MESH_Total=("xG", "sum"),
        xG_std=("xG", "std"),
        xG_mean=("xG", "mean")
    ).reset_index()
    
    # Calculate Coefficient of Variation (CV)
    mesh_per_player["Coefficient of Variation"] = mesh_per_player["xG_std"] / mesh_per_player["xG_mean"]
    
    # Merge MESH data
    merged_totals_df = pd.merge(merged_totals_df, mesh_per_player, on=["PlayerId", "Date"], how="left").fillna(0)
    
    # Compute MESH for assists (xAG from filtered even-strength data)
    mesh_assists = even_strength_df.groupby(["relatedPlayerId", "Date"])["xG"].sum().reset_index().rename(columns={"relatedPlayerId": "PlayerId", "xG": "MESH_Assists"})
    merged_totals_df = pd.merge(merged_totals_df, mesh_assists, on=["PlayerId", "Date"], how="left").fillna(0)
    
    # Create a new index incorporating MESH xG and MESH xA in relation to xG and xA
    merged_totals_df["MESH_Index"] = (merged_totals_df["MESH_Total"] + merged_totals_df["MESH_Assists"]) / (merged_totals_df["xG"] + merged_totals_df["xAG"] + 1e-6)
    
    # Calculate MESH Strength Score
    w1, w2, w3, w4, w5, w6 = 1.0, 1.0, 0.5, 0.5, 0.3, 1.5  # Adjustable weights
    merged_totals_df["MESH_Strength_Score"] = (
        w1 * merged_totals_df["MESH_Total"] +
        w2 * merged_totals_df["MESH_Assists"] +
        w3 * merged_totals_df["xG"] +
        w4 * merged_totals_df["xAG"] -
        w5 * merged_totals_df["Coefficient of Variation"] +
        w6 * merged_totals_df["MESH_Index"]
    )
    
    # Append the totals DataFrame to the list if not empty
    if not merged_totals_df.empty:
        all_totals_dataframes.append(merged_totals_df)
        all_per_date_dataframes.append(merged_totals_df)

# Ensure we have data before saving
if all_totals_dataframes:
    final_totals_df = pd.concat(all_totals_dataframes, ignore_index=True)
    final_per_date_df = pd.concat(all_per_date_dataframes, ignore_index=True)
    
    # Save results to Excel
    final_totals_df.to_excel("MESH_Totals_Combined.xlsx", sheet_name="MESH Totals", index=False)
    final_per_date_df.to_excel("MESH_Per_Date.xlsx", sheet_name="MESH Per Date", index=False)
    print("\nMESH totals saved to: MESH_Totals_Combined.xlsx")
    print("MESH per date saved to: MESH_Per_Date.xlsx")
else:
    print("No valid data found in any of the files. Skipping file creation.")



Processing file: 2024-10-05_Brentford FC - Wolverhampton Wanderers FC.csv

Processing file: 2025-02-01_Ipswich Town FC - Southampton FC.csv

Processing file: 2024-10-26_Brighton & Hove Albion FC - Wolverhampton Wanderers FC.csv

Processing file: 2024-11-10_Nottingham Forest FC - Newcastle United FC.csv

Processing file: 2024-08-17_Everton FC - Brighton & Hove Albion FC.csv

Processing file: 2024-12-29_Tottenham Hotspur FC - Wolverhampton Wanderers FC.csv

Processing file: 2024-10-19_Newcastle United FC - Brighton & Hove Albion FC.csv

Processing file: 2024-11-24_Ipswich Town FC - Manchester United FC.csv

Processing file: 2024-09-14_Liverpool FC - Nottingham Forest FC.csv

Processing file: 2024-12-01_Manchester United FC - Everton FC.csv

Processing file: 2024-12-04_Southampton FC - Chelsea FC.csv

Processing file: 2024-10-26_Brentford FC - Ipswich Town FC.csv

Processing file: 2024-10-26_Manchester City FC - Southampton FC.csv

Processing file: 2024-12-04_Manchester City FC - Notting


Processing file: 2024-08-16_Manchester United FC - Fulham FC.csv

Processing file: 2024-08-31_Brentford FC - Southampton FC.csv

Processing file: 2025-01-26_Tottenham Hotspur FC - Leicester City FC.csv

Processing file: 2024-11-23_Arsenal FC - Nottingham Forest FC.csv

Processing file: 2024-08-24_Brighton & Hove Albion FC - Manchester United FC.csv

Processing file: 2024-09-22_Brighton & Hove Albion FC - Nottingham Forest FC.csv

Processing file: 2024-12-27_Brighton & Hove Albion FC - Brentford FC.csv

Processing file: 2024-12-21_Brentford FC - Nottingham Forest FC.csv

Processing file: 2024-08-24_Tottenham Hotspur FC - Everton FC.csv

Processing file: 2024-10-19_Fulham FC - Aston Villa FC.csv

Processing file: 2024-12-07_Brentford FC - Newcastle United FC.csv

Processing file: 2024-09-21_Tottenham Hotspur FC - Brentford FC.csv

Processing file: 2025-01-26_Aston Villa FC - West Ham United FC.csv

Processing file: 2024-11-30_Wolverhampton Wanderers FC - AFC Bournemouth.csv

Processing 


Processing file: 2024-09-21_West Ham United FC - Chelsea FC.csv

Processing file: 2024-12-05_Fulham FC - Brighton & Hove Albion FC.csv

MESH totals saved to: MESH_Totals_Combined.xlsx
MESH per date saved to: MESH_Per_Date.xlsx


In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Load the per-date MESH data
file_path = "MESH_Per_Date.xlsx"  # Replace with the correct path
mesh_per_date_df = pd.read_excel(file_path)

# Ensure the necessary columns exist
required_columns = {"Date", "TeamId", "MESH_Strength_Score"}
if not required_columns.issubset(mesh_per_date_df.columns):
    raise ValueError(f"Missing required columns: {required_columns - set(mesh_per_date_df.columns)}")

# Convert Date to datetime format and remove time information
mesh_per_date_df["Date"] = pd.to_datetime(mesh_per_date_df["Date"]).dt.date

# Initialize ELO ratings
initial_elo = 1500
elo_ratings = {}
K = 32  # ELO K-factor (adjustable for impact sensitivity)

# Process ELO ratings per date
elo_history = []
mesh_per_date_df.sort_values("Date", inplace=True)

# Get all unique dates in the dataset
all_dates = mesh_per_date_df["Date"].unique()

for date in all_dates:
    matches = mesh_per_date_df[mesh_per_date_df["Date"] == date]
    teams = matches["TeamId"].unique()
    
    # Initialize ELO ratings for new teams
    for team in teams:
        if team not in elo_ratings:
            elo_ratings[team] = initial_elo  # Assign initial rating if new team appears
    
    # Update ELO ratings for each team based on their performance
    for team in teams:
        opponents = [t for t in teams if t != team]
        if not opponents:
            continue  # Skip if no opponents are available
        
        # Get the team's MESH_Strength_Score
        team_score = matches[matches["TeamId"] == team]["MESH_Strength_Score"].values[0]
        
        # Calculate the average MESH_Strength_Score of opponents
        avg_opponent_score = matches[matches["TeamId"].isin(opponents)]["MESH_Strength_Score"].mean()
        
        # Get the current ELO ratings
        R_team = elo_ratings[team]
        R_opponent = np.mean([elo_ratings[o] for o in opponents])
        
        # Calculate expected score
        E_team = 1 / (1 + 10 ** ((R_opponent - R_team) / 400))
        
        # Determine actual score based on MESH_Strength_Score comparison
        if team_score > avg_opponent_score:
            S_team = 1  # Team performed better than opponents
        elif team_score == avg_opponent_score:
            S_team = 0.5  # Team performed equally to opponents
        else:
            S_team = 0  # Team performed worse than opponents
        
        # Update ELO ratings
        elo_ratings[team] = R_team + K * (S_team - E_team)
    
    # Store ELO ratings history for all teams
    for team in elo_ratings.keys():
        elo_history.append({"Date": date, "TeamId": team, "ELO": elo_ratings[team]})

# Convert ELO history to DataFrame
elo_df = pd.DataFrame(elo_history)

# Save ELO ratings to Excel
elo_df.to_excel("ELO_PerDate.xlsx", index=False)
print("ELO per date data saved to: ELO_PerDate.xlsx")


ELO per date data saved to: ELO_PerDate.xlsx


In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pathlib import Path

# Load the per-date ELO data
file_path = "ELO_PerDate.xlsx"  # Replace with the correct path
elo_df = pd.read_excel(file_path)

# Ensure the necessary columns exist
required_columns = {"Date", "TeamId", "ELO"}
if not required_columns.issubset(elo_df.columns):
    raise ValueError(f"Missing required columns: {required_columns - set(elo_df.columns)}")

# Convert Date to datetime format and remove time information
elo_df["Date"] = pd.to_datetime(elo_df["Date"]).dt.date

# Get all unique teams
all_teams = elo_df["TeamId"].unique()

# Define styles and file names
styles = {
    "nyt": {"style": "seaborn-dark", "color": "black", "file": "ELO_NYT.png"},
    "538": {"style": "fivethirtyeight", "color": "blue", "file": "ELO_538.png"},
    "economist": {"style": "ggplot", "color": "red", "file": "ELO_Economist.png"},
    "bloomberg": {"style": "dark_background", "color": "cyan", "file": "ELO_Bloomberg.png"},
    "times": {"style": "seaborn-white", "color": "navy", "file": "ELO_Times.png"},
    "guardian": {"style": "seaborn-muted", "color": "green", "file": "ELO_Guardian.png"},
    "financial_times": {"style": "seaborn-poster", "color": "purple", "file": "ELO_Financial_Times.png"}
}

highlight_team = "Arsenal FC"
logo_path = "Data visuals/Outswinger FC (3).png"  # Adjust if needed

for key, style_info in styles.items():
    plt.style.use(style_info["style"])
    fig, ax = plt.subplots(figsize=(12, 6))

    # Plot all teams with low opacity
    for team in all_teams:
        team_data = elo_df[elo_df["TeamId"] == team]
        if team == highlight_team:
            ax.plot(team_data["Date"], team_data["ELO"], label=team, linewidth=2.5, color=style_info["color"])
        else:
            ax.plot(team_data["Date"], team_data["ELO"], linewidth=1, color="gray", alpha=0.3)
    
    # Formatting for each style
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    if key in ["bloomberg", "nyt"]:
        ax.spines["left"].set_color("white")
        ax.spines["bottom"].set_color("white")
    
    text_color = "white" if key in ["bloomberg", "nyt"] else "black"
    
    # Title with PESI and League Name
    ax.set_title(f"{highlight_team} Probability Even Strength Index (PESI)\nLeague: Premier League | 2024-2025", 
                 fontsize=14, fontweight='bold', color=text_color)

    ax.set_xlabel("Date", fontsize=12, color=text_color)
    ax.set_ylabel("ELO Rating", fontsize=12, color=text_color)
    ax.legend(loc="upper left", fontsize=10, frameon=False)
    ax.tick_params(axis='x', rotation=45, labelsize=10, colors=text_color)
    ax.tick_params(axis='y', labelsize=10, colors=text_color)
    ax.set_ylim(1300, 1600)
    ax.grid(axis="y", linestyle="dotted" if key == "bloomberg" else "--", linewidth=0.5, alpha=0.5)
    
    # Add logo to the bottom left corner
    try:
        img = mpimg.imread(logo_path)
        newax = fig.add_axes([0.05, 0.05, 0.1, 0.1], anchor='SW', zorder=1)
        newax.imshow(img)
        newax.axis('off')
    except FileNotFoundError:
        print(f"Logo file not found: {logo_path}")
    
    plt.tight_layout()
    
    # Save figure
    plt.savefig(style_info["file"], dpi=300)
    plt.close()

print("All styles saved as PNG files: NYT, FiveThirtyEight, The Economist, Bloomberg, The Times, Guardian, and Financial Times with all teams shown and Arsenal FC highlighted.")


  plt.style.use(style_info["style"])
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.style.use(style_info["style"])
  plt.tight_layout()
  plt.style.use(style_info["style"])
  plt.tight_layout()
  plt.style.use(style_info["style"])
  plt.tight_layout()


All styles saved as PNG files: NYT, FiveThirtyEight, The Economist, Bloomberg, The Times, Guardian, and Financial Times with all teams shown and Arsenal FC highlighted.
