In [6]:
using DataFrames, CSV, XLSX, Statistics

# Load the dataset
file_path = "/Users/marclambertes/Python/Eerste Divisie 2024-2025/xgCSV/2025-02-01_SBV Excelsior - SBV Vitesse.csv"
df = CSV.read(file_path, DataFrame)

# Print column names to verify
println("Column Names: ", names(df))

# Fail fast when a column that the rest of this cell reads is absent
required_columns = ["Gamestate", "xG", "timeMin", "TeamId", "PlayerId"]
missing_columns = setdiff(required_columns, names(df))
if !isempty(missing_columns)
    error("Missing columns: ", missing_columns)
end

# Filter for even-strength situations (rows whose Gamestate is "Draw").
# `isequal` always returns a Bool, so a row with a `missing` Gamestate is simply
# excluded; with `==` the predicate would return `missing` and `filter` would throw.
even_strength_df = filter(:Gamestate => isequal("Draw"), df)

# Compute overall MESH (mean xG over the filtered rows)
mesh_value = mean(even_strength_df.xG)

"""
    mesh_by_group(shots, group_col, label)

Return the mean `xG` of `shots` for every distinct value of `group_col`.

The result has the columns `Metric` (filled with `label`), `Category` (the value
of `group_col`) and `MESH_Value` (the group mean), in that order. When `shots`
has no column named `group_col`, an empty table with those three columns is
returned so the caller can concatenate it unconditionally.
"""
function mesh_by_group(shots::AbstractDataFrame, group_col::Symbol, label::AbstractString)
    if !(String(group_col) in names(shots))
        # Typed empty columns instead of `[]`, which would be Vector{Any}
        return DataFrame(; Metric=String[], Category=String[], MESH_Value=Float64[])
    end
    grouped = combine(groupby(shots, group_col), :xG => mean => :MESH_Value)
    grouped.Metric .= label
    # Reorder and rename in one step so every breakdown shares the same layout
    return select!(grouped, :Metric, group_col => :Category, :MESH_Value)
end

# Compute MESH per team
mesh_per_team = mesh_by_group(even_strength_df, :TeamId, "MESH per Team")

# Compute MESH per player
mesh_per_player = mesh_by_group(even_strength_df, :PlayerId, "MESH per Player")

# Compute MESH per half. `timeMin` is guaranteed by the required-columns check
# earlier in this cell, so the label column can be derived unconditionally.
# NOTE(review): rows from first-half stoppage time may carry timeMin > 45 and
# would then be labelled "Second Half" - confirm the data's minute convention.
even_strength_df.Half = ifelse.(even_strength_df.timeMin .<= 45, "First Half", "Second Half")
mesh_per_half = mesh_by_group(even_strength_df, :Half, "MESH per Half")

# Compute MESH per shot type; ShotQualityModifier is optional, and the helper
# returns an empty table when the column is absent.
mesh_per_shot_type = mesh_by_group(even_strength_df, :ShotQualityModifier, "MESH per Shot Type")

# Single-row table carrying the overall MESH value
overall_mesh_df = DataFrame(
    Metric=["Overall MESH"],
    Category=["Overall"],
    MESH_Value=[mesh_value],
)

# Bring every non-empty breakdown table to the shared three-column layout
columns_to_keep = ["Metric", "Category", "MESH_Value"]
breakdown_tables = (mesh_per_team, mesh_per_player, mesh_per_half, mesh_per_shot_type)
foreach(breakdown_tables) do breakdown
    isempty(breakdown) || select!(breakdown, columns_to_keep)
end

# Stack the overall row on top of the per-category breakdowns
mesh_combined = vcat(overall_mesh_df, breakdown_tables...)

# Define Excel file path
output_file = "/Users/marclambertes/Julia/Excel/MESH_Results.xlsx"

# Save to Excel.
# - A workbook opened with mode="w" already contains one empty sheet; it is
#   renamed and reused so the file does not end up with a blank extra sheet
#   next to the results.
# - The DataFrame is passed to writetable! directly (it is a Tables.jl table),
#   which keeps the Metric / Category / MESH_Value column order. Going through a
#   Dict would order the columns by hash iteration order instead.
XLSX.openxlsx(output_file, mode="w") do xf
    sheet = xf[1]
    XLSX.rename!(sheet, "MESH Results")
    XLSX.writetable!(sheet, mesh_combined)
end

println("MESH results saved to: $output_file")


Column Names: ["Column1", "xG", "Date", "PlayerId", "TeamId", "HomeTeam", "AwayTeam", "timeMin", "timeSec", "x", "y", "angle", "distance", "Type_of_play", "relatedPlayerId", "GamestateOne", "GamestateTwo", "Bodypart", "isGoal", "isBigChance", "isIntentionalAssist", "isAssistedShot", "isOwnGoal", "expandedMinute", "Goal", "Time_in_sec", "Gamestate", "GoalMouthModifier", "ShotQualityModifier", "PsxG"]
MESH results saved to: /Users/marclambertes/Julia/Excel/MESH_Results.xlsx


In [8]:
using DataFrames, CSV, XLSX, Statistics

# Load the dataset
file_path = "/Users/marclambertes/Python/Eerste Divisie 2024-2025/xgCSV/2025-02-01_SBV Excelsior - SBV Vitesse.csv"
df = CSV.read(file_path, DataFrame)

# Print column names to verify
println("Column Names: ", names(df))

# Fail fast when a column that the rest of this cell reads is absent
required_columns = ["TeamId", "PlayerId", "xG", "Gamestate"]
missing_columns = setdiff(required_columns, names(df))
if !isempty(missing_columns)
    error("Missing columns: ", missing_columns)
end

# Filter for even-strength situations (rows whose Gamestate is "Draw").
# `isequal` always returns a Bool, so a row with a `missing` Gamestate is simply
# excluded; with `==` the predicate would return `missing` and `filter` would throw.
even_strength_df = filter(:Gamestate => isequal("Draw"), df)

# Debugging: Print number of rows after filtering
println("Rows after filtering even-strength situations: ", nrow(even_strength_df))

# Compute GNet (summed xG per player over the filtered rows) and scale it to a
# 0-100 rating. PlayerId is guaranteed by the required-columns check earlier in
# this cell, so only the "no rows left after filtering" case needs a fallback.
if !isempty(even_strength_df)
    gnet_per_player = combine(groupby(even_strength_df, :PlayerId), :xG => sum => :GNet)
    rename!(gnet_per_player, :PlayerId => :Player)

    # Debugging: Print GNet DataFrame before normalization
    println("GNet per Player: ")
    println(gnet_per_player)

    # Min-max normalisation; one pass over the column yields both bounds
    min_gnet, max_gnet = extrema(gnet_per_player.GNet)
    if min_gnet == max_gnet
        # All players tie: avoid dividing by zero and assign a neutral rating.
        # 50.0 (not 50) keeps Rating a Float64 column, as in the other branch.
        gnet_per_player.Rating .= 50.0
    else
        gnet_per_player.Rating =
            (gnet_per_player.GNet .- min_gnet) ./ (max_gnet - min_gnet) .* 100
    end

    # Debugging: Print final rating table
    println("Final GNet Ratings:")
    println(gnet_per_player)
else
    println("No data available for GNet calculation.")
    # Typed empty columns instead of `[]`, which would be Vector{Any}
    gnet_per_player = DataFrame(; Player=String[], GNet=Float64[], Rating=Float64[])
end

# Define Excel file path
output_file = "/Users/marclambertes/Julia/Excel/GNet_Ratings.xlsx"

# Only write to Excel if there is valid data
if !isempty(gnet_per_player)
    # - A workbook opened with mode="w" already contains one empty sheet; it is
    #   renamed and reused so the file does not end up with a blank extra sheet.
    # - The DataFrame is passed to writetable! directly (it is a Tables.jl
    #   table), which keeps the Player / GNet / Rating column order. Going
    #   through a Dict would order the columns by hash iteration order instead.
    XLSX.openxlsx(output_file, mode="w") do xf
        sheet = xf[1]
        XLSX.rename!(sheet, "GNet Ratings")
        XLSX.writetable!(sheet, gnet_per_player)
    end

    println("GNet ratings saved to: $output_file")
else
    println("No valid data to write to Excel.")
end


Column Names: ["Column1", "xG", "Date", "PlayerId", "TeamId", "HomeTeam", "AwayTeam", "timeMin", "timeSec", "x", "y", "angle", "distance", "Type_of_play", "relatedPlayerId", "GamestateOne", "GamestateTwo", "Bodypart", "isGoal", "isBigChance", "isIntentionalAssist", "isAssistedShot", "isOwnGoal", "expandedMinute", "Goal", "Time_in_sec", "Gamestate", "GoalMouthModifier", "ShotQualityModifier", "PsxG"]
Rows after filtering even-strength situations: 5
GNet per Player: 
[1m5×2 DataFrame[0m
[1m Row [0m│[1m Player         [0m[1m GNet      [0m
     │[90m String31       [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ I. Yegoian      0.311433
   2 │ G. de Regt      0.101414
   3 │ L. Duijvestijn  0.0681532
   4 │ R. Omorowa      0.104495
   5 │ B. Huisman      0.0923642
Final GNet Ratings:
[1m5×3 DataFrame[0m
[1m Row [0m│[1m Player         [0m[1m GNet      [0m[1m Rating    [0m
     │[90m String31       [0m[90m Float64   [0m[90m Float64   [0m
─────┼─────