# Prediction Metrics

In [3]:
import pandas as pd

In [11]:
df = pd.read_csv('./point_predictions.csv')
df.columns = [col.lower() for col in df.columns]
df[(df["season"] == '2024-25') & (df["position"] == "MID")].sort_values("points", ascending= False).head()

Unnamed: 0,player_name,position,game_week,season,points,predicted_points,predicted_rank,actual_rank,prediction_accuracy
13552,Cole Palmer,MID,6,2024-25,20,12.959344,1,1,Correctly Predicted
14255,Kevin Schade,MID,13,2024-25,18,0.91188,48,1,Missed Prediction
14299,Bukayo Saka,MID,13,2024-25,11,0.0,92,2,Missed Prediction
13377,Mohamed Salah,MID,3,2024-25,11,7.930122,2,1,Correctly Predicted
13645,Bukayo Saka,MID,7,2024-25,11,6.630854,1,1,Correctly Predicted


In [5]:
correctly_predicted = df[(df['predicted_rank'] <= 10) & (df['actual_rank'] <= 10)]

hr = len(correctly_predicted) / len(df[df['predicted_rank'] <= 10])
precision_at_10 = len(correctly_predicted) / len(df[df['predicted_rank'] <= 10])
recall_at_10 = len(correctly_predicted) / len(df[df['actual_rank'] <= 10])

print(f"Hit Rate (HR): {round(hr * 100, 2)}%")
print(f"Precision@10 (P@10): {round(precision_at_10 * 100, 2)}%")
print(f"Recall@10 (R@10): {round(recall_at_10 * 100, 2)}%")

Hit Rate (HR): 42.09%
Precision@10 (P@10): 42.09%
Recall@10 (R@10): 42.09%


In [6]:
def top_10_accuracy(df):
    correct_predictions = 0
    total_predictions = 0
    
    for position in df['position'].unique():
        position_df = df[df['position'] == position]
        actual_top_10 = set(position_df[position_df['actual_rank'] <= 10]['player_name'])
        predicted_top_10 = set(position_df[position_df['predicted_rank'] <= 10]['player_name'])
        
        correct_predictions += len(actual_top_10 & predicted_top_10)
        total_predictions += len(actual_top_10)
    
    return correct_predictions / total_predictions if total_predictions > 0 else 0

def weighted_accuracy(df):
    total_weight = 0
    correct_weight = 0
    
    for position in df['position'].unique():
        position_df = df[df['position'] == position].copy()
        position_df['weight'] = 11 - position_df['actual_rank']  # Top 1 gets 10, top 2 gets 9, ..., top 10 gets 1
        actual_top_10 = position_df[position_df['actual_rank'] <= 10]
        
        for _, row in actual_top_10.iterrows():
            total_weight += row['weight']
            if row['predicted_rank'] <= 10:
                correct_weight += row['weight']
    
    return correct_weight / total_weight if total_weight > 0 else 0

def per_position_top_10_accuracy(df):
    position_accuracies = {}
    
    for position in df['position'].unique():
        position_df = df[df['position'] == position]
        actual_top_10 = set(position_df[position_df['actual_rank'] <= 10]['player_name'])
        predicted_top_10 = set(position_df[position_df['predicted_rank'] <= 10]['player_name'])
        
        correct_predictions = len(actual_top_10 & predicted_top_10)
        total_predictions = len(actual_top_10)
        
        position_accuracies[position] = correct_predictions / total_predictions if total_predictions > 0 else 0
    
    return position_accuracies

In [7]:
top10_acc = top_10_accuracy(df)
print("Top-10 Accuracy:", top10_acc)

weighted_acc = weighted_accuracy(df)
print("Weighted Accuracy:", weighted_acc)

position_acc = per_position_top_10_accuracy(df)
print("Per-Position Top-10 Accuracy:", position_acc)


Top-10 Accuracy: 0.7995689655172413
Weighted Accuracy: 0.5210313024035774
Per-Position Top-10 Accuracy: {'DEF': 0.7938144329896907, 'FWD': 0.9156626506024096, 'MID': 0.7540106951871658}


In [8]:
def select_best_10_players_with_aggregation(df):
    top_10_results = []
    aggregate_results = []

    df = df.sort_values(by=['season', 'game_week', 'predicted_points', 'points'], ascending=[True, True, False, False])

    grouped = df.groupby(['season', 'game_week'])
    
    for (season, game_week), group in grouped:
        selected_players = []
        position_counts = {'DEF': 0, 'MID': 0, 'FWD': 0}  # Track selected players by position
        
        sum_predicted_points = 0
        sum_actual_points = 0

        for _, row in group.iterrows():
            position = row['position']
            if len(selected_players) < 10 and position_counts[position] < {'DEF': 5, 'MID': 5, 'FWD': 3}.get(position, 0):
                selected_players.append(row)
                position_counts[position] += 1

                sum_predicted_points += row['predicted_points']
                sum_actual_points += row['points']

        top_10_results.extend(selected_players)

        best_possible_players = []
        best_position_counts = {'DEF': 0, 'MID': 0, 'FWD': 0}
        sum_best_possible_points = 0

        for _, row in group.sort_values(by='points', ascending=False).iterrows():
            position = row['position']
            if len(best_possible_players) < 10 and best_position_counts[position] < {'DEF': 5, 'MID': 5, 'FWD': 3}.get(position, 0):
                best_possible_players.append(row)
                best_position_counts[position] += 1
                sum_best_possible_points += row['points']

        aggregate_results.append({
            'season': season,
            'game_week': game_week,
            'sum_predicted_points': sum_predicted_points,
            'sum_actual_points': sum_actual_points,
            'best_possible_points': sum_best_possible_points
        })

    top_10_players_df = pd.DataFrame(top_10_results)
    aggregated_points_df = pd.DataFrame(aggregate_results)
    aggregated_points_df["pred_best_ratio"] = aggregated_points_df["sum_predicted_points"] / aggregated_points_df["best_possible_points"]
    aggregated_points_df["actual_best_ratio"] = aggregated_points_df["sum_actual_points"] / aggregated_points_df["best_possible_points"]
    return top_10_players_df, aggregated_points_df

top_10_players, aggregate_points = select_best_10_players_with_aggregation(df)

top_10_players[top_10_players["season"] == '2024-25']
aggregate_points[aggregate_points["season"] == '2024-25']

Unnamed: 0,season,game_week,sum_predicted_points,sum_actual_points,best_possible_points,pred_best_ratio,actual_best_ratio
73,2024-25,2,39.723905,22,50,0.794478,0.44
74,2024-25,3,53.623646,27,58,0.924546,0.465517
75,2024-25,4,44.933391,35,58,0.774714,0.603448
76,2024-25,5,47.498781,40,62,0.766109,0.645161
77,2024-25,6,51.462411,58,88,0.5848,0.659091
78,2024-25,7,42.43946,41,63,0.673642,0.650794
79,2024-25,8,47.033356,31,52,0.904488,0.596154
80,2024-25,9,52.034889,30,62,0.839272,0.483871
81,2024-25,10,51.673318,31,57,0.906549,0.54386
82,2024-25,11,55.991189,4,56,0.999843,0.071429


In [9]:
df

Unnamed: 0,player_name,position,game_week,season,points,predicted_points,predicted_rank,actual_rank,prediction_accuracy
0,Ben Davies,DEF,2,2022-23,3,5.070079,1,1,Correctly Predicted
1,Ben Davies,DEF,2,2022-23,3,5.070079,2,2,Correctly Predicted
2,Wesley Fofana,DEF,2,2022-23,0,0.261740,3,3,Correctly Predicted
3,Illia Zabarnyi,DEF,2,2022-23,0,0.124517,4,4,Correctly Predicted
4,Harry Toffolo,DEF,3,2022-23,0,2.335543,1,42,False Positive
...,...,...,...,...,...,...,...,...,...
14556,Amadou Onana,MID,16,2024-25,0,0.215419,39,19,Correctly Excluded
14557,Boubacar Kamara,MID,16,2024-25,0,0.015311,40,26,Correctly Excluded
14558,Tommy Doyle,MID,16,2024-25,0,0.000000,41,21,Correctly Excluded
14559,Declan Rice,MID,16,2024-25,0,0.000000,42,15,Correctly Excluded
