# In [6]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# One row per output stat, in output order:
# (label, value is a "won/total" proportion, value is a rally-length stat).
_SUMMARY_STAT_TABLE = (
    ('Aces', False, False),
    ('Double Faults', False, False),
    ('1st Serve In', True, False),
    ('1st Serve Points Won', True, False),
    ('2nd Serve Points Won', True, False),
    ('Break Points Saved', True, False),
    ('Total Serve Points Won', True, False),
    ('Service Games Won', True, False),
    ('Average Rally Length', False, True),
    ('0-4', True, True),
    ('5-8', True, True),
    ('9+', True, True),
)

# Inclusive (min, max) bounds on the final `shotInRally` value of a point.
_RALLY_LENGTH_BUCKETS = {
    '0-4': (0, 4),
    '5-8': (5, 8),
    '9+': (9, np.inf),
}


def _count_rows(frame, column, value):
    """Return how many rows of *frame* have ``frame[column] == value``."""
    return int((frame[column] == value).sum())


def _won_over_total(frame, owner_column, player):
    """Return ``"won/total"`` for the rows of *frame* that belong to *player*.

    ``total`` counts rows where ``owner_column == player``; ``won`` counts
    those of them where ``pointWonBy == player`` as well.
    """
    owned = frame[frame[owner_column] == player]
    return f"{_count_rows(owned, 'pointWonBy', player)}/{owned.shape[0]}"


def gen_summary_stats(file_path, player1, player2, output_path='summary_stats.json'):
    """Compute per-player match summary stats from a shot-level CSV and save them as JSON.

    The CSV is read with ``pd.read_csv`` and must provide the columns
    ``pointNumber``, ``pointWonBy``, ``shotHitBy``, ``serverName``,
    ``shotInRally``, ``gameScore``, ``isAce``, ``isDoubleFault``,
    ``firstServeIn``, ``secondServeIn``, ``isBreakPoint`` and ``isPointEnd``.

    The result is a JSON array (``orient='records'``) with one object per
    stat, holding the keys ``Stat``, one key per player name, ``isProp`` and
    ``isRallyStat``. All player values are strings: plain counts for aces and
    double faults, ``"won/total"`` for proportions, and a one-decimal number
    for the average rally length (identical for both players).

    Args:
        file_path: Path of the shot-level CSV to read.
        player1: Name of the first player, as it appears in the CSV.
        player2: Name of the second player, as it appears in the CSV.
        output_path: Where to write the JSON. Defaults to
            ``'summary_stats.json'`` in the current working directory.

    Returns:
        None. The only output is the JSON file.
    """
    events = pd.read_csv(file_path)
    # Within each point, copy the winner backwards onto rows where it is
    # missing so that every shot row of the point knows who won it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    players = [player1, player2]
    stats = {player: {} for player in players}

    # Aces / double faults: number of flagged rows hit by each player.
    for label, flag_column in (('Aces', 'isAce'), ('Double Faults', 'isDoubleFault')):
        hitters = events.loc[events[flag_column] == 1.0, 'shotHitBy']
        tally = hitters.value_counts().to_dict()
        for player in players:
            stats[player][label] = str(int(tally.get(player, 0)))

    serves = events[events['shotInRally'] == 1]
    first_serve_in = serves[serves['firstServeIn'] == 1.0]
    second_serve_in = serves[serves['secondServeIn'] == 1.0]
    # NOTE(review): every row flagged isBreakPoint is counted, so this assumes
    # the flag is set on a single row per break point -- confirm against the data.
    break_points = events[events['isBreakPoint'] == 1.0]
    point_ends = events[events['isPointEnd'] == 1.0]

    # Last point of each game: a new game starts whenever `gameScore` changes
    # between consecutive point-ending rows. `assign` builds a new frame, so
    # the filtered `point_ends` slice is never mutated.
    starts_new_game = point_ends['gameScore'] != point_ends['gameScore'].shift()
    game_ends = (
        point_ends.assign(gameGroup=starts_new_game.cumsum())
        .groupby(['gameScore', 'gameGroup'])
        .tail(1)
    )

    # Mean of the final shot number over all points; shared by both players.
    num_rallies = point_ends.shape[0]
    avg_rally_len = (
        round(point_ends['shotInRally'].sum() / num_rallies, 1) if num_rallies > 0 else 0
    )
    avg_rally_text = f"{avg_rally_len:.1f}"

    for player in players:
        row = stats[player]
        serves_in = _count_rows(first_serve_in, 'shotHitBy', player)
        total_serves = _count_rows(serves, 'shotHitBy', player)
        row['1st Serve In'] = f"{serves_in}/{total_serves}"
        row['1st Serve Points Won'] = _won_over_total(first_serve_in, 'shotHitBy', player)
        row['2nd Serve Points Won'] = _won_over_total(second_serve_in, 'shotHitBy', player)
        row['Break Points Saved'] = _won_over_total(break_points, 'serverName', player)
        row['Total Serve Points Won'] = _won_over_total(point_ends, 'serverName', player)
        row['Service Games Won'] = _won_over_total(game_ends, 'serverName', player)
        row['Average Rally Length'] = avg_rally_text

    # Points won by each player out of all points whose length falls in the bucket.
    for label, (min_shots, max_shots) in _RALLY_LENGTH_BUCKETS.items():
        in_bucket = point_ends[(point_ends['shotInRally'] >= min_shots)
                               & (point_ends['shotInRally'] <= max_shots)]
        total_points = in_bucket.shape[0]
        for player in players:
            points_won = _count_rows(in_bucket, 'pointWonBy', player)
            stats[player][label] = f"{points_won}/{total_points}"

    # Assemble the table: Stat, one column per player, then the two flags.
    labels = [label for label, _, _ in _SUMMARY_STAT_TABLE]
    summary_stats = pd.DataFrame({'Stat': labels})
    for player in players:
        summary_stats[player] = [stats[player][label] for label in labels]
    summary_stats['isProp'] = [is_prop for _, is_prop, _ in _SUMMARY_STAT_TABLE]
    summary_stats['isRallyStat'] = [is_rally for _, _, is_rally in _SUMMARY_STAT_TABLE]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(summary_stats.to_json(orient='records'))


# Example run: build the summary JSON for one recorded match.
# The names below stay module-level so later code can keep using them.
path = "../Match CSVs/Shot_Visuals_AnneLutkemeyer_CatherineGagnon.csv"
player1 = "Anne Lutkemeyer"
player2 = "Catherine Gagnon"

# Only touch the filesystem when executed directly (script or notebook cell),
# not when this file is imported for its function.
if __name__ == "__main__":
    gen_summary_stats(path, player1, player2)