# In [83]: (notebook cell prompt left over from the export)
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')

# TODO: Update these variables when running notebook
# `path` is the shot-level match CSV that every function below loads with
# pd.read_csv.
path = "../match-csvs/Shot_Visuals_AnneLutkemeyer_CatherineGagnon.csv"
# Names are compared against the CSV's shotHitBy / serverName / pointWonBy
# columns, so they must match the CSV spelling exactly.
ucla_player = "Anne Lutkemeyer"
opp_player = "Catherine Gagnon"
side = "Deuce" # For Return Placement viz; "All" disables the side filter
fh_bh = "Forehand" # For Return Placement viz; "All" disables the forehand/backhand filter

# Adjusted output path for JSONs: a "json" directory next to the current
# working directory. It is created here (if missing) so the functions below
# can open files inside it.
output_dir = os.path.join(os.getcwd(), "../json")
os.makedirs(output_dir, exist_ok=True)

#1. 1st Return Contact
def return_contact(player, path):
    """Export the player's return contact points and a per-depth breakdown.

    Loads the shot-level CSV at ``path`` and keeps the shots hit by ``player``
    with ``shotInRally == 2`` that have a known point winner and contact
    coordinates. Contacts recorded with ``shotContactY > 0`` are mirrored
    (both X and Y negated) so every contact ends up with ``shotContactY <= 0``.
    Each contact is then bucketed by ``shotContactY``:

    * ``'short'``: y >= -455
    * ``'mid'``:   -490 < y < -455
    * ``'deep'``:  y <= -490

    Two files are written into the module-level ``output_dir``:

    * ``ret_contact.json``: one record per return (including ``depth``).
    * ``ret_contact_dist.json``: per depth, the number of returns
      (``freq``), the share of those points won by the hitter
      (``win_percentage``, whole percent rounded down, as text such as
      ``"29%"``), a ``maxMin`` marker and the label position ``y``.

    A player without any matching return produces two empty JSON arrays.

    Args:
        player: Name matched against the ``shotHitBy`` column.
        path: Path of the match CSV.
    """
    # deuce_scores = ["0-0", "30-0", "0-30", "15-15", "40-40"]
    # & (events['pointScore'].isin(deuce_scores))
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that every shot
    # row of the point carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    columns = ['pointStartTime', 'shotHitBy', 'shotContactX', 'shotContactY',
               'pointWonBy', 'isWinner', 'shotFhBh']
    is_return = (events['shotHitBy'] == player) & (events['shotInRally'] == 2)
    returns_ucla = (
        events.loc[is_return, columns]
        .dropna(subset=['pointWonBy', 'shotContactX', 'shotContactY'])
        .copy()
    )

    # Mirror contacts with positive Y. This is done column-wise (not with a
    # row-wise DataFrame.apply) so it also works when there are no rows.
    sign = np.where(returns_ucla['shotContactY'] > 0, -1, 1)
    returns_ucla['shotContactX'] = returns_ucla['shotContactX'] * sign
    returns_ucla['shotContactY'] = returns_ucla['shotContactY'] * sign

    contact_y = returns_ucla['shotContactY']
    returns_ucla['depth'] = np.where(
        contact_y >= -455, 'short',
        np.where(contact_y > -490, 'mid', 'deep')
    )

    # Count + win pct. per depth.
    tally = pd.DataFrame({
        'depth': returns_ucla['depth'],
        'won': returns_ucla['pointWonBy'] == returns_ucla['shotHitBy'],
    })
    by_depth = tally.groupby('depth')['won']
    distribution = (
        pd.concat(
            [by_depth.size().rename('freq'), by_depth.sum().rename('wins')],
            axis=1,
        )
        .rename_axis('depth')
        .reset_index()
    )
    # Integer arithmetic: int(mean * 100) can drop a whole percent through
    # float error (29 wins out of 100 would come out as 28).
    distribution['win_percentage'] = (
        distribution['wins'].astype(int) * 100 // distribution['freq']
    )
    distribution = distribution.drop(columns='wins')

    pct = distribution['win_percentage']
    distribution['maxMin'] = np.where(
        pct == pct.max(), 'max',
        np.where(pct == pct.min(), 'min', 'no')
    )
    distribution['win_percentage'] = pct.astype(str) + '%'

    # Vertical position of the label for each depth band.
    distribution['y'] = distribution['depth'].map(
        {'short': -420, 'mid': -475, 'deep': -515}
    )

    # Export the data to JSON
    with open(os.path.join(output_dir, 'ret_contact.json'), 'w') as f:
        f.write(returns_ucla.to_json(orient='records'))

    with open(os.path.join(output_dir, 'ret_contact_dist.json'), 'w') as f:
        f.write(distribution.to_json(orient='records'))


#2. Net Errors
def net_errors(player, path):
    """Export every net error hit by ``player`` to ``net_errors.json``.

    Loads the shot-level CSV at ``path`` and keeps the shots hit by ``player``
    that are flagged ``isErrorNet == 1`` and have no missing value in any of
    the exported columns. Shots whose ``shotContactY`` is positive are
    mirrored: contact and location X/Y are all negated, so every exported
    shot has ``shotContactY <= 0``.

    The records are written to ``net_errors.json`` inside the module-level
    ``output_dir``. A player without net errors produces an empty JSON array.

    Args:
        player: Name matched against the ``shotHitBy`` column.
        path: Path of the match CSV.
    """
    # Load the data
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that every shot
    # row of the point carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    columns = ['pointStartTime', 'shotHitBy', 'shotContactX', 'shotContactY',
               'shotLocationX', 'shotLocationY', 'pointWonBy', 'shotFhBh']
    is_net_error = (events['shotHitBy'] == player) & (events['isErrorNet'] == 1.0)
    # dropna() over all selected columns also covers the pointWonBy check.
    errors = events.loc[is_net_error, columns].dropna().copy()

    # Mirror the shots contacted at shotContactY > 0. The sign is computed
    # once, before any column changes, and applied column-wise; a row-wise
    # DataFrame.apply fails on an empty frame.
    sign = np.where(errors['shotContactY'] > 0, -1, 1)
    for column in ('shotLocationX', 'shotLocationY', 'shotContactX', 'shotContactY'):
        errors[column] = errors[column] * sign

    # Export the data to JSON
    with open(os.path.join(output_dir, 'net_errors.json'), 'w') as f:
        f.write(errors.to_json(orient='records'))

#3. 1st Return Placement
def return_place(player, path, side, fh_bh):
    """Export where the player's returns landed and a left/mid/right breakdown.

    Loads the shot-level CSV at ``path`` and keeps the shots hit by ``player``
    with ``shotInRally == 2`` and a known point winner. Coordinates are
    normalised (see the inline comments), the rows are optionally filtered by
    stroke and court side, and each return is bucketed by ``shotLocationX``:

    * ``'left'``:  x <= -52.5
    * ``'mid'``:   -52.5 < x < 52.5
    * ``'right'``: x >= 52.5

    Returns without a ``shotLocationX`` get no bucket (``width`` is null) and
    are left out of the distribution.

    Two files are written into the module-level ``output_dir``:

    * ``return_place.json``: one record per return, including ``width`` and
      the ``fhBhFiltered`` / ``sideFiltered`` flags.
    * ``return_place_dist.json``: per width, the number of returns
      (``freq``), the share of those points won by the hitter
      (``win_percentage``, whole percent rounded down, as text), a ``maxMin``
      marker and the label position ``x``.

    Args:
        player: Name matched against the ``shotHitBy`` column.
        path: Path of the match CSV.
        side: Value of the ``side`` column to keep, or ``'All'`` for no
            side filter.
        fh_bh: Value of the ``shotFhBh`` column to keep, or ``'All'`` for no
            stroke filter. Applied independently of ``side``.
    """
    # Load the data
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that every shot
    # row of the point carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()
    events['isError'] = (
        (events['isErrorWideR'] == 1) | (events['isErrorWideL'] == 1)
        | (events['isErrorNet'] == 1) | (events['isErrorLong'] == 1)
    )

    # Filter for the player's returns
    columns = ['pointStartTime', 'shotHitBy', 'shotContactX', 'shotContactY',
               'shotLocationX', 'shotLocationY', 'pointWonBy', 'isWinner',
               'shotFhBh', 'isError', 'isErrorNet', 'side']
    is_return = (events['shotHitBy'] == player) & (events['shotInRally'] == 2)
    returns_place = events.loc[is_return, columns].dropna(subset=['pointWonBy']).copy()

    # Both masks are taken from the raw coordinates, before anything is
    # flipped.
    contact_positive = returns_place['shotContactY'] > 0
    mask_bottom_half = (returns_place['shotLocationY'] < 0) & contact_positive
    mask_near_net = (
        (returns_place['shotLocationY'] <= 50)
        & contact_positive
        & (returns_place['isErrorNet'] == 1)
    )
    near_net_only = mask_near_net & ~mask_bottom_half

    # Returns contacted at y > 0 that landed at y < 0: mirror contact and
    # location so the landing spot has a positive Y.
    returns_place.loc[mask_bottom_half, 'shotContactX'] *= -1
    returns_place.loc[mask_bottom_half, 'shotLocationX'] *= -1
    returns_place.loc[mask_bottom_half, 'shotContactY'] *= -1
    returns_place.loc[mask_bottom_half, 'shotLocationY'] = (
        returns_place.loc[mask_bottom_half, 'shotLocationY'].abs()
    )

    # Net errors contacted at y > 0 that were tagged at 0 <= y <= 50: mirror
    # X and the contact Y; the location Y is handled by the next step.
    returns_place.loc[near_net_only, 'shotContactX'] *= -1
    returns_place.loc[near_net_only, 'shotLocationX'] *= -1
    returns_place.loc[near_net_only, 'shotContactY'] *= -1

    # Accounting for net error tagging discrepancies: a net error tagged away
    # from y == 0 is moved onto y == 0, and both X coordinates are shifted by
    # the tagged location Y (away from the contact X when the location is to
    # its right, towards larger X otherwise).
    # NOTE(review): the geometric rationale for shifting X by the Y offset is
    # not visible in this file; kept exactly as before.
    off_net = (returns_place['shotLocationY'] != 0) & (returns_place['isErrorNet'] == 1)
    adjust_up = off_net & (returns_place['shotLocationX'] <= returns_place['shotContactX'])
    adjust_down = off_net & (returns_place['shotLocationX'] > returns_place['shotContactX'])

    returns_place.loc[adjust_up, 'shotLocationX'] += returns_place.loc[adjust_up, 'shotLocationY']
    returns_place.loc[adjust_down, 'shotLocationX'] -= returns_place.loc[adjust_down, 'shotLocationY']

    returns_place.loc[adjust_up, 'shotContactX'] += returns_place.loc[adjust_up, 'shotLocationY']
    returns_place.loc[adjust_down, 'shotContactX'] -= returns_place.loc[adjust_down, 'shotLocationY']
    returns_place.loc[adjust_up, 'shotLocationY'] = 0
    returns_place.loc[adjust_down, 'shotLocationY'] = 0

    # Optional filters. They are independent: the stroke filter must also
    # apply when side == 'All', otherwise fhBhFiltered below would claim a
    # filter that never ran.
    if fh_bh != 'All':
        returns_place = returns_place[returns_place['shotFhBh'] == fh_bh]
    if side != 'All':
        returns_place = returns_place[returns_place['side'] == side]
    returns_place = returns_place.copy()

    returns_place['fhBhFiltered'] = fh_bh != 'All'
    returns_place['sideFiltered'] = side != 'All'

    # Categorize into 'left', 'mid', 'right' based on shotLocationX. A missing
    # X gets no category instead of silently counting as 'right'.
    location_x = returns_place['shotLocationX']
    width = np.where(
        location_x <= -52.5, 'left',
        np.where(location_x < 52.5, 'mid', 'right')
    )
    returns_place['width'] = pd.Series(
        width, index=returns_place.index, dtype=object
    ).where(location_x.notna())

    # Count + win pct. per width (groupby skips rows without a width).
    tally = pd.DataFrame({
        'width': returns_place['width'],
        'won': returns_place['pointWonBy'] == returns_place['shotHitBy'],
    })
    by_width = tally.groupby('width')['won']
    distribution = (
        pd.concat(
            [by_width.size().rename('freq'), by_width.sum().rename('wins')],
            axis=1,
        )
        .rename_axis('width')
        .reset_index()
    )
    # Integer arithmetic: int(mean * 100) can drop a whole percent through
    # float error (29 wins out of 100 would come out as 28).
    distribution['win_percentage'] = (
        distribution['wins'].astype(int) * 100 // distribution['freq']
    )
    distribution = distribution.drop(columns='wins')

    # Assign 'max', 'min', or 'no' to the distribution based on win_percentage
    pct = distribution['win_percentage']
    distribution['maxMin'] = np.where(
        pct == pct.max(), 'max',
        np.where(pct == pct.min(), 'min', 'no')
    )

    # Convert win_percentage to string for display
    distribution['win_percentage'] = pct.astype(str) + '%'

    # Horizontal position of the label for each width band.
    distribution['x'] = distribution['width'].map(
        {'left': -100, 'mid': 0, 'right': 100}
    )

    # Export the data as JSON
    with open(os.path.join(output_dir, 'return_place.json'), 'w') as f:
        f.write(returns_place.to_json(orient='records'))

    with open(os.path.join(output_dir, 'return_place_dist.json'), 'w') as f:
        f.write(distribution.to_json(orient='records'))


#4. Serve Distribution
def serve_dist(player, path):
    """Export how many points ``player`` won per serve zone.

    Loads the shot-level CSV at ``path`` and keeps the rows with
    ``shotInRally == 1`` served by ``player`` where the first or the second
    serve was in and ``serveInPlacement`` is 'Wide', 'T' or 'Body'. Rows are
    grouped by zone (``side`` + space + ``serveInPlacement``, e.g.
    ``"Deuce Wide"``), sorted by zone name.

    ``serve_dist.json`` is written into the module-level ``output_dir`` with
    one record per zone that actually occurs:

    * ``Zone``: the zone name.
    * ``Win Proportion``: ``"<points won>/<serves>"`` as text.
    * ``Server``: the player's name on the first record, ``""`` on the rest.

    Args:
        player: Name matched against ``serverName`` and ``pointWonBy``.
        path: Path of the match CSV.
    """
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that the serve row
    # carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    valid_placements = ['Wide', 'T', 'Body']
    is_serve_in = (
        (events['shotInRally'] == 1)
        & (events['serverName'] == player)
        & ((events['firstServeIn'] == 1.0) | (events['secondServeIn'] == 1.0))
        & events['serveInPlacement'].isin(valid_placements)
    )
    # Work on a copy so adding the Zone column does not write into a slice.
    serves = events[is_serve_in].copy()
    serves['Zone'] = serves['side'] + " " + serves['serveInPlacement']

    serve_counts = serves.groupby('Zone').size()
    won_counts = (
        serves[serves['pointWonBy'] == player]
        .groupby('Zone').size()
        .reindex(serve_counts.index, fill_value=0)
    )

    table = (
        (won_counts.astype(str) + '/' + serve_counts.astype(str))
        .rename('Win Proportion')
        .rename_axis('Zone')
        .reset_index()
    )

    # Name only on the first row. The list is sized from the table: a fixed
    # six-element list fails whenever a zone is missing from the match.
    table['Server'] = [player if row == 0 else "" for row in range(len(table))]

    # Export data to JSON
    with open(os.path.join(output_dir, 'serve_dist.json'), 'w') as f:
        f.write(table.to_json(orient='records'))


#5. Serve Error Distribution
def serve_error_dist(player, path):
    """Export the landing spots of the player's missed serves, by error type.

    Loads the shot-level CSV at ``path`` and keeps the rows with
    ``shotInRally == 1`` served by ``player``. Serve coordinates are rotated
    onto the ``y >= 0`` half (a serve recorded with ``y < 0`` gets both X and
    Y negated) and any ``y <= 25`` is snapped to ``0``.

    One record is produced per missed serve:

    * every point whose first serve was not in contributes the first-serve
      coordinates;
    * every point where the second serve was not in either additionally
      contributes the second-serve coordinates.

    Each record is classified from its coordinates (first match wins):

    * ``Deuce Net`` / ``Ad Net``:   y == 0 and x < 0 / x > 0
    * ``Deuce Wide``: 0 < y < 245 and (x < -157.5 or 0 < x < 157.5)
    * ``Ad Wide``:    0 < y < 245 and (x > 157.5 or -157.5 < x < 0)
    * ``Deuce Long`` / ``Ad Long``: y > 245 and x < 0 / x > 0
    * ``Unknown``: anything else (missing coordinates, exact boundaries).

    Two files are written into the module-level ``output_dir``:

    * ``serve_error.json``: one record per missed serve
      (``serverName``, ``firstServeIn``, ``secondServeIn``, ``x``, ``y``,
      ``type``).
    * ``serve_error_dist.json``: the count of each of the six named types
      (``Unknown`` is not listed).

    Args:
        player: Name matched against the ``serverName`` column.
        path: Path of the match CSV.
    """
    # Load the data
    events = pd.read_csv(path)
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    # Serve rows of the specified player
    serves = events[(events['serverName'] == player) & (events['shotInRally'] == 1)].copy()

    for attempt in ('firstServe', 'secondServe'):
        x_col, y_col = attempt + 'XCoord', attempt + 'YCoord'
        # Decide the flip from the raw Y *before* touching it; testing y < 0
        # after Y has been made non-negative would never flip X.
        sign = np.where(serves[y_col] < 0, -1, 1)
        serves[x_col] = serves[x_col] * sign
        serves[y_col] = serves[y_col] * sign
        # Round coords near net
        serves[y_col] = serves[y_col].mask(serves[y_col] <= 25, 0)

    # One record per missed serve. The first-serve miss of a double fault is
    # emitted once, not once per copy of the row.
    first_faults = serves[serves['firstServeIn'] != 1.0]
    double_faults = first_faults[first_faults['secondServeIn'] != 1.0]
    first_misses = first_faults.assign(
        x=first_faults['firstServeXCoord'], y=first_faults['firstServeYCoord']
    )
    second_misses = double_faults.assign(
        x=double_faults['secondServeXCoord'], y=double_faults['secondServeYCoord']
    )

    # Select only the specified columns
    serve_errors = pd.concat([first_misses, second_misses], ignore_index=True)[
        ['serverName', 'firstServeIn', 'secondServeIn', 'x', 'y']
    ].copy()

    # Categorize serve errors by type. y is never negative here, so the
    # "wide" band is 0 < y < 245 (between the snapped net line and 245).
    x, y = serve_errors['x'], serve_errors['y']
    short_of_245 = (y > 0) & (y < 245)
    serve_errors['type'] = np.select(
        [
            (x < 0) & (y == 0),                                          # Deuce Net
            short_of_245 & ((x < -157.5) | ((x > 0) & (x < 157.5))),     # Deuce Wide
            (x < 0) & (y > 245),                                         # Deuce Long
            (x > 0) & (y == 0),                                          # Ad Net
            short_of_245 & ((x > 157.5) | ((x < 0) & (x > -157.5))),     # Ad Wide
            (x > 0) & (y > 245),                                         # Ad Long
        ],
        [
            'Deuce Net', 'Deuce Wide', 'Deuce Long', 'Ad Net', 'Ad Wide', 'Ad Long'
        ],
        default='Unknown'
    )

    errorTypes = ['Deuce Net', 'Deuce Wide', 'Deuce Long', 'Ad Net', 'Ad Wide', 'Ad Long']

    # Distribution of serve error types (always all six, zero-filled)
    serve_distribution = (
        serve_errors.groupby('type').size()
        .reindex(errorTypes, fill_value=0)
        .rename_axis('type')
        .reset_index(name='count')
    )

    # Export the data
    with open(os.path.join(output_dir, 'serve_error.json'), 'w') as f:
        f.write(serve_errors.to_json(orient='records'))
    with open(os.path.join(output_dir, 'serve_error_dist.json'), 'w') as f:
        f.write(serve_distribution.to_json(orient='records'))


#6. Serve Placement
def serve_place(player, path):
    """Export the player's in-play serve locations and per-zone win labels.

    Loads the shot-level CSV at ``path`` and takes the first row of every
    point (ordered by ``pointNumber``). From that row the serve location is
    the first-serve coordinates when ``firstServeIn == 1`` and the
    second-serve coordinates otherwise. Only points served by ``player`` with
    a serve in (first or second) and a ``serveInPlacement`` of 'Wide', 'T' or
    'Body' are kept. Serves recorded with ``y < 0`` are rotated onto the
    ``y >= 0`` half (X and Y negated).

    Two files are written into the module-level ``output_dir``:

    * ``serve_place.json``: one record per serve with ``x``, ``y``,
      ``serveOutcome`` ('Ace', 'Won' or 'Lost') and the source columns.
    * ``serve_place_labels.json``: one record per (side, placement) with
      ``count``, ``serves_won``, ``proportion``, display labels, the label
      ``x`` position, ``text_color`` and ``max_min``. When a zone is both
      the best and the worst (a tie, or a single zone) it is reported as the
      maximum in *both* ``text_color`` ("darkgreen") and ``max_min``
      ("max").

    Args:
        player: Name matched against ``serverName`` and ``pointWonBy``.
        path: Path of the match CSV.
    """
    # Load the data
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that the first row
    # of the point carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    # First row of every point, in pointNumber order. Selecting rows this
    # way avoids a per-point Python callback.
    opening = (
        events.dropna(subset=['pointNumber'])
        .drop_duplicates(subset='pointNumber')
        .sort_values('pointNumber', kind='stable')
    )
    first_in = opening['firstServeIn'] == 1.0
    serves = pd.DataFrame({
        'pointNumber': opening['pointNumber'],
        'pointStartTime': opening['pointStartTime'],
        'serverName': opening['serverName'],
        'x': opening['firstServeXCoord'].where(first_in, opening['secondServeXCoord']),
        'y': opening['firstServeYCoord'].where(first_in, opening['secondServeYCoord']),
        'serveIn': first_in | (opening['secondServeIn'] == 1.0),
        'side': opening['side'],
        'serveInPlacement': opening['serveInPlacement'],
        'pointWonByUCLA': opening['pointWonBy'] == player,
        'isAce': opening['isAce'],
    }).reset_index(drop=True)

    # Serves by the player that were in
    serves_ucla = serves[(serves['serverName'] == player) & serves['serveIn']].copy()

    # Rotate serves recorded with y < 0; the mask is taken before either
    # column changes.
    negative_y = serves_ucla['y'] < 0
    serves_ucla['x'] = np.where(negative_y, -serves_ucla['x'], serves_ucla['x'])
    serves_ucla['y'] = np.where(negative_y, -serves_ucla['y'], serves_ucla['y'])

    # Define the serve outcome based on conditions
    serves_ucla['serveOutcome'] = np.where(
        serves_ucla['isAce'] == 1.0, 'Ace',
        np.where(serves_ucla['pointWonByUCLA'], 'Won', 'Lost')
    )

    # Cleaning for valid serve placements.
    valid_placements = ['Wide', 'T', 'Body']
    serves_ucla = serves_ucla[serves_ucla['serveInPlacement'].isin(valid_placements)]

    # Group by side and serveInPlacement, and calculate count and serves won
    by_zone = serves_ucla.groupby(['side', 'serveInPlacement'])['pointWonByUCLA']
    distribution = pd.DataFrame({
        'count': by_zone.size(),
        'serves_won': by_zone.sum(),
    }).reset_index()

    # Calculate the win percentage (proportion)
    distribution['proportion'] = distribution['serves_won'] / distribution['count']

    # Find the minimum and maximum proportions
    min_proportion = distribution['proportion'].min()
    max_proportion = distribution['proportion'].max()

    # Create labels DataFrame
    labels = distribution.copy()
    labels['proportion_label'] = (labels['proportion'] * 100).round(1).astype(str) + "%"
    labels['count_label'] = labels['count']

    # Label x position per (side, placement); any other combination gets NaN.
    label_x = {
        ('Ad', 'Wide'): 131.25,
        ('Ad', 'Body'): 78.75,
        ('Ad', 'T'): 26.25,
        ('Deuce', 'T'): -26.25,
        ('Deuce', 'Body'): -78.75,
        ('Deuce', 'Wide'): -131.25,
    }
    labels['x'] = [
        label_x.get(zone, np.nan)
        for zone in zip(labels['side'], labels['serveInPlacement'])
    ]

    # Text color and max/min status use the same precedence (max first) so
    # the two columns cannot disagree on a tie.
    is_max = labels['proportion'] == max_proportion
    is_min = labels['proportion'] == min_proportion
    labels['text_color'] = np.where(
        is_max, "darkgreen",
        np.where(is_min, "darkred", "black")
    )
    labels['max_min'] = np.where(
        is_max, "max",
        np.where(is_min, "min", "no")
    )

    # Export data frames as JSON
    with open(os.path.join(output_dir, 'serve_place.json'), 'w') as f:
        f.write(serves_ucla.to_json(orient='records'))

    with open(os.path.join(output_dir, 'serve_place_labels.json'), 'w') as f:
        f.write(labels.to_json(orient='records'))

#7. Summary Statistics
def summary_stats(player1, player2, path):
    """Build the match summary table for both players and export it.

    Loads the shot-level CSV at ``path`` and fills one row per statistic with
    one text column per player (the column is named after the player).
    Counts are plain numbers as text; proportions are ``"<won>/<total>"``.
    The result is written to ``summary_stats.json`` inside the module-level
    ``output_dir``, together with two flag columns: ``isProp`` (the value is
    a proportion) and ``isRallyStat`` (the stat is about rally length).

    Args:
        player1: First player's name as it appears in the CSV.
        player2: Second player's name as it appears in the CSV.
        path: Path of the match CSV.
    """
    # Load the dataset; propagate the point winner backwards within a point
    # so every shot row of the point carries it.
    events = pd.read_csv(path)
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    players = [player1, player2]
    stat_names = ['Aces', 'Double Faults', '1st Serve In', '1st Serve Points Won',
                  '2nd Serve Points Won', 'Break Points Saved', 'Total Serve Points Won',
                  'Service Games Won', 'Average Rally Length', '0-4', '5-8', '9+']
    table = pd.DataFrame({'Stat': stat_names})

    def put(stat, who, text):
        # Write one cell; the player's column is created on first use.
        table.loc[table['Stat'] == stat, who] = text

    def won_over_total(rows, owner_col, who):
        # "<rows of `who` that `who` also won>/<rows of `who`>".
        owned = rows[owner_col] == who
        won = int((owned & (rows['pointWonBy'] == who)).sum())
        return f"{won}/{int(owned.sum())}"

    # Aces and Double Faults: flagged rows counted per hitter.
    for stat, flag in (('Aces', 'isAce'), ('Double Faults', 'isDoubleFault')):
        per_hitter = events.loc[events[flag] == 1.0, 'shotHitBy'].value_counts()
        for who in players:
            put(stat, who, str(per_hitter[who]) if who in per_hitter.index else "0")

    # 1st Serve In, 1st Serve Points Won, and 2nd Serve Points Won
    serve_rows = events[events['shotInRally'] == 1]
    first_in = serve_rows[serve_rows['firstServeIn'] == 1.0]
    second_in = serve_rows[serve_rows['secondServeIn'] == 1.0]
    for who in players:
        attempts = int((serve_rows['shotHitBy'] == who).sum())
        landed = int((first_in['shotHitBy'] == who).sum())
        put('1st Serve In', who, f"{landed}/{attempts}")
        put('1st Serve Points Won', who, won_over_total(first_in, 'shotHitBy', who))
        put('2nd Serve Points Won', who, won_over_total(second_in, 'shotHitBy', who))

    # Break Points Saved: rows flagged isBreakPoint, attributed to the server.
    break_rows = events[events['isBreakPoint'] == 1.0]
    for who in players:
        put('Break Points Saved', who, won_over_total(break_rows, 'serverName', who))

    # Total Serve Points Won: one row per point (the row flagged isPointEnd).
    point_ends = events[events['isPointEnd'] == 1.0]
    for who in players:
        put('Total Serve Points Won', who, won_over_total(point_ends, 'serverName', who))

    # Service Games Won: the last point-ending row of every run of equal
    # gameScore values stands for one game.
    run_id = (point_ends['gameScore'] != point_ends['gameScore'].shift()).cumsum()
    game_enders = (
        point_ends.assign(gameGroup=run_id)
        .groupby(['gameScore', 'gameGroup'])
        .tail(1)
    )
    for who in players:
        put('Service Games Won', who, won_over_total(game_enders, 'serverName', who))

    # Average Rally Length: mean shotInRally of the point-ending rows; the
    # same figure is reported for both players.
    point_count = point_ends.shape[0]
    if point_count > 0:
        avg_rally_len = round(point_ends['shotInRally'].sum() / point_count, 1)
    else:
        avg_rally_len = 0
    for who in players:
        put('Average Rally Length', who, f"{avg_rally_len:.1f}")

    # Points won per rally-length bucket (bounds inclusive).
    buckets = {'0-4': (0, 4), '5-8': (5, 8), '9+': (9, np.inf)}
    rally_len = point_ends['shotInRally']
    for label, (shortest, longest) in buckets.items():
        in_bucket = point_ends[(rally_len >= shortest) & (rally_len <= longest)]
        bucket_size = in_bucket.shape[0]
        for who in players:
            taken = int((in_bucket['pointWonBy'] == who).sum())
            put(f'{label}', who, f"{taken}/{bucket_size}")

    # Flags: is the stat a proportion / is it a rally stat.
    count_stats = ('Aces', 'Double Faults', 'Average Rally Length')
    rally_stats = ('Average Rally Length', '0-4', '5-8', '9+')
    table['isProp'] = [name not in count_stats for name in stat_names]
    table['isRallyStat'] = [name in rally_stats for name in stat_names]

    # Convert to JSON
    with open(os.path.join(output_dir, 'summary_stats.json'), 'w') as f:
        f.write(table.to_json(orient='records'))


#8. Winner Placement
def winner_place(player, path):
    """Export every winner hit by ``player`` to ``winners.json``.

    Loads the shot-level CSV at ``path`` and keeps the shots hit by ``player``
    that are flagged ``isWinner == 1`` in points won by ``player`` and have no
    missing value in any exported column (a missing ``isVolley`` counts as
    ``0``). Shots whose ``shotLocationY`` is negative are mirrored: contact
    and location X/Y are all negated, so every exported winner has
    ``shotLocationY >= 0``.

    The records are written to ``winners.json`` inside the module-level
    ``output_dir``. A player without winners produces an empty JSON array.

    Args:
        player: Name matched against ``shotHitBy`` and ``pointWonBy``.
        path: Path of the match CSV.
    """
    # Load the data
    events = pd.read_csv(path)
    # Propagate the winner backwards within each point so that every shot
    # row of the point carries it.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()
    events['isVolley'] = events['isVolley'].fillna(0)

    columns = ['shotHitBy', 'pointStartTime', 'shotContactX', 'shotContactY',
               'shotLocationX', 'shotLocationY', 'pointWonBy', 'isVolley', 'shotFhBh']
    is_winner = (
        (events['shotHitBy'] == player)
        & (events['isWinner'] == 1.0)
        & (events['pointWonBy'] == player)
    )
    # dropna() over all selected columns also covers the pointWonBy check.
    winners = events.loc[is_winner, columns].dropna().copy()

    # Mirror the shots that landed at shotLocationY < 0. The sign is computed
    # once, before any column changes, and applied column-wise; a row-wise
    # DataFrame.apply fails on an empty frame.
    sign = np.where(winners['shotLocationY'] < 0, -1, 1)
    for column in ('shotLocationX', 'shotContactX', 'shotContactY', 'shotLocationY'):
        winners[column] = winners[column] * sign

    # Export the data as JSON
    with open(os.path.join(output_dir, 'winners.json'), 'w') as f:
        f.write(winners.to_json(orient='records'))

# Run all functions: each entry is (exporter, positional arguments). The
# exporters are independent; each one reloads the CSV and writes its own
# JSON file(s) into output_dir.
export_steps = (
    (return_contact, (ucla_player, path)),
    (net_errors, (ucla_player, path)),
    (return_place, (ucla_player, path, side, fh_bh)),
    (serve_dist, (ucla_player, path)),
    (serve_error_dist, (ucla_player, path)),
    (serve_place, (ucla_player, path)),
    (summary_stats, (ucla_player, opp_player, path)),
    (winner_place, (ucla_player, path)),
)
for export_step, step_args in export_steps:
    export_step(*step_args)