In [2]:
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')

# TODO: Update these variables when running notebook
# CSV of shot-level match events; every export function below loads it with pd.read_csv.
path = "../match-csvs/Shot_Visuals_SpencerJohnson_ThomasPaulsell.csv"
# Player name compared against the serverName and pointWonBy columns of that CSV.
player = "Spencer Johnson"

# Adjusted output path for JSONs
# Resolved relative to the current working directory, so the result depends on
# where the notebook kernel was started.
output_dir = os.path.join(os.getcwd(), "../json")
# Create the folder up front so the later open(..., 'w') calls do not fail on a
# missing directory; exist_ok=True makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

#1. Serve Distribution
def serve_dist(player, path, out_dir=None):
    """Export the player's points-won / serves-in tally per serve zone.

    A zone is the string "<side> <serveInPlacement>" (for example "Ad T").
    Only rows with shotInRally == 1 where ``player`` is the server, the first
    or second serve is flagged in, and the placement is Wide, T or Body are
    counted. The result is written to ``serve_dist.json`` as a list of records
    with the keys "Zone", "Win Proportion" ("won/total") and "Server".

    Args:
        player: Name matched against the serverName and pointWonBy columns.
        path: Path of the match CSV to read.
        out_dir: Directory that receives the JSON file. When None, the
            module-level ``output_dir`` is used.

    Returns:
        None. The function is called for its file output.
    """
    target_dir = output_dir if out_dir is None else out_dir
    os.makedirs(target_dir, exist_ok=True)

    events = pd.read_csv(path)
    # pointWonBy is back-filled within each point so that the serve row (the
    # first shot) carries the winner recorded on a later row of the same point.
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    is_player_serve = (events['shotInRally'] == 1) & (events['serverName'] == player)
    serve_landed = (events['firstServeIn'] == 1.0) | (events['secondServeIn'] == 1.0)
    valid_placements = ['Wide', 'T', 'Body']
    has_valid_placement = events['serveInPlacement'].isin(valid_placements)

    # .copy() so the Zone column is added to an independent frame instead of a
    # view of `events` (the original relied on chained assignment).
    serves = events[is_player_serve & serve_landed & has_valid_placement].copy()
    serves['Zone'] = serves['side'] + " " + serves['serveInPlacement']

    serve_counts = serves.groupby('Zone').size()
    won_counts = (
        serves[serves['pointWonBy'] == player]
        .groupby('Zone')
        .size()
        # Zones without a single won point must still show up as 0/total.
        .reindex(serve_counts.index, fill_value=0)
    )
    win_proportion = won_counts.astype(str) + '/' + serve_counts.astype(str)

    zone_table = pd.DataFrame({
        "Zone": list(serve_counts.index),
        "Win Proportion": win_proportion.tolist(),
    })

    # The player's name is shown on the first row only; the remaining rows get
    # an empty string. The column is sized from the table so that matches with
    # fewer (or more) than six zones no longer raise a length-mismatch error.
    if len(zone_table) > 0:
        server_column = [player] + [""] * (len(zone_table) - 1)
    else:
        server_column = []
    zone_table['Server'] = server_column

    # Export data to JSON
    serve_dist_json = zone_table.to_json(orient='records')

    with open(os.path.join(target_dir, 'serve_dist.json'), 'w') as f:
        f.write(serve_dist_json)


#2 Serve Placement
def serve_place(player, path, out_dir=None):
    """Export the landing spot of each in-serve by ``player`` plus zone labels.

    Two files are written:

    * ``serve_place.json`` - one record per point served by ``player`` whose
      first or second serve is flagged in and whose placement is Wide, T or
      Body. ``x``/``y`` are the coordinates of the serve that went in, mirrored
      through the origin when ``y`` is negative so every serve is drawn on the
      same half. ``serveOutcome`` is "Ace", "Won" or "Lost".
    * ``serve_place_labels.json`` - one record per (side, serveInPlacement)
      with the serve count, points won, win proportion, a fixed label x
      position, and min/max markers.

    Args:
        player: Name matched against the serverName and pointWonBy columns.
        path: Path of the match CSV to read.
        out_dir: Directory that receives the JSON files. When None, the
            module-level ``output_dir`` is used.

    Returns:
        None. The function is called for its file output.
    """
    target_dir = output_dir if out_dir is None else out_dir
    os.makedirs(target_dir, exist_ok=True)

    # Load the data
    events = pd.read_csv(path)
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    # Take the first row of every point, ordered by point number. This is done
    # with column operations rather than groupby().apply() because apply() no
    # longer passes the grouping column to the callback in recent pandas, which
    # made the per-group df['pointNumber'] lookup fragile.
    first_rows = (
        events[events['pointNumber'].notna()]
        .drop_duplicates(subset='pointNumber', keep='first')
        .sort_values('pointNumber', kind='stable')
        .reset_index(drop=True)
    )
    first_in = first_rows['firstServeIn'] == 1.0
    second_in = first_rows['secondServeIn'] == 1.0

    serves = pd.DataFrame({
        'pointNumber': first_rows['pointNumber'],
        'pointStartTime': first_rows['pointStartTime'],
        'serverName': first_rows['serverName'],
        # Use the first-serve coordinates when the first serve went in,
        # otherwise fall back to the second-serve coordinates.
        'x': first_rows['firstServeXCoord'].where(first_in, first_rows['secondServeXCoord']),
        'y': first_rows['firstServeYCoord'].where(first_in, first_rows['secondServeYCoord']),
        'serveIn': first_in | second_in,
        'side': first_rows['side'],
        'serveInPlacement': first_rows['serveInPlacement'],
        'pointWonByUCLA': first_rows['pointWonBy'] == player,
        'isAce': first_rows['isAce'],
    })

    # Keep only serves hit by the player that landed in
    serves_ucla = serves[(serves['serverName'] == player) & serves['serveIn']].copy()

    # Mirror serves with a negative y through the origin. The mask is computed
    # once, before y itself is overwritten.
    below_origin = serves_ucla['y'] < 0
    serves_ucla['x'] = np.where(below_origin, -serves_ucla['x'], serves_ucla['x'])
    serves_ucla['y'] = np.where(below_origin, -serves_ucla['y'], serves_ucla['y'])

    # Define the serve outcome; an ace takes precedence over won/lost
    serves_ucla['serveOutcome'] = np.where(
        serves_ucla['isAce'] == 1.0, 'Ace',
        np.where(serves_ucla['pointWonByUCLA'], 'Won', 'Lost')
    )

    # Cleaning for valid serve placements.
    valid_placements = ['Wide', 'T', 'Body']
    serves_ucla = serves_ucla[serves_ucla['serveInPlacement'].isin(valid_placements)]

    # Group by side and serveInPlacement, and calculate count and serves won
    distribution = serves_ucla.groupby(['side', 'serveInPlacement']).agg(
        count=('pointNumber', 'size'),
        serves_won=('pointWonByUCLA', 'sum')
    ).reset_index()

    # Calculate the win percentage (proportion)
    distribution['proportion'] = distribution['serves_won'] / distribution['count']

    # Find the minimum and maximum proportions
    min_proportion = distribution['proportion'].min()
    max_proportion = distribution['proportion'].max()

    labels = distribution.copy()
    labels['proportion_label'] = (labels['proportion'] * 100).round(1).astype(str) + "%"
    labels['count_label'] = labels['count']

    # Fixed x position of each zone label; any other (side, placement) pair
    # gets NaN.
    label_x_by_zone = {
        ('Ad', 'Wide'): 131.25,
        ('Ad', 'Body'): 78.75,
        ('Ad', 'T'): 26.25,
        ('Deuce', 'T'): -26.25,
        ('Deuce', 'Body'): -78.75,
        ('Deuce', 'Wide'): -131.25,
    }
    labels['x'] = pd.Series(
        [
            label_x_by_zone.get(zone, np.nan)
            for zone in zip(labels['side'], labels['serveInPlacement'])
        ],
        index=labels.index,
        dtype=float,
    )

    # Determine text color and max/min status.
    # NOTE: when the lowest and highest proportion coincide, the colour check
    # tests "min" first while the marker check tests "max" first, so such a row
    # is "darkred" but marked "max". Kept as-is to preserve the exported data.
    is_min = labels['proportion'] == min_proportion
    is_max = labels['proportion'] == max_proportion
    labels['text_color'] = np.where(is_min, "darkred", np.where(is_max, "darkgreen", "black"))
    labels['max_min'] = np.where(is_max, "max", np.where(is_min, "min", "no"))

    # Export data frames as JSON
    serve_place_json = serves_ucla.to_json(orient='records')
    serve_place_labels_json = labels.to_json(orient='records')

    with open(os.path.join(target_dir, 'serve_place.json'), 'w') as f:
        f.write(serve_place_json)

    with open(os.path.join(target_dir, 'serve_place_labels.json'), 'w') as f:
        f.write(serve_place_labels_json)


#3. Serve Error Distribution
def serve_error_dist(player, path, out_dir=None):
    """Export where the player's missed serves landed and how they missed.

    Every serve by ``player`` (rows with shotInRally == 1) whose firstServeIn
    is not 1.0 yields one record at the first-serve coordinates. If
    secondServeIn is also not 1.0, a second record at the second-serve
    coordinates is added, so a double fault produces exactly two records.
    Coordinates with a negative y are mirrored through the origin, and a y of
    25 or less is snapped to 0 (treated as a net error).

    Two files are written:

    * ``serve_error.json`` - the records (serverName, firstServeIn,
      secondServeIn, x, y, type).
    * ``serve_error_dist.json`` - the count per error type for the six types
      Deuce/Ad x Net/Wide/Long; "Unknown" records are not counted there.

    Args:
        player: Name matched against the serverName column.
        path: Path of the match CSV to read.
        out_dir: Directory that receives the JSON files. When None, the
            module-level ``output_dir`` is used.

    Returns:
        None. The function is called for its file output.
    """
    target_dir = output_dir if out_dir is None else out_dir
    os.makedirs(target_dir, exist_ok=True)

    # Load the data
    events = pd.read_csv(path)
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    # Keep the serve row of every point served by the specified player
    serves = events[(events['serverName'] == player) & (events['shotInRally'] == 1)].copy()

    for prefix in ('firstServe', 'secondServe'):
        x_col = prefix + 'XCoord'
        y_col = prefix + 'YCoord'
        # Mirror x wherever y is negative; the mask must be taken before y is
        # replaced by its absolute value.
        below_origin = serves[y_col] < 0
        serves[x_col] = serves[x_col].mask(below_origin, -serves[x_col])
        y_abs = serves[y_col].abs()
        # Round coords near net
        serves[y_col] = y_abs.mask(y_abs <= 25, 0)

    # First-serve errors: one record per missed first serve.
    first_faults = serves[serves['firstServeIn'] != 1.0].copy()
    first_faults['x'] = first_faults['firstServeXCoord']
    first_faults['y'] = first_faults['firstServeYCoord']

    # Second-serve errors: only for points where the second serve missed too.
    # The first-serve record of these points is already in first_faults, so it
    # is not appended a second time.
    second_faults = first_faults[first_faults['secondServeIn'] != 1.0].copy()
    second_faults['x'] = second_faults['secondServeXCoord']
    second_faults['y'] = second_faults['secondServeYCoord']

    all_faults = pd.concat([first_faults, second_faults], ignore_index=True)

    # Select only the specified columns (copied so 'type' can be added safely)
    serve_errors = all_faults[['serverName', 'firstServeIn', 'secondServeIn',
                               'x', 'y']].copy()

    errorTypes = ['Deuce Net', 'Deuce Wide', 'Deuce Long', 'Ad Net', 'Ad Wide', 'Ad Long']

    x = serve_errors['x']
    y = serve_errors['y']
    at_net = y == 0
    # y is non-negative at this point, so "between the net and 245" has to be
    # tested with y > 0; the previous y < 0 test could never match.
    before_line = (y > 0) & (y < 245)
    past_line = y > 245
    deuce_half = x < 0
    ad_half = x > 0

    # Categorize serve errors by type; np.select takes the first matching
    # condition, in the same order as errorTypes.
    serve_errors['type'] = np.select(
        [
            deuce_half & at_net,                                           # Deuce Net
            before_line & ((x < -157.5) | (ad_half & (x < 157.5))),       # Deuce Wide
            deuce_half & past_line,                                        # Deuce Long
            ad_half & at_net,                                              # Ad Net
            before_line & ((x > 157.5) | (deuce_half & (x > -157.5))),    # Ad Wide
            ad_half & past_line,                                           # Ad Long
        ],
        errorTypes,
        default='Unknown'
    )

    # Distribution of serve error types (types with no record are reported as 0)
    serve_distribution = (
        serve_errors.groupby('type')
        .size()
        .reindex(errorTypes, fill_value=0)
        .rename_axis('type')
        .reset_index(name='count')
    )

    # Export the data
    serve_error_json = serve_errors.to_json(orient='records')
    serve_error_dist_json = serve_distribution.to_json(orient='records')

    with open(os.path.join(target_dir, 'serve_error.json'), 'w') as f:
        f.write(serve_error_json)
    with open(os.path.join(target_dir, 'serve_error_dist.json'), 'w') as f:
        f.write(serve_error_dist_json)

# Run the three exports, in order, for the configured player and match CSV.
for export in (serve_dist, serve_place, serve_error_dist):
    export(player, path)

FileNotFoundError: [Errno 2] No such file or directory: '../match-csvs/Shot_Visuals_SpencerJohnson_ThomasPaulsell'