In [2]:
import pandas as pd
import numpy as np

# Read the shot-level export (path is relative to the notebook's working directory)
events = pd.read_csv("../../Shot_Visuals_RudyQuan_AristotelisThanos.csv")

# Export column name -> name used throughout the rest of this notebook
column_aliases = {
    'pointNumber': 'rallyid',
    'shotContactX': 'hitter_x',
    'shotContactY': 'hitter_y',
    'shotInRally': 'strokeid',
    'shotHitBy': 'hitter',
}

# Rename, then append the derived columns in a single chained step.
# server / receiver are copies of existing columns; receiver_x / receiver_y are
# constant zero placeholders.
events = events.rename(columns=column_aliases).assign(
    server=lambda shots: shots['hitter'],
    receiver_x=0,
    receiver_y=0,
    receiver=lambda shots: shots['returnerName'],
)

# Set default serve coordinates.
# A serve (strokeid == 1) is given a fixed contact point chosen from the server's
# end of the court and the service side; every other shot keeps its recorded
# contact position.
is_serve = events['strokeid'] == 1
server_near = events['serverFarNear'] == 'Near'
server_far = events['serverFarNear'] == 'Far'
deuce_side = events['side'] == 'Deuce'
ad_side = events['side'] == 'Ad'

# Near/Deuce and Far/Ad serves are placed at x = 50; Near/Ad and Far/Deuce at x = -50.
serve_at_pos_x = is_serve & ((server_near & deuce_side) | (server_far & ad_side))
serve_at_neg_x = is_serve & ((server_near & ad_side) | (server_far & deuce_side))

events['hitter_x'] = np.where(
    serve_at_pos_x, 50,
    # Fall back to the shot's own x coordinate. The fallback used to be
    # events['hitter_y'], which replaced x with y on every non-serve shot.
    np.where(serve_at_neg_x, -50, events['hitter_x'])
)

# Serve y is -475 when the server is 'Near' and 475 otherwise; non-serves keep their y.
events['hitter_y'] = np.where(
    is_serve,
    np.where(server_near, -475, 475),
    events['hitter_y']
)

# One "<gameScore>, <pointScore>" string per shot
events['score'] = events['gameScore'].add(', ').add(events['pointScore'])

# Keep only the rows flagged as the last shot of a point
ends_point = events['isPointEnd'].eq(1.0)
points = events.loc[ends_point]

# Filter and mutate serves: build one row per rally.
#
# This replaces a per-group `groupby('rallyid').apply(lambda ...)` that returned a
# mixed-type pd.Series for every rally. That form runs a Python callback per rally
# and yields object-dtype columns; selecting the first/last row of each rally
# directly produces the same rows and columns with native dtypes.
#
# Rows without a rallyid are dropped and rallies are ordered by rallyid. The stable
# sort keeps each rally's shots in their original relative order, so "first" and
# "last" mean the first and last row of the rally as it appears in `events`.
shots_by_rally = events.dropna(subset=['rallyid']).sort_values('rallyid', kind='stable')
first_shot = shots_by_rally.drop_duplicates('rallyid', keep='first').reset_index(drop=True)
last_shot = shots_by_rally.drop_duplicates('rallyid', keep='last').reset_index(drop=True)

# Both frames now hold exactly one row per rally in the same order, so they can be
# combined positionally.
first_in = first_shot['firstServeIn'] == 1.0
second_in = first_shot['secondServeIn'] == 1.0

serves = pd.DataFrame({
    'rallyid': first_shot['rallyid'],
    'server': first_shot['server'],
    # Use the first-serve coordinates when the first serve was in, otherwise the
    # second-serve coordinates.
    'x': first_shot['firstServeXCoord'].where(first_in, first_shot['secondServeXCoord']),
    'y': first_shot['firstServeYCoord'].where(first_in, first_shot['secondServeYCoord']),
    'serveIn': first_in | second_in,
    'side': first_shot['side'],
    'serveInPlacement': first_shot['serveInPlacement'],
    # The point winner is read from the rally's last row.
    'pointWonByUCLA': last_shot['pointWonBy'] == 'Rudy Quan',
    'isAce': first_shot['isAce'],
    'serveResult': first_shot['serveResult'],
})

# Keep only rallies in which either serve landed in
serves = serves[serves['serveIn']]

  serves = events.groupby('rallyid').apply(lambda df: pd.Series({


In [3]:
# Cleaning for coordinate data

# Filter serves where server is UCLA player and serve was in
serves_ucla = serves[(serves['server'] == 'Rudy Quan') & (serves['serveIn'])].copy()

# Mirror every landing spot with y > 0 through the origin (negate both x and y).
# The mask is taken once, before either column is modified, so x and y are always
# flipped for the same rows.
lands_positive_y = serves_ucla['y'] > 0
serves_ucla['x'] = np.where(lands_positive_y, -serves_ucla['x'], serves_ucla['x'])
serves_ucla['y'] = np.where(lands_positive_y, -serves_ucla['y'], serves_ucla['y'])

# Define whether 1st or 2nd serve.
# Series.map leaves any other serveResult as a real missing value. The previous
# nested np.where mixed string labels with np.nan, which turns the missing marker
# into the literal text 'nan' and exports it to JSON as the string "nan".
serves_ucla['firstOrSecServe'] = serves_ucla['serveResult'].map({
    "1st Serve In": '1st Serve',
    "2nd Serve In": '2nd Serve',
})

In [4]:
# Cleaning for Win % and Freq.
# One row per (side, serveInPlacement): number of serves and number of points won.
placement_groups = serves_ucla.groupby(['side', 'serveInPlacement'])
distribution = placement_groups.agg(
    count=('rallyid', 'size'),
    serves_won=('pointWonByUCLA', 'sum'),
).reset_index()

# Win percentage expressed as a proportion of serves in that cell
distribution['proportion'] = distribution['serves_won'].div(distribution['count'])

# Extremes across all cells, used below to highlight the best and worst placement
min_proportion = distribution['proportion'].min()
max_proportion = distribution['proportion'].max()

# Labels frame: a copy of the distribution plus display-oriented columns
labels = distribution.copy()
labels['proportion_label'] = labels['proportion'].mul(100).round(1).astype(str) + "%"
labels['count_label'] = labels['count']

# x position of each label, looked up from (side, serveInPlacement);
# combinations not listed here get NaN.
on_ad = labels['side'] == 'Ad'
on_deuce = labels['side'] == 'Deuce'
is_wide = labels['serveInPlacement'] == 'Wide'
is_body = labels['serveInPlacement'] == 'Body'
is_t = labels['serveInPlacement'] == 'T'

labels['x'] = np.select(
    [
        on_ad & is_wide,
        on_ad & is_body,
        on_ad & is_t,
        on_deuce & is_t,
        on_deuce & is_body,
        on_deuce & is_wide,
    ],
    [-131.25, -78.75, -26.25, 26.25, 78.75, 131.25],
    default=np.nan,
)

# Highlighting. np.select takes the first matching condition, so the order of the
# condition lists sets the priority: the minimum wins for text_color, the maximum
# wins for max_min.
at_min = labels['proportion'] == min_proportion
at_max = labels['proportion'] == max_proportion

labels['text_color'] = np.select([at_min, at_max], ["darkred", "darkgreen"], default="black")
labels['max_min'] = np.select([at_max, at_min], ["max", "min"], default="no")

# Resulting DataFrame 'labels' now has the required columns
print(labels)

    side serveInPlacement  count  serves_won  proportion proportion_label  \
0     Ad             Body     24          17    0.708333            70.8%   
1     Ad                T      2           2    1.000000           100.0%   
2     Ad             Wide      5           3    0.600000            60.0%   
3  Deuce             Body     17           4    0.235294            23.5%   
4  Deuce                T     15           7    0.466667            46.7%   
5  Deuce             Wide      2           0    0.000000             0.0%   

   count_label       x text_color max_min  
0           24  -78.75      black      no  
1            2  -26.25  darkgreen     max  
2            5 -131.25      black      no  
3           17   78.75      black      no  
4           15   26.25      black      no  
5            2  131.25    darkred     min  


In [5]:
# Export data frames as JSON (one JSON object per row)
serves_firstSec_json = serves_ucla.to_json(orient='records')
labels_json = labels.to_json(orient='records')

# Write each payload to its file in the current working directory
json_exports = (
    ('serves_firstSec.json', serves_firstSec_json),
    ('labels.json', labels_json),
)
for out_name, payload in json_exports:
    with open(out_name, 'w') as out_file:
        out_file.write(payload)