In [7]:
import pandas as pd
import numpy as np

def gen_serve_error(player, path):
    # Load the data
    events = pd.read_csv(path)
    events['pointWonBy'] = events.groupby('pointNumber')['pointWonBy'].bfill()

    # Filter for returns by the specified player
    returns = events[(events['serverName'] == player) & (events['shotInRally'] == 1)].copy()

    # Flip y and x coordinates where y > 0
    returns['firstServeYCoord'] = returns['firstServeYCoord'].apply(lambda y: -y if y < 0 else y)
    returns['secondServeYCoord'] = returns['secondServeYCoord'].apply(lambda y: -y if y < 0 else y)
    
    returns['firstServeXCoord'] = returns.apply(lambda row: -row['firstServeXCoord'] if row['firstServeYCoord'] < 0 else row['firstServeXCoord'], axis=1)
    returns['secondServeXCoord'] = returns.apply(lambda row: -row['secondServeXCoord'] if row['secondServeYCoord'] < 0 else row['secondServeXCoord'], axis=1)

    # Round coords near net
    returns['firstServeYCoord'] = returns['firstServeYCoord'].apply(lambda y: 0 if y <= 25 else y)
    returns['secondServeYCoord'] = returns['secondServeYCoord'].apply(lambda y: 0 if y <= 25 else y)

    # Apply additional filtering for firstServeIn and firstServe/secondServeIn conditions
    returns = returns[
        (returns['firstServeIn'] != 1.0) | 
        ((returns['firstServeIn'] != 1.0) & (returns['secondServeIn'] != 1.0))
    ]

    # Create new columns 'x' and 'y' based on serve conditions
    returns['x'] = np.where(returns['firstServeIn'] != 1.0, returns['firstServeXCoord'], returns['secondServeXCoord'])
    returns['y'] = np.where(returns['firstServeIn'] != 1.0, returns['firstServeYCoord'], returns['secondServeYCoord'])

    # Duplicate rows where both firstServeIn and secondServeIn are not equal to 1.0
    double_errors = returns[(returns['firstServeIn'] != 1.0) & (returns['secondServeIn'] != 1.0)]
    if not double_errors.empty:
        first_serve_errors = double_errors.copy()
        second_serve_errors = double_errors.copy()

        # For first serve errors, use firstServe coordinates
        first_serve_errors['x'] = first_serve_errors['firstServeXCoord']
        first_serve_errors['y'] = first_serve_errors['firstServeYCoord']

        # For second serve errors, use secondServe coordinates
        second_serve_errors['x'] = second_serve_errors['secondServeXCoord']
        second_serve_errors['y'] = second_serve_errors['secondServeYCoord']

        # Combine original, first serve errors, and second serve errors
        returns = pd.concat([returns, first_serve_errors, second_serve_errors], ignore_index=True)
    else:
        returns = pd.concat([returns, double_errors], ignore_index=True)

    # Select only the specified columns
    serve_errors = returns[['serverName', 'firstServeIn', 'secondServeIn', 
                             'x', 'y']]
    
    # Categorize serve errors by type
    serve_errors['type'] = np.select(
        [
            (serve_errors['x'] < 0) & (serve_errors['y'] == 0),  # Deuce Net
            ((serve_errors['x'] < -157.5) & (serve_errors['y'] < 0) & (serve_errors['y'] < 245)) |
            ((serve_errors['x'] > 0) & (serve_errors['x'] < 157.5) & (serve_errors['y'] < 0) & (serve_errors['y'] < 245)), # Deuce Wide
            (serve_errors['x'] < 0) & (serve_errors['y'] > 245),  # Deuce Long
            (serve_errors['x'] > 0) & (serve_errors['y'] == 0),  # Ad Net
            ((serve_errors['x'] > 157.5) & (serve_errors['y'] < 0) & (serve_errors['y'] < 245)) |
            ((serve_errors['x'] < 0) & (serve_errors['x'] > -157.5) & (serve_errors['y'] < 0) & (serve_errors['y'] < 245)),  # Ad Wide
            (serve_errors['x'] > 0) & (serve_errors['y'] > 245)  # Ad Long
        ],
        [
            'Deuce Net', 'Deuce Wide', 'Deuce Long', 'Ad Net', 'Ad Wide', 'Ad Long'
        ],
        default='Unknown'
    )

    errorTypes = ['Deuce Net', 'Deuce Wide', 'Deuce Long', 'Ad Net', 'Ad Wide', 'Ad Long']

    # Distribution of serve error types
    serve_distribution = serve_errors.groupby('type').size().reindex(errorTypes, fill_value=0).reset_index(name='count')

    # Export the data
    serve_errors_json = serve_errors.to_json(orient='records')
    serve_errorDist_json = serve_distribution.to_json(orient='records')

    with open('serve_errors.json', 'w') as f:
        f.write(serve_errors_json)
    with open('serve_errorDist.json', 'w') as f:
        f.write(serve_errorDist_json)

player = "Anne Lutkemeyer"
path = "../Match CSVs/Shot_Visuals_AnneLutkemeyer_CatherineGagnon.csv"
gen_serve_error(player, path)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  serve_errors['type'] = np.select(
