In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy.spatial import Voronoi, voronoi_plot_2d

In [2]:
from shapely.geometry import Polygon, Point
from shapely.ops import unary_union
import mplcursors

In [3]:
players = pd.read_csv("players.csv")
plays = pd.read_csv("plays.csv")
playerplays = pd.read_csv("player_play.csv")

In [5]:
club_colors = {
    'ARI': '#97233F', 'ATL': '#000000', 'BAL': '#241773',
    'BUF': '#00338D', 'CAR': '#0085CA', 'CHI': '#C83803',
    'CIN': '#FB4F14', 'CLE': '#311D00', 'DAL': '#003594',
    'DEN': '#FB4F14', 'DET': '#FB4F14', 'GB': '#203731',
    'HOU': '#03202F', 'IND': '#002C5F', 'JAX': '#101820',
    'KC': '#E31837', 'LV': '#A5ACAF', 'LAC': '#0080C6',
    'LAR': '#003594', 'MIA': '#008E97', 'MIN': '#4F2683',
    'NE': '#002244', 'NO': '#D3BC8D', 'NYG': '#0B2265',
    'NYJ': '#125740', 'PHI': '#004C54', 'PIT': '#FFB612',
    'SF': '#AA0000', 'SEA': '#002244', 'TB': '#D50A0A',
    'TEN': '#0C2340', 'WAS': '#5A1414'
}

In [6]:
def voronoi_visual(samplePlay):
    samplePlay = pd.merge(
        samplePlay,
        plays[['gameId','playId','possessionTeam', 'yardlineSide', 'yardlineNumber','absoluteYardlineNumber']],
        how = 'left',
        left_on = ['gameId','playId'],
        right_on = ['gameId','playId']
    )
    samplePlayPoints = samplePlay[['nflId','x','y','club','position', 'possessionTeam', 'yardlineSide', 'yardlineNumber','absoluteYardlineNumber', 'frameId', 'playId']]
    new_rows = []

    # Loop over each row in samplePlayPoints
    for _, row in samplePlayPoints.iterrows():
        if row['position'] in ['WR', 'TE', 'RB']:
            if row['position'] in ['WR', 'TE', 'RB']:
                if row['possessionTeam'] == row['yardlineSide']:
                    if row['absoluteYardlineNumber'] < 60:
                        xpoint = row['x'] - 5
                    else:
                        xpoint = row['x'] + 5
                else:
                    if row['absoluteYardlineNumber'] > 60:
                        xpoint = row['x'] - 5
                    else:
                        xpoint = row['x'] + 5   
            new_row = {
                'x': xpoint,
                'y': row['y'],
                'position': '',
                'club': None,
                'nflId': 0
            }
            new_rows.append(new_row)

    new_rows_df = pd.DataFrame(new_rows)
    filteredSamplePlayPoints = pd.concat([samplePlayPoints, new_rows_df], ignore_index=True)
    points = filteredSamplePlayPoints[['x', 'y']].values
    x_min, x_max = 0, 120
    y_min, y_max = 0, 53.3
    boundary_points = np.array([
        [x_min, y_min],
        [x_min, y_max],
        [x_max, y_min],
        [x_max, y_max]
    ])
    all_points = np.vstack([points, boundary_points])
    vor = Voronoi(all_points)
    boundary_polygon = Polygon([(x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min)])

    clipped_polygons = []
    polygon_areas = []
    for region in vor.regions:
        if not -1 in region and len(region) > 0:
            polygon = Polygon([vor.vertices[i] for i in region])
            clipped_polygon = polygon.intersection(boundary_polygon)
            if not clipped_polygon.is_empty:
                clipped_polygons.append(clipped_polygon)
                polygon_areas.append(clipped_polygon.area)

    fig, ax = plt.subplots()


    for poly in clipped_polygons:
        x, y = poly.exterior.xy
        ax.fill(x, y, alpha=0.5, edgecolor='black', facecolor='white')

    clubs = samplePlayPoints['club'].unique()
    for club in clubs:
        club_points = samplePlayPoints[samplePlayPoints['club'] == club]
        club_color = club_colors.get(club, 'grey')
        ax.scatter(club_points['x'], club_points['y'], label=club, s=10, color=club_color)

    plt.ylim(0, 53.3)
    plt.xlim(0, 120)
    plt.legend()
    
    plt.show()

    player_info = {tuple(row[['x', 'y']].values): {'nflId': row['nflId'], 'position': row['position'], 'frameId': row['frameId'], 'playId': row['playId']} 
               for idx, row in filteredSamplePlayPoints.iterrows()}

    new_rows_list = []
    for poly, area in zip(clipped_polygons, polygon_areas):
        for point_coords, info_dict in player_info.items():
            point = Point(point_coords)
            if poly.contains(point) and info_dict['position'] in ['TE', 'RB', 'WR']:
                # Create a new row
                new_row = {
                    'nflId': info_dict['nflId'],
                    'playId': info_dict['playId'],
                    'frameId': info_dict['frameId'],
                    'position': info_dict['position'],
                    'voronoiArea': area
                }
                new_rows_list.append(new_row)
    info = pd.DataFrame(new_rows_list)
    return info
    

In [7]:
def clean_data_voronoi(sampleLoop):
    sampleLoop = sampleLoop[(sampleLoop['displayName']!="football")]
    sampleLoop = sampleLoop[(sampleLoop['frameType'] == "AFTER_SNAP") | (sampleLoop['frameType']=="SNAP")]
    sampleLoop = pd.merge(
        sampleLoop,
        plays[['gameId','playId','timeToThrow','timeToSack','pff_manZone', 'possessionTeam', 'yardlineSide', 'absoluteYardlineNumber','passResult','yardlineNumber']],
        how = "left",
        left_on = ['gameId','playId'],
        right_on=['gameId','playId']
    )
    sampleLoop = sampleLoop[sampleLoop['passResult'].notna()]
    sampleLoop = sampleLoop[sampleLoop['pff_manZone']=="Man"]
    snap_frames = sampleLoop[sampleLoop['frameType'] == 'SNAP'][['gameId', 'playId', 'frameId']]
    sampleLoop = pd.merge(sampleLoop, snap_frames, on=['gameId', 'playId'], how='left', suffixes=('', '_snapframe'))
    sampleLoop['snapframe'] = sampleLoop['frameId_snapframe']
    after_snap_data = sampleLoop.drop(columns=['frameId_snapframe'])
    timeSinceSnap = (after_snap_data['frameId']-after_snap_data['snapframe'])/10
    after_snap_data = after_snap_data[(timeSinceSnap > 1)&(timeSinceSnap<after_snap_data['timeToThrow'])]
    return after_snap_data

In [8]:
def max_voronoi_per_play(after_snap_data):
    results = []
    for name, group in after_snap_data.groupby(['gameId', 'playId', 'frameId']):
        frame = group[['nflId','club','x','y','position','gameId','playId','frameId','timeToThrow','snapframe','possessionTeam', 'yardlineSide','absoluteYardlineNumber','yardlineNumber']]
        frame['timeSinceSnap'] = (frame['frameId'] - frame['snapframe'])/10
        new_rows = []
        for _, row in frame.iterrows():
            if row['position'] in ['WR', 'TE', 'RB']:
                if row['possessionTeam'] == row['yardlineSide']:
                    if row['absoluteYardlineNumber'] < 60:
                        xpoint = row['x'] - 5
                    else:
                        xpoint = row['x'] + 5
                else:
                    if row['absoluteYardlineNumber'] > 60:
                        xpoint = row['x'] - 5
                    else:
                        xpoint = row['x'] + 5  
                new_row = {
                    'x': xpoint,
                    'y': row['y'],
                    'position': '',
                    'club': None,
                    'nflId': 0,
                    'gameId': row['gameId'],
                    'playId': row['playId'],
                    'frameId': row['frameId']
                }
                new_rows.append(new_row)
        new_rows_df = pd.DataFrame(new_rows)
        newFrames = pd.concat([frame, new_rows_df], ignore_index=True)
        points = newFrames[['x', 'y']].values
        x_min, x_max = 0, 120
        y_min, y_max = 0, 53.3
        boundary_points = np.array([
            [x_min, y_min],
            [x_min, y_max],
            [x_max, y_min],
            [x_max, y_max]
        ])
        all_points = np.vstack([points, boundary_points])
        vor = Voronoi(all_points)
        boundary_polygon = Polygon([(x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min)])
        
        clipped_polygons = []
        polygon_areas = []
        for region in vor.regions:
            if not -1 in region and len(region) > 0:
                polygon = Polygon([vor.vertices[i] for i in region])
                clipped_polygon = polygon.intersection(boundary_polygon)
                if not clipped_polygon.is_empty:
                    clipped_polygons.append(clipped_polygon)
                    polygon_areas.append(clipped_polygon.area)

        player_info = {tuple(row[['x', 'y']].values): {'nflId': row['nflId'], 'position': row['position'], 'TTT': row['timeToThrow'], 'timeSinceSnap': row['timeSinceSnap'], 'yardline':row['x'],'absoluteYardLine':row['absoluteYardlineNumber'], 'playYardLine':row['yardlineNumber'], 'possessionTeam':row['possessionTeam'], 'yardlineSide':row['yardlineSide']} 
                    for idx, row in newFrames.iterrows()}
        for poly, area in zip(clipped_polygons, polygon_areas):
            for point_coords, info in player_info.items():
                point = Point(point_coords)
                if poly.contains(point) and info['position'] in ['TE','RB','WR']:
                    if info['possessionTeam'] == info['yardlineSide']:
                        if info['absoluteYardLine'] < 60:
                            downfield = info['yardline'] - info['absoluteYardLine']
                        else:
                            downfield = info['absoluteYardLine'] - info['yardline']
                    else:
                        if info['absoluteYardLine'] > 60:
                            downfield = info['yardline'] - info['absoluteYardLine']
                        else:
                            downfield = info['absoluteYardLine'] - info['yardline']
                    results.append({
                        'gameId': name[0],
                        'playId': name[1],
                        'frameId': name[2],
                        'nflId': info['nflId'],
                        'position': info['position'],
                        'voronoiArea': area,
                        'timeToThrow': info['TTT'],
                        'timeSinceSnap': info['timeSinceSnap'],
                        'yardsDownField': downfield
                    })
    results = pd.DataFrame(results)
    results['valueOfSeparation'] = (results['voronoiArea'] * (1.01750249 ** results['yardsDownField']))
    return results

In [9]:
def group_voronoi_results(results):
    max_separation_per_play = results.groupby(['gameId', 'playId', 'nflId'])['valueOfSeparation'].max().reset_index()
    max_separation_per_play.columns = ['gameId', 'playId', 'routeRunnerId', 'maxValueOfSeparation']
    
    result = pd.merge(
        max_separation_per_play,
        results[['gameId', 'playId', 'nflId', 'frameId', 'valueOfSeparation', 'timeSinceSnap', 'timeToThrow', 'yardsDownField']],  # Select necessary columns
        how='left',
        left_on=['gameId', 'playId', 'routeRunnerId', 'maxValueOfSeparation'],  # Join on the maximum separation
        right_on=['gameId', 'playId', 'nflId', 'valueOfSeparation']  # Join on the separation value
    )
    result = result.drop(columns=['nflId','valueOfSeparation'])
    return result

In [10]:
weekOneTracking = pd.read_csv("tracking_week_1.csv")

In [14]:
weekOneTracking = pd.merge(
    weekOneTracking,
    players[['nflId','position']],
    how = "left",
    left_on=['nflId'],
    right_on=['nflId']
)

In [15]:
sample = weekOneTracking[weekOneTracking['gameId']==2022091106]
cleanedSample = clean_data_voronoi(sample)
voronoiAreas = max_voronoi_per_play(cleanedSample)
groupedVoronoi = group_voronoi_results(voronoiAreas)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  frame['timeSinceSnap'] = (frame['frameId'] - frame['snapframe'])/10
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  frame['timeSinceSnap'] = (frame['frameId'] - frame['snapframe'])/10
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  frame['timeSinceSnap'] = (frame['frameId'] - frame['snapframe'])/10
A

In [None]:
voronoiRoutes = pd.merge(
    groupedVoronoi,
    playerplays[['nflId','gameId','playId','routeRan']],
    how = "left",
    left_on = ['gameId','playId','routeRunnerId'],
    right_on = ['gameId','playId','nflId']
)

In [None]:
voronoiYardline = pd.merge(
    voronoiRoutes,
    plays[['gameId','playId','absoluteYardlineNumber']],
    how="left",
    left_on =['gameId','playId'],
    right_on=['gameId','playId']
)

In [None]:
voronoiWeekOne = voronoiYardline[(voronoiYardline['routeRan'].notna())]
voronoiWeekOne.to_csv('voronoiWeekOne.csv')