In [1]:
# Mount Google Drive at /content/drive so the project files stored there are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
# Project folder on the mounted drive; later cells build their data paths from it.
cur_path = "/content/drive/MyDrive/BDB 2024/"
# Switch the working directory to the project folder.
os.chdir(cur_path)
!pwd

/content/drive/MyDrive/BDB 2024


In [3]:
!pip install pyspark

# The entry point to programming Spark with the DataFrame API.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[4]").appName("DataFrame").getOrCreate()

Collecting pyspark
  Downloading pyspark-3.5.0.tar.gz (316.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.9/316.9 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.0-py2.py3-none-any.whl size=317425345 sha256=3a959a69ff50a1c55f4f7673128e022aace35652da1c62046924468fff5f2579
  Stored in directory: /root/.cache/pip/wheels/41/4e/10/c2cf2467f71c678cfc8a6b9ac9241e5e44a01940da8fbb17fc
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.0


In [4]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 1000)

import dateutil
from math import radians
from IPython.display import Video

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import animation
from matplotlib.animation import FFMpegWriter

from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import sqrt

In [5]:
# Load the cleaned tracking data and the play-level data from the project
# folder. Both files have a header row; no schema is supplied to the reader.
header_reader = spark.read.option("header", True)
tracking = header_reader.csv(cur_path+"/clean_tracking.csv")
plays = header_reader.csv(cur_path+"/nfl-big-data-bowl-2024/plays.csv")

In [6]:
# Preview the first 5 rows of the tracking data.
tracking.show(5)

+---+----------+------+-------+--------------+-------+--------------------+------------+----+-------------+-----+-----+----+----+----+------+------+------------+-------------+----------------------+--------------------+-------+----+---------+--------------+-------------+------------+--------------+---------+----------------+-------------------+----------+----------+------------+--------------------+----------+----------------------+----------------------+----------------+-----------------+---------------+-----------------------------+--------------------------------+---------------------------+----------------------------+--------------+-------------------+---------+---------+----------+----------+-----------+------+------------------+------------------+-------------------+-------------------+------------------+
|_c0|    gameId|playId|  nflId|   displayName|frameId|                time|jerseyNumber|club|playDirection|    x|    y|   s|   a| dis|     o|   dir|       event|ballCarrierId|b

In [7]:
# Make nflId an integer column.
tracking = tracking.withColumn("nflId", tracking["nflId"].cast('int'))

# Build a second copy of the tracking data in which every column carries a
# `_carrier` suffix, so the frame can be joined to itself without name clashes.
total_columns = tracking.columns
carrier_side = tracking.toDF(*[name + '_carrier' for name in total_columns])

# A player row matches the carrier row of the same game, play and frame whose
# nflId equals the play's ballCarrierId.
carrier_match = (
    (tracking.ballCarrierId == carrier_side.nflId_carrier)
    & (tracking.gameId == carrier_side.gameId_carrier)
    & (tracking.playId == carrier_side.playId_carrier)
    & (tracking.frameId == carrier_side.frameId_carrier)
)

# Keep the player's own measurements, the carrier's measurements, and the
# play-level context columns.
tracking_carrier = tracking.join(carrier_side, on=carrier_match).select(
    'gameId', 'playId', 'nflId', 'displayName', 'frameId',
    'x', 'X_std', 'y', 'Y_std', 'abs_x', 'dir', 'dir_std', 's', 'a', 'o', 'o_std',
    'nflId_carrier', 'x_carrier', 'X_std_carrier', 'y_carrier', 'Y_std_carrier',
    'abs_x_carrier', 's_carrier', 'dir_carrier', 'dir_std_carrier',
    'a_carrier', 'o_carrier', 'o_std_carrier',
    'club', 'defensiveTeam', 'possessionTeam', 'playDirection',
    'down', 'yardsToGo', 'yardlineNumber',
)


In [8]:
# Show the joined frame's column names and types.
tracking_carrier

DataFrame[gameId: string, playId: string, nflId: int, displayName: string, frameId: string, x: string, X_std: string, y: string, Y_std: string, abs_x: string, dir: string, dir_std: string, s: string, a: string, o: string, o_std: string, nflId_carrier: int, x_carrier: string, X_std_carrier: string, y_carrier: string, Y_std_carrier: string, abs_x_carrier: string, s_carrier: string, dir_carrier: string, dir_std_carrier: string, a_carrier: string, o_carrier: string, o_std_carrier: string, club: string, defensiveTeam: string, possessionTeam: string, playDirection: string, down: string, yardsToGo: string, yardlineNumber: string]

# Distance to Ball Carrier

Players closer to the ball carrier can take a more direct path to them, so the distance to the carrier is included as a feature.

In [9]:
# Euclidean distance between each player and the ball carrier in the
# standardized (X_std, Y_std) coordinates.
tracking_carrier = tracking_carrier.withColumn(
    'dist_to_carrier',
    F.sqrt(
        (F.col('X_std') - F.col('X_std_carrier'))**2
        + (F.col('Y_std') - F.col('Y_std_carrier'))**2
    ),
)

In [10]:
# Show the schema again; it now includes dist_to_carrier.
tracking_carrier

DataFrame[gameId: string, playId: string, nflId: int, displayName: string, frameId: string, x: string, X_std: string, y: string, Y_std: string, abs_x: string, dir: string, dir_std: string, s: string, a: string, o: string, o_std: string, nflId_carrier: int, x_carrier: string, X_std_carrier: string, y_carrier: string, Y_std_carrier: string, abs_x_carrier: string, s_carrier: string, dir_carrier: string, dir_std_carrier: string, a_carrier: string, o_carrier: string, o_std_carrier: string, club: string, defensiveTeam: string, possessionTeam: string, playDirection: string, down: string, yardsToGo: string, yardlineNumber: string, dist_to_carrier: double]

# Nearest Offensive Player

It will probably be difficult to make a tackle if someone is blocking the defender. This will account for that.

In [11]:
# Two suffixed copies of tracking_carrier: `_def` for the player of interest
# and `_off` for the candidate opposing player.
total_columns = tracking_carrier.columns
tracking_def = tracking_carrier.toDF(*[name + '_def' for name in total_columns])
tracking_off = tracking_carrier.toDF(*[name + '_off' for name in total_columns])

# Pair every player with every player that appears in the same game, play and frame.
same_frame = (
    (tracking_def.gameId_def == tracking_off.gameId_off)
    & (tracking_def.playId_def == tracking_off.playId_off)
    & (tracking_def.frameId_def == tracking_off.frameId_off)
)
nearest_offensive = tracking_def.join(tracking_off, on=same_frame)

# Keep only pairs from different clubs where the `_off` player is not the ball carrier.
other_club = nearest_offensive['club_off'] != nearest_offensive['club_def']
not_the_carrier = nearest_offensive['nflId_carrier_def'] != nearest_offensive['nflId_off']
nearest_offensive = nearest_offensive.filter(other_club & not_the_carrier)

# Distance between the two players of each pair.
nearest_offensive = nearest_offensive.withColumn(
    'dist_to_off',
    sqrt(
        (nearest_offensive.X_std_def - nearest_offensive.X_std_off)**2
        + (nearest_offensive.Y_std_def - nearest_offensive.Y_std_off)**2
    ),
)

# Roll up to the smallest distance per player and frame. The aggregate column
# is named `min(dist_to_off)`.
nearest_offensive = (
    nearest_offensive
    .groupBy(['gameId_def', 'playId_def', 'nflId_def', 'frameId_def'])
    .agg({'dist_to_off': 'min'})
)

In [12]:
# Show the aggregated frame's schema: the four grouping keys plus min(dist_to_off).
nearest_offensive

DataFrame[gameId_def: string, playId_def: string, nflId_def: int, frameId_def: string, min(dist_to_off): double]

In [13]:
# Give the aggregated minimum distance a readable name.
nearest_offensive = nearest_offensive.withColumnRenamed("min(dist_to_off)", "dist_to_blocker")

# Attach the minimum distance back onto the per-player rows of tracking_carrier,
# matching on game, play, frame and player.
same_player_frame = (
    (tracking_carrier.gameId == nearest_offensive.gameId_def)
    & (tracking_carrier.playId == nearest_offensive.playId_def)
    & (tracking_carrier.frameId == nearest_offensive.frameId_def)
    & (tracking_carrier.nflId == nearest_offensive.nflId_def)
)
nearest_offensive = tracking_carrier.join(nearest_offensive, on=same_player_frame)

In [14]:
# Show the schema after the join; the `_def` key columns and dist_to_blocker are appended.
nearest_offensive

DataFrame[gameId: string, playId: string, nflId: int, displayName: string, frameId: string, x: string, X_std: string, y: string, Y_std: string, abs_x: string, dir: string, dir_std: string, s: string, a: string, o: string, o_std: string, nflId_carrier: int, x_carrier: string, X_std_carrier: string, y_carrier: string, Y_std_carrier: string, abs_x_carrier: string, s_carrier: string, dir_carrier: string, dir_std_carrier: string, a_carrier: string, o_carrier: string, o_std_carrier: string, club: string, defensiveTeam: string, possessionTeam: string, playDirection: string, down: string, yardsToGo: string, yardlineNumber: string, dist_to_carrier: double, gameId_def: string, playId_def: string, nflId_def: int, frameId_def: string, dist_to_blocker: double]

# Number of Blockers Between Defender and Carrier

Similar to the above where we try to account for difficulty of getting to the ball carrier.

Maybe do something similar to isBlocked. If the offensive player can get to an intersecting spot before the defensive player, then the defensive player is blocked. Count the number of offensive players that can get to the intersecting spot to determine the number of blockers remaining.

# Field Control?

I hypothesize that if the defender controls the field in between them and the ball carrier, then this will allow them to take a more direct path. Therefore, someone who doesn't control the field will have a different optimal pursuit angle than someone who controls that field.

# All Players on One Row

https://github.com/taidnguyen/big-data-bowl-2022/blob/master/main.ipynb

# Label

The label will be pursuit angle.

Use the following 3 points to create an angle:
- ball carrier
- defender
- intersection (calculated by extending the direction)

In [15]:
def line_coefs(x1, y1, dir, length=150):
    """
    Coefficients of the line through (x1, y1) that follows heading `dir`.

    A second point is placed `length` units away from (x1, y1) along the
    heading, using x2 = x1 + length*sin(dir) and y2 = y1 + length*cos(dir),
    and the line through both points is returned.

    `dir` is taken in degrees and converted to radians before the
    trigonometric functions are applied (Spark's sin/cos work in radians).
    NOTE(review): the plotting helper in this notebook passes the same
    direction values through `radians()`, which is why degrees are assumed
    here; confirm against the data-cleaning step.

    length is arbitrarily long, so 150.

    Returns the tuple (A, B, C) of column expressions for the line written
    as A*x + B*y = C.
    """
    heading = F.radians(dir)
    x2 = x1 + length*F.sin(heading)
    y2 = y1 + length*F.cos(heading)
    A = (y1 - y2)
    B = (x2 - x1)
    # With C as below the two-point form is A*x + B*y + C = 0, so the
    # right-hand side of A*x + B*y = ... is -C.
    C = (x1*y2 - x2*y1)
    return A, B, -C

def intersection(A_def,B_def,C_def,A_carrier,B_carrier,C_carrier):
    """
    Intersection of the defender line and the carrier line.

    Each line is given by its coefficients (A, B, C). The point is obtained
    from the three determinants below (Cramer's rule).

    Returns the pair (x, y) of column expressions. Both are null when the
    main determinant is zero.
    """
    det = A_def * B_carrier - B_def * A_carrier
    det_x = C_def * B_carrier - B_def * C_carrier
    det_y = A_def * C_carrier - C_def * A_carrier

    solvable = det != 0
    x = when(solvable, det_x / det).otherwise(None)
    y = when(solvable, det_y / det).otherwise(None)
    return x, y

In [16]:
defender = nearest_offensive
# Convert the string-typed position and heading columns to numbers. A double
# cast is used because an integer cast would drop the fractional part of the
# values.
defender = (
    defender
    .withColumn("X_std", defender.X_std.cast('double'))
    .withColumn("Y_std", defender.Y_std.cast('double'))
    .withColumn("dir_std", defender.dir_std.cast('double'))
)

In [17]:
# Line along each player's heading, stored as def_line_A/B/C.
def_A, def_B, def_C = line_coefs(
    nearest_offensive['X_std'], nearest_offensive['Y_std'], nearest_offensive['dir_std'], length=150)
nearest_offensive = (
    nearest_offensive
    .withColumn('def_line_A', def_A)
    .withColumn('def_line_B', def_B)
    .withColumn('def_line_C', def_C)
)

# Line along the ball carrier's heading, stored as carrier_line_A/B/C.
carrier_A, carrier_B, carrier_C = line_coefs(
    nearest_offensive['X_std_carrier'], nearest_offensive['Y_std_carrier'],
    nearest_offensive['dir_std_carrier'], length=150)
nearest_offensive = (
    nearest_offensive
    .withColumn('carrier_line_A', carrier_A)
    .withColumn('carrier_line_B', carrier_B)
    .withColumn('carrier_line_C', carrier_C)
)

# Both intersection coordinates come from ONE call with the six coefficients
# in the order intersection() expects, so x and y always describe the same
# point.
intersect_x, intersect_y = intersection(
    nearest_offensive['def_line_A'], nearest_offensive['def_line_B'], nearest_offensive['def_line_C'],
    nearest_offensive['carrier_line_A'], nearest_offensive['carrier_line_B'], nearest_offensive['carrier_line_C'])
nearest_offensive = (
    nearest_offensive
    .withColumn("intersect_x", intersect_x)
    .withColumn("intersect_y", intersect_y)
)

In [18]:
# Show the schema with the line-coefficient and intersection columns added.
nearest_offensive

DataFrame[gameId: string, playId: string, nflId: int, displayName: string, frameId: string, x: string, X_std: string, y: string, Y_std: string, abs_x: string, dir: string, dir_std: string, s: string, a: string, o: string, o_std: string, nflId_carrier: int, x_carrier: string, X_std_carrier: string, y_carrier: string, Y_std_carrier: string, abs_x_carrier: string, s_carrier: string, dir_carrier: string, dir_std_carrier: string, a_carrier: string, o_carrier: string, o_std_carrier: string, club: string, defensiveTeam: string, possessionTeam: string, playDirection: string, down: string, yardsToGo: string, yardlineNumber: string, dist_to_carrier: double, gameId_def: string, playId_def: string, nflId_def: int, frameId_def: string, dist_to_blocker: double, def_line_A: double, def_line_B: double, def_line_C: double, carrier_line_A: double, carrier_line_B: double, carrier_line_C: double, intersect_x: double, intersect_y: double]

In [19]:
# nearest_offensive_pd_df = nearest_offensive.filter((nearest_offensive['gameId']==2022090800) & (nearest_offensive['playId']==101)).toPandas()
# nearest_offensive_pd_df.head()

In [20]:
# Pull a single play (game 2022090800, play 101) into pandas for plotting.
# NOTE(review): this rebinds `tracking_carrier` from the Spark frame to the
# `.toPandas()` result, while a later cell still calls `.withColumn` on
# `tracking_carrier`. The recorded run was interrupted here, so the rebinding
# never happened in that session — confirm which frame later cells expect.
tracking_carrier = tracking_carrier.filter((tracking_carrier['gameId']==2022090800) & (tracking_carrier['playId']==101)).toPandas()


ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/usr/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: ignored

In [None]:
# Pull the same single play (game 2022090800, play 101) of the raw tracking
# data into pandas. This rebinds `tracking` to the `.toPandas()` result.
tracking = tracking.filter((tracking['gameId']==2022090800) & (tracking['playId']==101)).toPandas()

In [None]:
# let's see what this looks like
def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=55,
                          highlight_first_down_line=False,
                          yards_to_go=10,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 6.33)):
    """
    Function that plots the football field for viewing plays.
    Allows for showing or hiding endzones.

    The field is drawn in a 120 x 53.3 coordinate system; yard numbers passed
    in are shifted by 10 to account for the left end zone.

    Parameters
    ----------
    linenumbers : draw the yard numbers along both sidelines.
    endzones : shade the two 10-unit-wide end zones; also limits the hash
        marks to the area between them.
    highlight_line : draw a yellow line at `highlight_line_number`.
    highlight_line_number : yard line to highlight (before the +10 shift).
    highlight_first_down_line : draw a second yellow line `yards_to_go`
        away from the highlighted yard line. Works whether or not
        `highlight_line` is set.
    yards_to_go : signed offset of the second line from the first.
    highlighted_name : currently unused; kept for interface compatibility.
    fifty_is_los : draw a gold line and label at x = 60.
    figsize : figure size passed to `plt.subplots`.

    Returns
    -------
    (fig, ax) : the created figure and axes.
    """
    fig, ax = plt.subplots(1, figsize=figsize)

    # Playing surface.
    ax.add_patch(patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                                   edgecolor='r', facecolor='darkgreen', zorder=0))

    # Ten-yard lines and the field outline, drawn as one continuous path.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='white')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')

    # Endzones: each is 10 units wide, at x in [0, 10] and [110, 120].
    if endzones:
        for left_edge in (0, 110):
            ax.add_patch(patches.Rectangle((left_edge, 0), 10, 53.3,
                                           linewidth=0.1,
                                           edgecolor='r',
                                           facecolor='blue',
                                           alpha=0.2,
                                           zorder=0))
    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')

    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to 50 and back down on the far side.
            numb = x
            if x > 50:
                numb = 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,
                    color='white')
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,
                    color='white', rotation=180)

    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    # Computed unconditionally: the first-down line is positioned relative to
    # it even when the highlighted line itself is not drawn.
    hl = highlight_line_number + 10
    if highlight_line:
        ax.plot([hl, hl], [0, 53.3], color='yellow')

    if highlight_first_down_line:
        fl = hl + yards_to_go
        ax.plot([fl, fl], [0, 53.3], color='yellow')
    return fig, ax

In [None]:
# Left-join each player row to the pandas `tracking_carrier` frame on game,
# play, frame and player to bring in `club`. Overlapping non-key columns from
# the left frame get the `_carrier` suffix; those from the right keep their name.
# NOTE(review): `nearest_offensive_pd_df` is only assigned in a commented-out
# line of an earlier cell, so this cell depends on that line having been run.
defender = nearest_offensive_pd_df
defender_new = defender.merge(tracking_carrier[['gameId','playId','nflId','frameId','club']],
               how='left',
               left_on=['gameId','playId','frameId','nflId'],
               right_on=['gameId','playId','frameId','nflId'],
                             suffixes=['_carrier',''])

In [None]:
# Add each player's jerseyNumber from the pandas `tracking` frame. No keys or
# `how` are given, so pandas uses its defaults for both.
defender_new = defender_new.merge(tracking[['gameId','playId','nflId','frameId','jerseyNumber']])

In [None]:
# Add the ball carrier's jerseyNumber: the left key is `nflId_carrier`, the
# right key is the tracking frame's `nflId`. Overlapping non-key columns from
# the right frame get the `_carrier` suffix.
defender_new = defender_new.merge(tracking[['gameId','playId','nflId','frameId','jerseyNumber']],
                   how='left',
                   left_on=['gameId','playId','nflId_carrier','frameId'],
                   right_on=['gameId','playId','nflId','frameId'],
                  suffixes=['','_carrier'])

In [None]:
# Arrow components for a heading measured in degrees from the +y axis,
# increasing towards +x (0 -> straight up, 90 -> straight right).
def calculate_dx_dy_arrow(x, y, angle, speed, multiplier):
    """
    Return the (dx, dy) offsets of an arrow pointing along `angle`.

    Parameters
    ----------
    x, y : arrow origin. Not used in the calculation; kept so existing
        call sites keep working.
    angle : heading in degrees. Any value is accepted; the result is
        periodic in 360.
    speed, multiplier : both scale the arrow length, which is
        `speed * multiplier`.

    Returns
    -------
    (dx, dy) where dx = sin(angle) * multiplier * speed and
    dy = cos(angle) * multiplier * speed.

    The same formula is applied in every quadrant, so the arrow direction is
    continuous across 90, 180, 270 and 360 degrees.
    """
    theta = radians(angle)
    dx = np.sin(theta) * multiplier * speed
    dy = np.cos(theta) * multiplier * speed
    return dx, dy

In [None]:
def animate_player_movement_select_players(weekNumber, playId, gameId, nflId_home, nflId_away, data_dir=None):
    """
    Animate one ball carrier and selected defenders and mark the intersection point.

    Reads `tracking_week_<weekNumber>.csv` and `plays.csv` from `data_dir`,
    joins the tracking rows to the notebook-level `defender_new` frame on
    game, play, player and frame, and builds one list of artists per frame on
    top of `create_football_field`.

    Parameters
    ----------
    weekNumber : selects the tracking file for that week.
    playId, gameId : the play to animate.
    nflId_home : value matched against `nflId_carrier`; drawn in gold.
    nflId_away : collection of ids matched against `nflId_defender`; drawn in
        orange-red together with the red `x` at (intersect_x, intersect_y).
    data_dir : folder containing the CSV files. Defaults to the
        `nfl-big-data-bowl-2024` folder under the notebook-level `cur_path`.

    Returns
    -------
    matplotlib.animation.ArtistAnimation

    NOTE(review): both selections are restricted to club == "LA", and the
    defender values are read from columns with a `_defender` suffix
    (`nflId_defender`, `X_std_defender`, ...). Confirm `defender_new`
    actually carries columns with those names.
    """
    if data_dir is None:
        data_dir = os.path.join(cur_path, 'nfl-big-data-bowl-2024')

    weekData = pd.read_csv(os.path.join(data_dir, 'tracking_week_' + str(weekNumber) + '.csv'))
    weekData = weekData[weekData['displayName']!='football']
    weekData = weekData[['gameId','playId','nflId','frameId','time']].drop_duplicates().merge(defender_new, how='left', on=['gameId','playId','nflId','frameId'])
    playData = pd.read_csv(os.path.join(data_dir, 'plays.csv'))

    # Explicit copies: the `time` column is overwritten on both below.
    playHome = weekData.query('gameId==' + str(gameId) + ' and playId==' + str(playId) + ' and club == "LA"' + 'and nflId_carrier ==' + str(nflId_home)).copy()
    playAway = weekData[(weekData['gameId']==gameId) & (weekData['playId']==playId) & (weekData['club']=='LA') & (weekData['nflId_defender'].isin(nflId_away))].copy()

    # Replace timestamps by their dense rank so frames are numbered 1, 2, 3, ...
    playHome['time'] = playHome['time'].apply(lambda x: dateutil.parser.parse(x).timestamp()).rank(method='dense')
    playAway['time'] = playAway['time'].apply(lambda x: dateutil.parser.parse(x).timestamp()).rank(method='dense')

    maxTime = int(playAway['time'].unique().max())
    minTime = int(playAway['time'].unique().min())

    # The single plays.csv row for this play.
    thisPlay = playData.query('gameId==' + str(gameId) + ' and playId==' + str(playId))
    yardlineNumber = thisPlay['yardlineNumber'].item()
    yardsToGo = thisPlay['yardsToGo'].item()
    absoluteYardlineNumber = thisPlay['absoluteYardlineNumber'].item() - 10
    playDir = playHome.sample(1)['playDirection'].item()

    # yardlineNumber counts from the nearer goal line; convert it to a
    # position measured from the left when the ball is past midfield.
    if (absoluteYardlineNumber > 50):
        yardlineNumber = 100 - yardlineNumber

    # The first-down line lies to the left of the line of scrimmage when the
    # play moves left.
    if (playDir == 'left'):
        yardsToGo = -yardsToGo

    fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber, highlight_first_down_line=True, yards_to_go=yardsToGo)
    playDesc = thisPlay['playDescription'].item()
    ax.set_title(f'Game # {gameId} Play # {playId} \n {playDesc}')

    def update_animation(time):
        """Draw every artist for frame `time` and return them as a list."""
        patch = []
        homeNow = playHome.query('time == ' + str(time))
        awayNow = playAway.query('time == ' + str(time))

        # Ball carrier location
        homeX = homeNow['X_std_carrier']
        homeY = homeNow['Y_std_carrier']
        homeNum = homeNow['jerseyNumber_carrier']
        homeOrient = homeNow['o_std_carrier']
        homeDir = homeNow['dir_std_carrier']
        homeSpeed = homeNow['s_carrier']
        patch.extend(ax.plot(np.array(homeX), np.array(homeY), 'o',c='gold', ms=20, mec='white'))

        # Ball carrier jersey number
        for x, y, num in zip(homeX, homeY, homeNum):
            patch.append(ax.text(x, y, int(num), va='center', ha='center', color='black', size='medium'))

        # Ball carrier orientation
        for x, y, orient in zip(homeX, homeY, homeOrient):
            dx, dy = calculate_dx_dy_arrow(x, y, orient, 1, 1)
            patch.append(ax.arrow(x, y, dx, dy, color='gold', width=0.5, shape='full'))

        # Ball carrier direction, scaled by speed
        for x, y, direction, speed in zip(homeX, homeY, homeDir, homeSpeed):
            dx, dy = calculate_dx_dy_arrow(x, y, direction, speed, 1)
            patch.append(ax.arrow(x, y, dx, dy, color='black', width=0.25, shape='full'))

        # Defender location
        awayX = awayNow['X_std_defender']
        awayY = awayNow['Y_std_defender']
        awayNum = awayNow['jerseyNumber']
        awayOrient = awayNow['o_std_defender']
        awayDir = awayNow['dir_std_defender']
        awaySpeed = awayNow['s_defender']
        patch.extend(ax.plot(np.array(awayX), np.array(awayY), 'o',c='orangered', ms=20, mec='white'))

        # Defender jersey number
        for x, y, num in zip(awayX, awayY, awayNum):
            patch.append(ax.text(x, y, int(num), va='center', ha='center', color='white', size='medium'))

        # Defender orientation
        for x, y, orient in zip(awayX, awayY, awayOrient):
            dx, dy = calculate_dx_dy_arrow(x, y, orient, 1, 1)
            patch.append(ax.arrow(x, y, dx, dy, color='orangered', width=0.5, shape='full'))

        # Defender direction, scaled by speed
        for x, y, direction, speed in zip(awayX, awayY, awayDir, awaySpeed):
            dx, dy = calculate_dx_dy_arrow(x, y, direction, speed, 1)
            patch.append(ax.arrow(x, y, dx, dy, color='black', width=0.25, shape='full'))

        # Intersect location
        intersectX = awayNow['intersect_x']
        intersectY = awayNow['intersect_y']
        patch.extend(ax.plot(np.array(intersectX), np.array(intersectY), 'x',c='red', ms=20, mec='white'))
        ax.set_title(f'Game # {gameId} Play # {playId} \n {playDesc} {time}')

        return patch

    # The leading empty list gives the animation one frame with no players.
    ims = [[]]
    for time in np.arange(minTime, maxTime+1):
        ims.append(update_animation(time))

    anim = animation.ArtistAnimation(fig, ims, repeat=True)

    return anim

In [None]:
# Animate week 1, play 101 of game 2022090800: carrier id 47857 against defender id 42816.
anim = animate_player_movement_select_players(1, 101, 2022090800,47857,[42816])

FileNotFoundError: ignored

x marks the spot. In this example, defensive player #45 should run towards the x, as this is the point of intersection. We can use this point to calculate the pursuit angle.

We need to adjust the directions so that they match the standardized x and y coordinates; then every play can be viewed as moving in the same direction. This will be beneficial for the model.

In [None]:
# Encode the animation at 10 frames per second, write it to an mp4 file in the
# working directory, then embed that file in the notebook output.
writer = FFMpegWriter(fps=10)
anim.save('animation_notrail_intersect.mp4', writer=writer)
Video("animation_notrail_intersect.mp4")

The intersection point never settles, but I think that's fine because the players' directions are always changing. It is cool to see that when the players' direction lines almost touch, the intersection point settles on that spot.

In [21]:
# use the intersecting point as the angle point
# test['pursuit_angle']
def angle_between_three_points(pointA, pointB, pointC):
    """Return the angle A-B-C in degrees, measured at the vertex ``pointB``.

    Uses the dot-product identity
    ``cos(theta) = (BA . BC) / (|BA| * |BC|)``.

    Parameters
    ----------
    pointA, pointB, pointC : array-like
        Coordinates of the three points (all of the same length). NumPy
        arrays, lists and tuples are accepted.

    Returns
    -------
    float
        The angle at ``pointB`` in degrees, within [0, 180].

    Raises
    ------
    ValueError
        If the cosine cannot be computed: ``pointA`` or ``pointC`` coincides
        with ``pointB`` (zero-length side) or a coordinate is not finite.
    """
    vertex = np.asarray(pointB, dtype=float)
    BA = np.asarray(pointA, dtype=float) - vertex
    BC = np.asarray(pointC, dtype=float) - vertex

    # NumPy does not raise on 0/0 or NaN input (it returns NaN with a
    # warning), so the degenerate cases have to be detected explicitly.
    norm_product = np.linalg.norm(BA) * np.linalg.norm(BC)
    if not np.isfinite(norm_product) or norm_product == 0:
        raise ValueError('invalid cosine')

    # Floating-point rounding can push the ratio marginally outside [-1, 1]
    # for (anti-)parallel vectors, which would make arccos return NaN.
    cosine_angle = np.clip(np.dot(BA, BC) / norm_product, -1.0, 1.0)

    return np.degrees(np.arccos(cosine_angle))

In [None]:
# defender_new.head()

formula: https://muthu.co/using-the-law-of-cosines-and-vector-dot-product-formula-to-find-the-angle-between-three-points/

In [None]:
# # intersection to ball carrier
# AB2 = ((defender_new['intersect_x'])**2 - (defender_new['abs_x_carrier'])**2) + ((defender_new['intersect_y'])**2 - (defender_new['Y_std_carrier'])**2)
# # intersection to defender
# AC2 = ((defender_new['intersect_x'])**2 - (defender_new['abs_x_defender'])**2) + ((defender_new['intersect_y'])**2 - (defender_new['Y_std_defender'])**2)
# # ball carrier to defender
# BC2 = ((defender_new['abs_x_carrier'])**2 - (defender_new['abs_x_defender'])**2) + ((defender_new['Y_std_carrier'])**2 - (defender_new['Y_std_defender'])**2)
# numerator = AB2 + AC2 - BC2
# # distance from intersect to ball carrier
# AB = np.sqrt(((defender_new['intersect_x']-defender_new['abs_x_carrier'])**2) + ((defender_new['intersect_y']-defender_new['Y_std_carrier'])**2))
# # distance from intersect to defender
# AC = np.sqrt(((defender_new['intersect_x']-defender_new['abs_x_defender'])**2) + ((defender_new['intersect_y']-defender_new['Y_std_defender'])**2))
# denominator = 2 * (AB * AC)
# numerator/denominator
# # defender_new['pursuit_angle'] =np.arccos(numerator/denominator)
# # defender_new

In [22]:
# Euclidean distance between each row's (X_std, Y_std) position and the
# carrier position (X_std_carrier, Y_std_carrier).
x_gap = tracking_carrier['X_std'] - tracking_carrier['X_std_carrier']
y_gap = tracking_carrier['Y_std'] - tracking_carrier['Y_std_carrier']
tracking_carrier = tracking_carrier.withColumn('dist_to_carrier', F.sqrt(x_gap**2 + y_gap**2))

In [23]:
# Pursuit angle = angle at the intersection point (A) between the ball
# carrier (B) and the defender (C), from the law of cosines:
#     cos(A) = (AB^2 + AC^2 - BC^2) / (2 * AB * AC)

# squared distance: intersection to ball carrier
AB2 = (nearest_offensive['intersect_x'] - nearest_offensive['X_std_carrier'])**2 + (nearest_offensive['intersect_y'] - nearest_offensive['Y_std_carrier'])**2
# squared distance: intersection to defender
AC2 = (nearest_offensive['intersect_x'] - nearest_offensive['X_std'])**2 + (nearest_offensive['intersect_y'] - nearest_offensive['Y_std'])**2
# squared distance: ball carrier to defender
BC2 = (nearest_offensive['X_std_carrier'] - nearest_offensive['X_std'])**2 + (nearest_offensive['Y_std_carrier'] - nearest_offensive['Y_std'])**2
numerator = AB2 + AC2 - BC2

# Side lengths are derived from the squared distances above so the numerator
# and denominator are guaranteed to use the same coordinates (the carrier's
# Y_std_carrier, not the row's raw 'y' column).
# distance from intersect to ball carrier
AB = F.sqrt(AB2)
# distance from intersect to defender
AC = F.sqrt(AC2)
denominator = 2 * (AB * AC)

nearest_offensive = nearest_offensive.withColumn('pursuit_angle',F.degrees(F.acos(numerator/denominator)))
nearest_offensive

DataFrame[gameId: string, playId: string, nflId: int, displayName: string, frameId: string, x: string, X_std: string, y: string, Y_std: string, abs_x: string, dir: string, dir_std: string, s: string, a: string, o: string, o_std: string, nflId_carrier: int, x_carrier: string, X_std_carrier: string, y_carrier: string, Y_std_carrier: string, abs_x_carrier: string, s_carrier: string, dir_carrier: string, dir_std_carrier: string, a_carrier: string, o_carrier: string, o_std_carrier: string, club: string, defensiveTeam: string, possessionTeam: string, playDirection: string, down: string, yardsToGo: string, yardlineNumber: string, dist_to_carrier: double, gameId_def: string, playId_def: string, nflId_def: int, frameId_def: string, dist_to_blocker: double, def_line_A: double, def_line_B: double, def_line_C: double, carrier_line_A: double, carrier_line_B: double, carrier_line_C: double, intersect_x: double, intersect_y: double, pursuit_angle: double]

In [None]:
# tracking_carrier['pursuit_angle'] = abs(tracking_carrier['dir_carrier'] - tracking_carrier['dir'])

# isBlocked (https://www.kaggle.com/code/jrudoler56/optimal-run-path-for-kick-returners/notebook#The-Regression)

- If the defender is blocked, that will alter the pursuit angle.
- This metric, coupled with the distance to the nearest offensive player, should work well in determining the pursuit angle. For example, if the defender is not blocked but has an offensive player within 1 yard, the defender still will not be able to take as direct a path as they would like.

# cannotCatchBallCarrier (https://www.kaggle.com/code/jrudoler56/optimal-run-path-for-kick-returners/notebook#The-Regression)

- ableToTackle (https://www.kaggle.com/code/jrudoler56/optimal-run-path-for-kick-returners/notebook#The-Regression)
    - (1-isBlocked) * (1-cannotCatchBallCarrier)
        - must equal 1 for the defender to be able to tackle the ball carrier

# Final DF

In [24]:
# Columns exported for modelling, listed in output order.
columns = [
    # identifiers
    'gameId', 'playId', 'frameId', 'nflId',
    # per-row position / motion columns
    'X_std', 'Y_std', 'dir_std', 's', 'a', 'o_std',
    # ball-carrier position / motion columns
    'X_std_carrier', 'Y_std_carrier', 's_carrier', 'dir_std_carrier',
    'a_carrier', 'o_std_carrier',
    # derived features
    'dist_to_carrier', 'dist_to_blocker', 'pursuit_angle',
    # play context
    'down', 'yardsToGo', 'yardlineNumber',
]

final_df = nearest_offensive.select(*columns)

In [None]:
# keep defenders
# final_df = nearest_offensive[nearest_offensive['club_def']!=nearest_offensive['possessionTeam']]

# necessary columns
# final_df = final_df[['gameId','playId','nflId_def','frameId','Y_std_def','abs_x_def','dir','s',
#                               'Y_std_carrier','abs_x_carrier','dir_carrier','pursuit_angle','dist_to_carrier',
#                               'Y_std_off','abs_x_off','dist_to_off']]

In [25]:
# Persist the modelling dataset as Parquet in the working directory,
# overwriting any previous export. (Earlier iterations wrote CSV instead.)
# NOTE(review): "header" is a CSV-style option; presumably the Parquet writer
# ignores it — confirm before removing.
parquet_writer = final_df.write.option("header",True).mode('overwrite')
parquet_writer.parquet("./data_for_model.parquet")

In [None]:
# Print the current working directory via a shell command.
!pwd

/content/drive/MyDrive/BDB 2024
