# CSE611 NFL Project

### Acquire NFL dataset

In [None]:
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

cp: cannot stat '/content/kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [None]:
# Download the NFL Big Data Bowl 2021 competition files with the Kaggle CLI.
# The CLI refuses to start unless kaggle.json is present in ~/.kaggle.
!kaggle competitions download -c nfl-big-data-bowl-2021

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 4, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.12/dist-packages/kaggle/__init__.py", line 6, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 434, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method. See setup instructions at https://github.com/Kaggle/kaggle-api/


In [None]:
!unzip nfl-big-data-bowl-2021.zip -d nfl-data/
!rm -rf nfl-big-data-bowl-2021.zip

unzip:  cannot find or open nfl-big-data-bowl-2021.zip, nfl-big-data-bowl-2021.zip.zip or nfl-big-data-bowl-2021.zip.ZIP.


### NFL Data Analysis

In [None]:
# import packages
%matplotlib notebook
from IPython import display
!pip install bezier
import os
import pandas as pd
import numpy as np
import bezier
import matplotlib.pyplot as plt
from itertools import combinations
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.animation as animation

Collecting bezier
  Downloading bezier-2024.6.20-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Downloading bezier-2024.6.20-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bezier
Successfully installed bezier-2024.6.20


In [None]:
# Load the play-level and game-level tables.
# Fail right here with a clear message when the dataset is missing, instead of
# silently skipping the load and hitting a NameError on `playsData` afterwards.
if not os.path.exists('./nfl-data'):
    raise FileNotFoundError(
        "./nfl-data not found - run the 'Acquire NFL dataset' cells first"
    )

playsData = pd.read_csv('./nfl-data/plays.csv')
gamesData = pd.read_csv('./nfl-data/games.csv')

playsData.head()

NameError: name 'playsData' is not defined

In [None]:
# Pull the number that stands in front of "DL" out of the personnel string
# (assumed to look like "4 DL, 2 LB, 5 DB" - TODO confirm against plays.csv).
# Taking the first character instead would break on multi-digit counts or on
# strings that do not start with the DL group; those now become NaN.
playsData['numberOfDl'] = playsData['personnelD'].str.extract(r'(\d+)\s*DL', expand=False)

In [None]:
# Convert the numberOfDl column to float64 so it can be compared numerically.
playsData['numberOfDl'] = pd.to_numeric(playsData['numberOfDl']).astype('float64')

In [None]:
# A play is treated as a blitz when more players rush the passer than there are
# defensive linemen listed for the play.
# Plays with fewer than 3 or more than 5 listed defensive linemen are dropped
# afterwards because of data inconsistency (some DL are assigned as linebackers
# and some linebackers are assigned as DL).
rushers_exceed_dl = playsData['numberOfPassRushers'] > playsData['numberOfDl']
blitzes1 = playsData[rushers_exceed_dl]
blitzes2 = blitzes1[blitzes1['numberOfDl'] > 2]
blitzes3 = blitzes2[blitzes2['numberOfDl'] < 6]

blitz_total = len(blitzes1)
print(blitz_total)
print("Data loss from cutting out missassigned plays:", blitz_total - len(blitzes3), " plays.")

# Share of all plays that were flagged as a blitz, in percent.
blitzpercentage = blitz_total/len(playsData) * 100
print(blitzpercentage)
print(playsData['yardsToGo'].mean())

In [None]:
# Number of plays flagged as blitzes (before the 3-5 defensive-linemen filter) out of all plays.
print(f'Total number of blitz instances: {blitzes1.shape[0]} out of {playsData.shape[0]}')

In [None]:
# Put the tracking data of all 17 weeks into one dataframe.
# Every file is read first and concatenated once: calling pd.concat inside the
# loop would copy the accumulated frame again on each iteration.
if not os.path.exists('./nfl-data'):
    raise FileNotFoundError(
        "./nfl-data not found - run the 'Acquire NFL dataset' cells first"
    )

weeks = pd.concat(
    [pd.read_csv(f'./nfl-data/week{index}.csv') for index in range(1, 18)],
    ignore_index=True,
)

weeks.head()

In [None]:
# Column overview (dtypes, non-null counts) of the filtered blitz plays.
blitzes3.info()

In [None]:
# Column overview (dtypes, non-null counts) of the combined tracking data.
weeks.info()

In [None]:
# Plan: cluster the plays in blitzes3 with k-means. The similarity measure should reflect which area (gap) each player goes to, and the position of each (x, y) point RELATIVE to the QB.
# Gaps are (for now) defined as 2 yards wide each, with 6 gaps on each side of the QB's position.

In [None]:
# Function to calculate the euclidian distance
def numpy_euclidian_distance_short(point_1, point_2):
    """Return the straight-line (Euclidean) distance between two points.

    Both arguments are converted with ``np.array``, so lists, tuples and
    arrays of matching shape are accepted.
    """
    delta = np.array(point_1) - np.array(point_2)
    return np.sqrt((delta ** 2).sum())

# only distance on y axis matters for gap criteria
def distance_to_gap(gap_y, player_y):
  """Return the absolute distance between a gap and a player along the y axis.

  Only the y coordinate is compared; x plays no role in the gap criteria.
  """
  lateral_offset = gap_y - player_y
  return np.abs(lateral_offset)

# Get Gaps' position by middle point and the right side or left side of middle point
# Ex:
#                                            left | right
# F - * - E - * - D - * - C - * - B - * - A - * - M - * - A - ...
#                                     3       1       1
def get_gaps_position(LOS, middle_point, gapSide, playDirection):
  """Return the centre of the gaps A-F on one side of the middle point.

  Gap centres sit 1, 3, 5, 7, 9 and 11 yards away from ``middle_point`` along
  the y axis.

  Args:
    LOS: x coordinate of the line of scrimmage; copied into every entry.
    middle_point: y coordinate the gaps are measured from.
    gapSide: 'right' selects the right-hand gaps, any other value the left-hand ones.
    playDirection: 'left' or anything else (treated as a play going right).

  Returns:
    dict mapping 'A'..'F' to ``[y_of_gap_centre, LOS]``.
  """
  # Which way "right" points along y depends on the direction of play:
  # for playDirection 'left' the right-hand gaps have increasing y, for every
  # other direction they have decreasing y (and the left-hand gaps the opposite).
  grows_with_y = (playDirection == 'left') == (gapSide == 'right')
  step = 1 if grows_with_y else -1
  return {
    letter: [middle_point + step * offset, LOS]
    for letter, offset in zip('ABCDEF', (1, 3, 5, 7, 9, 11))
  }

# Calcualte players position between each gap
def get_distance_from_each_gaps(players_position, middle_point, LOS, playDirection):
  """Average y-distance between the tracked rows and each gap A-F.

  For every row the gaps on the row's own side of ``middle_point`` are used
  (a y greater than the middle point counts as 'right'), and the distances
  are averaged over all rows of ``players_position``.

  Args:
    players_position: DataFrame of tracking rows; only its 'y' column is read.
    middle_point: y coordinate the gaps are measured from.
    LOS: x coordinate of the line of scrimmage (passed through to the gap lookup).
    playDirection: direction of the play, passed through to the gap lookup.

  Returns:
    dict mapping 'A'..'F' to the mean distance. When ``players_position`` has
    no rows every value is NaN, because there is nothing to average.
  """
  gap_letters = ("A", "B", "C", "D", "E", "F")
  row_count = len(players_position)
  if row_count == 0:
    # Dividing the sums by zero rows below would raise ZeroDivisionError.
    return {letter: float('nan') for letter in gap_letters}

  gap_dict = {letter: 0 for letter in gap_letters}
  for y in players_position['y']:
    side = 'right' if y > middle_point else 'left'
    gaps = get_gaps_position(LOS, middle_point, side, playDirection)
    for key in gaps:
      gap_dict[key] += distance_to_gap(gaps[key][0], y)

  # Turn the per-gap sums into per-row averages.
  for key in gap_dict:
    gap_dict[key] /= row_count

  return gap_dict

In [None]:
def gap_detection(x, y, LOS, middle_point, gapSide, playDirection):
  """Return the gap letter a player occupies, or '' when he is in none.

  A player only counts when his x lies within 1.5 yards of the line of
  scrimmage on one specific side of it: between ``LOS - 1.5`` and ``LOS`` for
  playDirection 'right', between ``LOS`` and ``LOS + 1.5`` otherwise. The gap
  is then chosen by the distance of y from ``middle_point`` in 2-yard bands:
  A up to 2 yards, B up to 4, ... F up to 12.

  Args:
    x, y: player coordinates.
    LOS: x coordinate of the line of scrimmage.
    middle_point: y coordinate the bands are centred on.
    gapSide: not used by the computation; kept for the callers.
    playDirection: 'right' or anything else (handled as a play going left).

  Returns:
    One of 'A'..'F', or '' if the player is outside the x window or further
    than 12 yards from the middle point.
  """
  gap_letters = 'ABCDEF'

  if playDirection == 'right':
    if not (x <= LOS and x >= LOS - 1.5):
      return ''
    offset = abs(y - middle_point)
    for rank, letter in enumerate(gap_letters, start=1):
      if offset <= 2 * rank:
        return letter
    return ''

  if not (x >= LOS and x <= LOS + 1.5):
    return ''
  if middle_point - 2 <= y <= middle_point + 2:
    return 'A'
  # B..F are the 2-yard bands on either side, checked from the inside out.
  for rank, letter in enumerate(gap_letters[1:], start=2):
    outer = 2 * rank
    inner = outer - 2
    on_low_side = middle_point - outer <= y <= middle_point - inner
    on_high_side = middle_point + inner <= y <= middle_point + outer
    if on_low_side or on_high_side:
      return letter
  return ''

def get_gap_info(players_position, middle_point, LOS, playDirection, playId):
  """Find which players show up in a gap during a play and which pairs cross.

  The rows are scanned in order. The first row in which a player is located in
  a gap (according to ``gap_detection``) fixes that player's gap and frame;
  later rows of the same player are ignored. Afterwards every pair of such
  players is checked for a crossing: their order along y at the ball-snap
  frame is compared with their order along y at the frames in which each of
  them reached his gap.

  Args:
    players_position: tracking rows with the columns nflId, event, frameId,
      x, y and position.
    middle_point: y coordinate the gaps are centred on.
    LOS: x coordinate of the line of scrimmage.
    playDirection: direction of the play, passed to ``gap_detection``.
    playId: not used by the computation.

  Returns:
    ``[gap_count_dict, crossing_combinations, players_positions]`` where
    gap_count_dict maps 'A'..'F' to the number of players found in that gap,
    crossing_combinations holds strings such as 'A: SS, B: MLB', and
    players_positions lists the position label of every counted player.
  """
  gap_count_dict = {letter: 0 for letter in "ABCDEF"}
  counted_players = []        # nflIds (as strings) in the order they were first found in a gap
  entry_frame = {}            # nflId -> frameId of that first sighting, e.g. { '123': 14, '234': 15 }
  entry_gap = {}              # nflId -> gap letter of that first sighting, e.g. { '123': 'A', '234': 'B' }
  players_positions = []
  crossing_combinations = []  # e.g. ['A: SS, B: MLB', 'C: MLB, C: ILB']
  ball_snap_frame = -1

  for _, sample in players_position.iterrows():
    # nflId is stored as a string so it can serve as a dictionary key
    nflId = str(int(sample['nflId']))

    # Remember the first frame that carries the ball_snap event
    if ball_snap_frame < 0 and sample['event'] == 'ball_snap':
      ball_snap_frame = sample['frameId']

    if nflId in counted_players:
      continue

    x = sample['x']
    y = sample['y']
    position = sample['position']
    side = 'right' if y > middle_point else 'left'

    gap = gap_detection(x, y, LOS, middle_point, side, playDirection)
    if len(gap) == 0:
      # The player is not in any gap in this row
      continue

    counted_players.append(nflId)
    entry_frame[nflId] = sample['frameId']
    entry_gap[nflId] = gap
    gap_count_dict[gap] += 1
    players_positions.append(position)

  def snapshot(nfl_id, frame):
    # Rows of one player at one frame
    same_player = players_position['nflId'] == float(nfl_id)
    return players_position[same_player & (players_position['frameId'] == frame)]

  # Check every pair of counted players,
  # e.g. ['123', '234', '345'] -> ('123', '234'), ('123', '345'), ('234', '345')
  for first_id, second_id in combinations(counted_players, 2):
    first_entry = snapshot(first_id, entry_frame[first_id])
    second_entry = snapshot(second_id, entry_frame[second_id])

    initial_y_1 = snapshot(first_id, ball_snap_frame)['y'].iloc[0]
    initial_y_2 = snapshot(second_id, ball_snap_frame)['y'].iloc[0]
    last_y_1 = first_entry['y'].iloc[0]
    last_y_2 = second_entry['y'].iloc[0]
    position1 = first_entry['position'].iloc[0]
    position2 = second_entry['position'].iloc[0]

    # A crossing means the two players swapped their order along y
    swapped_downwards = initial_y_1 > initial_y_2 and last_y_1 < last_y_2
    swapped_upwards = initial_y_1 < initial_y_2 and last_y_1 > last_y_2
    if swapped_downwards or swapped_upwards:
      crossing_combinations.append(
        f'{entry_gap[first_id]}: {position1}, {entry_gap[second_id]}: {position2}'
      )

  return [gap_count_dict, crossing_combinations, players_positions]

### Data Processing
* Execution time: about 12 min with the whole data set, about 30 s when only the PHI data is used

In [None]:
# One dict per processed play. The rows are collected in a plain list and turned
# into a DataFrame once after the loop: DataFrame.append no longer exists in
# pandas 2.x, and growing a frame row by row is quadratic anyway.
result_columns = ['gameId', 'playId', 'team', 'A', 'B', 'C', 'D', 'E', 'F', 'crossing']
gap_records = []

# Walk through the blitz data and use `gameId`, `playId` to get the target play.
# `test_data` restricts the run to plays with PHI in possession so that it does
# not take too long; use `blitzes3` instead for the full run.
# NOTE(review): with possessionTeam == 'PHI' the defensive team recorded below is
# PHI's opponent - confirm whether PHI on defense was meant here.
test_data = blitzes3[blitzes3['possessionTeam'] == 'PHI']

inst_count = 0

for index, row in test_data.iterrows():
  gameId = row['gameId']
  playId = row['playId']
  LOS = row['absoluteYardlineNumber']

  try:
    # Every lookup that can come back empty happens inside the try, so one
    # incomplete play is reported and skipped instead of stopping the whole cell.
    gameData = gamesData[(gamesData['gameId'] == gameId)]
    hometeam = gameData['homeTeamAbbr'].iloc[0]
    visitorteam = gameData['visitorTeamAbbr'].iloc[0]

    # The defense is whichever team does not have possession
    offense_is_home = row['possessionTeam'] == hometeam
    homeOrAwayOnD = 'away' if offense_is_home else 'home'
    defensiveTeam = visitorteam if offense_is_home else hometeam

    target_play = weeks[(weeks['gameId'] == gameId) & (weeks['playId'] == playId)]
    playDirection = target_play['playDirection'].iloc[0]

    # Some plays do not contain the QB's data
    qb = target_play[(target_play['position'] == 'QB')]
    if len(qb) == 0:
      continue
    middle_point = qb[(qb['frameId'] == 1)]['y'].iloc[0]

    # The `team` column says home/away, which selects the defensive players
    d_players = target_play[(target_play['team'] == homeOrAwayOnD)]
    if len(d_players) == 0:
      continue

    [gap_count, crossing_combinations, players_positions] = get_gap_info(d_players, middle_point, LOS, playDirection, str(playId))

    gap_records.append({
        'gameId': str(gameId),
        'playId': str(playId),
        'team': str(defensiveTeam),
        'A': int(gap_count['A']),
        'B': int(gap_count['B']),
        'C': int(gap_count['C']),
        'D': int(gap_count['D']),
        'E': int(gap_count['E']),
        'F': int(gap_count['F']),
        # Combinations are separated with `;`, e.g. 'D: OLB, B: OLB;C: CB, C: OLB'
        'crossing': ";".join(crossing_combinations)
    })

    # Print one line per crossing pair, or one summary line when nobody crossed.
    # Comment the prints out when running on the large data set.
    if len(crossing_combinations) > 0:
      for blitz in crossing_combinations:
        inst = blitz.split(',')
        gap1 = inst[0].split(':')[0].strip()
        player1 = inst[0].split(":")[1].strip()
        gap2 = inst[1].split(':')[0].strip()
        player2 = inst[1].split(':')[1].strip()
        display_str = "{0:<50}".format(f"{player1}-{player2} Cross Blitz to the {gap1} and {gap2} gap")
        print(f"{display_str}| GameID: {gameId}, PlayID: {playId}")
    else:
      gaps = [key for key, value in gap_count.items() if value > 0]
      player_str = '-'.join(players_positions)
      display_str = "{0:<50}".format(f"{player_str} Blitz to the {','.join(gaps)} gap without crossing")
      print(f"{display_str}| GameID: {gameId}, PlayID: {playId}")
    inst_count += 1
  except Exception as e:
    print("The error raised is: ", e)

gaps_avg_dis = pd.DataFrame(gap_records, columns=result_columns)
print(f'Total blitz instances - {inst_count}')




```
# This is formatted as code
```

### Visualize the play

In [None]:
def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 6.33)):
    """
    Draw an empty football field (120 x 53.3, end zones included) for viewing plays.

    Args:
        linenumbers: draw the yard numbers along both sidelines.
        endzones: shade the two 10-yard end zones; also limits the hash marks
            to the area between them.
        highlight_line: draw a vertical yellow line with a label.
        highlight_line_number: yard line of the highlighted line; 10 is added
            to account for the left end zone.
        highlighted_name: label printed next to the highlighted line.
        fifty_is_los: draw a gold line at x = 60 labelled as the snap yard line.
        figsize: size of the matplotlib figure.

    Returns:
        (fig, ax) of the new figure.
    """
    fig, ax = plt.subplots(1, figsize=figsize)
    ax.add_patch(patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                                   edgecolor='r', facecolor='white', zorder=0))

    # One polyline that traces the field outline and the line every 10 yards
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='black')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')

    # Endzones: both are 10 wide. The right one starts at x = 110, so its width
    # has to be 10 as well (a width of 120 would reach far past the field).
    if endzones:
        for left_edge in (0, 110):
            ax.add_patch(patches.Rectangle((left_edge, 0), 10, 53.3,
                                           linewidth=0.1,
                                           edgecolor='r',
                                           facecolor='blue',
                                           alpha=0.2,
                                           zorder=0))
    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')

    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to midfield and down again; subtracting 10
            # removes the end-zone offset.
            numb = x
            if x > 50:
                numb = 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='black')
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='black', rotation=180)

    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    # Four short hash marks per yard: one at each sideline and two inside the field
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='black')
        ax.plot([x, x], [53.0, 52.5], color='black')
        ax.plot([x, x], [22.91, 23.57], color='black')
        ax.plot([x, x], [29.73, 30.39], color='black')

    if highlight_line:
        hl = highlight_line_number + 10
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return fig, ax

def animate(gameId, playId):
    """Show one play as an animation of position labels moving over the field.

    The tracking rows of the play are taken from the module-level ``weeks``
    frame. Each frame draws the position label of every home and away player
    plus a '+' marker for the football. The animation is shown with
    ``plt.show()`` and additionally embedded as an HTML5 video.

    Args:
        gameId: id of the game.
        playId: id of the play within that game.

    Raises:
        ValueError: if ``weeks`` holds no home-team rows for the play, since
            the number of frames is taken from the home team's rows.
    """
    # Query the big tracking table once; everything else works on the small result.
    play = weeks.query(f'gameId == {gameId} and playId == {playId}')
    home_rows = play[play['team'] == 'home']
    if home_rows.empty:
        # Checked before the field is created so no empty figure is left behind.
        raise ValueError(f'No home-team tracking rows for gameId={gameId}, playId={playId}')

    fig, ax = create_football_field()
    ims = []

    for i in range(1, int(home_rows['frameId'].max()) + 1):
        frame_rows = play[play['frameId'] == i]
        im = []

        # Home labels first, then away labels, then the ball on top
        for team in ('home', 'away'):
            for _, player in frame_rows[frame_rows['team'] == team].iterrows():
                im.append(ax.text(player['x'], player['y'], player['position'], size=10,
                                  bbox=dict(boxstyle="circle", fc="white")))

        ball = frame_rows[frame_rows['team'] == 'football']
        im.append(ax.scatter(
            ball['x'],
            ball['y'],
            marker='+',
            color='brown',
            s=100,
            zorder=100000
        ))
        ims.append(im)

    ani = animation.ArtistAnimation(fig, ims, interval=100, blit=False,
                                    repeat_delay=2000)
    plt.show()
    video = ani.to_html5_video()
    html = display.HTML(video)
    display.display(html)

#### OLB Blitz to the C gap

In [None]:
# Render game 2018123012, play 1987.
animate(2018123012, 1987)

#### OLB-ILB Cross Blitz to the C and E gap

In [None]:
# Render game 2018122310, play 4508.
animate(2018122310, 4508)

### Clustering
* Need to cluster by the average numbers of players in each gap between LOS & 1.5 yards behind LOS

In [None]:
# Implement K-Means Clustering to cluster our blitzes data
from sklearn.cluster import KMeans

# .copy() gives an independent frame, so adding the 'Cluster' column below does
# not write into a slice of the previous frame.
gaps_avg_dis = gaps_avg_dis.dropna().copy()
# df_for_kmeans = gaps_avg_dis[['CB_A', 'CB_B', 'CB_C', 'CB_D', 'CB_E', 'CB_F',
#                               'S_A', 'S_B', 'S_C', 'S_D', 'S_E', 'S_F',
#                               'LB_A', 'LB_B', 'LB_C', 'LB_D', 'LB_E', 'LB_F']]

df_for_kmeans = gaps_avg_dis[['A', 'B', 'C', 'D', 'E', 'F']]

# Use 6 * 3 clusters since we have 6 gaps and 3 different position groups, but
# never more clusters than plays: KMeans rejects n_clusters > n_samples.
# random_state is fixed so the cluster labels are the same on every run.
kmeans = KMeans(n_clusters=min(18, len(df_for_kmeans)), random_state=42)

y = kmeans.fit_predict(df_for_kmeans)
gaps_avg_dis['Cluster'] = y
# `display` is the IPython.display module in this notebook
# (`from IPython import display`), so its display() function has to be called.
display.display(gaps_avg_dis)

In [None]:
# Encode the per-gap player counts of each play as one label such as "1,0,1,0,0,0"
# (order A-F), so that identical gap patterns can be counted later.
gaps_avg_dis['blitz_types'] = gaps_avg_dis[['A', 'B', 'C', 'D', 'E', 'F']].astype(int).astype(str).agg(','.join, axis=1)
# `display` is the IPython.display module in this notebook
# (`from IPython import display`), so its display() function has to be called.
display.display(gaps_avg_dis)

In [None]:
# How often each gap pattern occurs in the rows whose defensive team is PHI.
# There may still be mistakes in counting the players that cross the gaps,
# since the majority of the data shows nobody crossing any gap.
# NOTE(review): the rows come from plays with PHI in possession, where the
# recorded team is PHI's opponent - confirm this filter matches anything.
phi_rows = gaps_avg_dis['team'] == 'PHI'
gaps_avg_dis.loc[phi_rows, 'blitz_types'].value_counts()

### Construction of blitzes via bezier curves

#### Generic Outside to Inside (Double) Blitzes

In [None]:
#Can be in any gap according to a _gap_left_1 etc.
#Remember that the x axis is the length of the field and y axis is the width of the field.
#We assume that the blitzers run into the middle of the respective gaps.

#middle_point: y position of the QB
#blitzing_player_x: starting x position of the right linebacker
#blitzing_player_y: starting y position of the right linebacker
#blitzing_player2_x: starting x position of the left linebacker
#blitzing_player2_y: starting y position of the left linebacker
#a_gap_left_1: leftmost position of the left gap, given as an offset from middle_point
#a_gap_right_1: rightmost position of the right gap, given as an offset from middle_point
#LOS: x position of the line of scrimmage

def doubleGapOutsideToInsideBlitz(middle_point, LOS, blitzing_player_x, blitzing_player_y, blitzing_player2_x, blitzing_player2_y, a_gap_left_1, a_gap_right_1):
  """Build the two quadratic Bezier paths of a double outside-to-inside blitz.

  Each curve has three nodes: the middle of the target gap on the line of
  scrimmage, one fixed control node, and the blitzer's starting position.
  In the node arrays the first row holds the y values and the second row the
  x values.

  Args:
    middle_point: y position of the QB; the gap offsets are added to it.
    LOS: x position of the line of scrimmage.
    blitzing_player_x, blitzing_player_y: start of the first blitzer.
    blitzing_player2_x, blitzing_player2_y: start of the second blitzer.
    a_gap_left_1: outer edge of the left gap as an offset from middle_point.
    a_gap_right_1: outer edge of the right gap as an offset from middle_point.

  Returns:
    [curve, curve2]: the first blitzer's path ends in the left gap, the second
    blitzer's path in the right gap.
  """
  # Shift half a yard inwards from the gap edge to reach the middle of the gap
  left_gap_centre = a_gap_left_1 + 0.5
  right_gap_centre = a_gap_right_1 - 0.5

  def quadratic_path(gap_offset, control_y, start_x, start_y):
    # NOTE(review): the control node (control_y, 5) is absolute, it is not
    # shifted by middle_point or LOS - confirm that this is intended.
    nodes = np.asfortranarray([
        [gap_offset + middle_point, control_y, start_y],
        [LOS, 5, start_x],
    ])
    return bezier.Curve(nodes, degree=2)

  return [
      quadratic_path(left_gap_centre, -2, blitzing_player_x, blitzing_player_y),
      quadratic_path(right_gap_centre, 2, blitzing_player2_x, blitzing_player2_y),
  ]

# Cross Double A gap blitz example where middle point = 0.0 and LOS = 2.0. Linebackers are 4  yards apart. The linebackers are not staggered in this example.

curves = doubleGapOutsideToInsideBlitz(0.0, 2.0, 5.0, 2.0, 5.0, -2.0, -2.0, 2.0)
# Curve.plot() creates a new figure unless it is handed an axis, so the axis
# returned by the first call is reused to draw both paths into the same plot.
blitz_ax = curves[0].plot(30)
curves[1].plot(30, ax=blitz_ax);


#The above function can be adjusted for pretty much any kind of blitz that goes from outside to inside. If one wants a blitz with only one blitzer, just use the first curve in the returned array.
#If one needs a blitz with 3 players, just call the function a second time.
#Maybe it would be easier to use a function that generates one bezier curve instead of 2.


### Thoughts on the Necessity of bezier curves

Maybe we don't have to use bezier curves. Maybe we can just take the initial starting position of the blitzing players, see which gap they end up in, and then estimate the trajectory that they used to get there. If they crossed another blitzing defensive player, we know that it was a cross blitz. If they didn't cross anybody, we can call the blitz just "Mike A and Will B" if the Mike and the Will linebacker blitzed in the A and B gap respectively. (Mike = middle linebacker, Will = weak side linebacker, Sam = strong side linebacker).

We have the position names of each player and we know which gap they end up in, so there is really no need for bezier curves.


### Thoughts on how to see indicators

In order to see indicators, we can maybe use an inverted vision cone like the one used in the papers presented last Tuesday. If player 2 is in that vision cone (behind player 1) within a certain range, that means that player 2 is helping player 1. Or something like that.

However, that wouldn't account for bumps where the helping player bumps the linebacker sideways.

Maybe, instead we can identify when the blitzer (player1) starts accelerating. At that point in time, player 2 should be moving towards the initial position of player 1 within a certain radius. Any player that starts moving towards that initial position of player 1 might be an indicator of player 1 blitzing.

In [None]:
# Column overview (dtypes, non-null counts) of the filtered blitz plays.
blitzes3.info()

In [None]:
def classify_blitz():
    """Name, for every blitz play with PHI on defense, the defenders that reach a gap.

    Walks over the module-level ``blitzes3`` plays, looks the game up in
    ``gamesData`` and the tracking rows in ``weeks``. Plays are skipped when
    the game does not involve PHI, when PHI is the team in possession, or when
    the play has no QB row at frame 1. For the remaining plays the defensive
    rows are scanned in order and the first row in which a player is located in
    a gap (per ``gap_detection``) produces one entry.

    Only single gap entries are reported here; whether two blitzers cross each
    other is determined by ``get_gap_info``.

    Returns:
        list of str: one entry per defender and play, containing the play id,
        the player's display name and the gap letter.
    """
    blitz_names = []
    for _, play in blitzes3.iterrows():
        gameId = play['gameId']
        playId = play['playId']
        LOS = play['absoluteYardlineNumber']

        gameData = gamesData[(gamesData['gameId'] == gameId)]
        gameData = gameData[((gameData['homeTeamAbbr'] == 'PHI') | (gameData['visitorTeamAbbr'] == 'PHI'))]
        if gameData.empty:
            # Not a PHI game. Skipping explicitly matters: carrying the team
            # names over from an earlier iteration would label this play as a
            # PHI defensive play.
            continue
        hometeam = gameData['homeTeamAbbr'].iloc[0]
        visitorteam = gameData['visitorTeamAbbr'].iloc[0]

        # The defense is whichever team does not have possession
        offense_is_home = play['possessionTeam'] == hometeam
        homeOrAwayOnD = 'away' if offense_is_home else 'home'
        defensiveTeam = visitorteam if offense_is_home else hometeam
        if defensiveTeam != 'PHI':
            continue

        target_play = weeks[(weeks['gameId'] == gameId) & (weeks['playId'] == playId)]
        qb_at_start = target_play[(target_play['position'] == 'QB') & (target_play['frameId'] == 1)]
        if qb_at_start.empty:
            # No tracking rows for the play, or no QB in them
            continue
        playDirection = target_play['playDirection'].iloc[0]
        # y of the QB in the first frame is used as the centre of the gaps
        middle_point = qb_at_start['y'].iloc[0]

        d_players = target_play[(target_play['team'] == homeOrAwayOnD)]
        counted_players = set()
        for _, sample in d_players.iterrows():
            nflId = sample['nflId']
            if nflId in counted_players:
                continue
            y = sample['y']
            side = 'right' if y > middle_point else 'left'
            gap = gap_detection(sample['x'], y, LOS, middle_point, side, playDirection)
            if len(gap) > 0:
                # The backslash in front of the pipe is part of the emitted text
                blitz_names.append(f" PlayID - {playId} \\| {sample['displayName']}-{gap} Blitz")
                counted_players.add(nflId)
    return blitz_names

    # todo: cross determination, determine position of players that blitz or cross (For example: Safety-Linebacker Cross Blitz to the A and B gap), only use philadelphia data.
    # How to access two different linebackers and should i compare them on two different frames?
    # We have logic for both single blitzes and cross blitzes, but how to differentiate which one is happening in the current play?
    # LH: If there is no crossing then we can say it is a single blitz

    # Li-Hsuan 8/19
    # Here is what I thought. You can determine whether there is a crossing after we determine which gap has players pass by
    # I used the array **counted_players** in function **get_gap_info** to store each player's nflId so that we know who is passing through a gap
    # With nflId you can know player's position (like LB, CN, etc) and its starting coordinate and its ending coordinate
    # Then you can determine whether there is a crossing by checking their starting & ending **y**
    # As for your second questions. I am not sure what your meaning. Did you mean we have two function to determine the blitz?
    # If so I think you can just put your code into the first cell of data processing. It already filter some data.
    # I think what you need to do is use the data generated by function **get_gap_info** and check for crossing

    # From my understanding, we need to determine crossing when we have blitz like A: 2, then we want to check whether there is a crossing
    # between these two players in gap A.
    # We might have case like A:1, B:2 blitz or odd numbers of players in the gap like A: 3 and I am not sure how to determine this one.
    # Maybe we can just select the first two players crossing the gap and just calculate those two's position?
    # As for odd number we can check like 1 vs 2 is crossing, 1 vs 3, 2 vs 3 and if there is one combination has crossing then we can mark
    # it as a crossing blitz?

# classify_blitz()

In [None]:
# Render game 2018120300, play 2711.
animate(2018120300, 2711)

### Legacy Code

In [None]:
# # Where are we differentiating between different players within a position? In order to determine the blitz, we will have to treat each player individually, not necessarily by position,
# # first we have to check which gap each individual player goes to on a given play. We can cluster by position at the end to see what kind of blitz a certain play represents.
# # We dont want the average distance of a given position to the gap.
# # We want the average distance of each INDIVIDUAL player when he is between 0 and 1.5 yards behind the line of scrimmage.

# # Cluster by the amount of players that have an average distance (between 0 and 1.5 yards behind LOS) to the gap that is within the gap
# # that way we can see how many players went through each gap on each play
# # so we gotta identify the players that went through each gap and then count them
# # for each gap

# gaps_avg_dis_lg = pd.DataFrame(columns=['gameId', 'playId'])

# # Walkthrough all the blitz data (blitze3) and use `gameId`, `playId` to get target play
# for index, row in blitzes3.iterrows():
#   gameId = row['gameId']
#   playId = row['playId']
#   LOS = row['absoluteYardlineNumber']

#   target_play = weeks[(weeks['gameId'] == gameId) & (weeks['playId'] == playId)]

#   playDirection = target_play['playDirection'].iloc[0] # Playdirection matters as discussed in Discord
#   # The starting y position of QB
#   try:
#     middle_point = target_play[(target_play['position'] == 'QB') & (target_play['frameId'] == 1)]['y'].tolist()[0]
#     # Get Corner tracking data (Use CB for test)
#     corner = target_play[(target_play['position'] == 'CB')]
#     safety = target_play[(target_play['position'] == 'SS') | (target_play['position'] == 'FS')]
#     lineback = target_play[(target_play['position'] == 'LB') | (target_play['position'] == 'MLB')]

#     # Sometimes they don't have CB in one play...
#     if len(corner) > 0 and len(safety) > 0 and len(lineback) > 0:
#       gap_cb_dis = get_distance_from_each_gaps(corner, middle_point, LOS, playDirection)
#       gap_safe_dis = get_distance_from_each_gaps(safety, middle_point, LOS, playDirection) # added playdirection
#       gap_lb_dis = get_distance_from_each_gaps(lineback, middle_point, LOS, playDirection)
#       gaps_avg_dis_lg = gaps_avg_dis_lg.append({
#           'gameId': str(gameId),
#           'playId': str(playId),
#           'CB_A': gap_cb_dis['A'],
#           'CB_B': gap_cb_dis['B'],
#           'CB_C': gap_cb_dis['C'],
#           'CB_D': gap_cb_dis['D'],
#           'CB_E': gap_cb_dis['E'],
#           'CB_F': gap_cb_dis['F'],
#           'S_A': gap_safe_dis['A'],
#           'S_B': gap_safe_dis['B'],
#           'S_C': gap_safe_dis['C'],
#           'S_D': gap_safe_dis['D'],
#           'S_E': gap_safe_dis['E'],
#           'S_F': gap_safe_dis['F'],
#           'LB_A': gap_lb_dis['A'],
#           'LB_B': gap_lb_dis['B'],
#           'LB_C': gap_lb_dis['C'],
#           'LB_D': gap_lb_dis['D'],
#           'LB_E': gap_lb_dis['E'],
#           'LB_F': gap_lb_dis['F'],
#       }, ignore_index=True)

#   except:
#     continue

# display(gaps_avg_dis_lg)