In [1]:
#August 5, 2021
#Nikhil Yerva 
#Nashville SC Intern Data Project 
#Import libraries
import pandas as pd
import math
import json
import numpy as np
import os 

In [2]:
#Store datasets
events = pd.read_csv('events.csv')
tracking = pd.read_csv('tracking.csv')
with open('metadata.json') as json_data:
    metadata = json.load(json_data)

# 1 Data Analysis and Visualization

## 1.1 Condition 1: 20m+ Forward Pass Attempt

In [3]:
events.head()

Unnamed: 0,game_id,event_id,type_id,type_name,period_id,minute,second,player_id,team_id,outcome,...,big_chance,pull_back,second_assist,through_ball,corner,switch_of_play,shot_after_1v1,shot_gk_1v1,aligned_clock,aligned_frame_idx
0,2198976,2311379125,32,start,1,0,0,0,15154,True,...,,,,,,,,,0.0,0.0
1,2198976,2311379123,32,start,1,0,0,0,1207,True,...,,,,,,,,,0.0,0.0
2,2198976,2311379133,1,pass,1,0,0,55711,1207,True,...,,,,,,,,,0.04,1.0
3,2198976,2311379157,1,pass,1,0,0,165796,1207,True,...,,,,,,,,,1.84,46.0
4,2198976,2311379171,1,pass,1,0,4,474800,1207,False,...,,,,,,,,,5.2,130.0


In [4]:
#Store next event end coordinates in the current row for future analysis
events['next_end_x'] = events.end_x.shift(1)
events['next_end_y'] = events.end_y.shift(1)

#Filter the events dataframe for passes only greater than 20 meters (meets first condition)
eventsPass = events[events['type_name'] == 'pass']
eventsPass = eventsPass[eventsPass['pass_length'] > 20]

#Clean dataset from irrelevant data
eventsPass.drop(['game_id','event_id','type_id', 'type_name', 'minute', 'second', 'period_id',
             'assist','keypass','sequence_id','possession_id',
             'xA','long_ball','duel_offensive','duel_defensive','body_part',
             'throw_in','foul','free_kick_type','cross','free_kick','free_kick_shot','xG','goal_kick',
             'gk_throw','pattern_of_play','assisted_shot','shot_blocked','xGoT','big_chance','pull_back',
             'second_assist','through_ball','corner','switch_of_play','shot_after_1v1','shot_gk_1v1',
             ], axis = 1, inplace = True)

#Ensure readability and usage after merge with renaming columns
eventsPass.rename(columns = { 'period_id' : 'period', 'aligned_frame_idx' : 'frame_idx', 
       'player_id' : 'player_id_pass', 'team_id' : 'team_id_pass'
}, inplace = True)

eventsPass.head()

Unnamed: 0,player_id_pass,team_id_pass,outcome,start_x,start_y,end_x,end_y,pass_length,pass_angle,aligned_clock,frame_idx,next_end_x,next_end_y
3,165796,1207,True,36.6,49.2,36.9,87.4,26.0,1.6,1.84,46.0,36.6,49.2
4,474800,1207,False,38.5,87.8,68.1,67.1,34.1,5.9,5.2,130.0,36.9,87.4
5,95228,15154,True,29.2,27.3,64.1,22.2,36.8,6.2,7.0,175.0,68.1,67.1
9,244766,1207,True,23.6,81.1,43.9,95.1,23.3,0.4,13.68,342.0,78.6,20.0
11,474800,1207,False,33.4,95.3,67.1,82.5,36.4,6.0,19.76,494.0,33.4,95.3


## 1.2 Condition 2: Contested Pass

In [5]:
tracking.head()

Unnamed: 0,period,frame_idx,game_clock,last_touch,live,team_id,player_id,x,y,speed,ball_x,ball_y,ball_z,ball_speed
0,1,0,0.0,15154,False,1207,165796,-9.65,-1.28,0.0,-0.0,-0.04,0.37,11.41
1,1,0,0.0,15154,False,1207,55711,0.31,-0.24,0.0,-0.0,-0.04,0.37,11.41
2,1,0,0.0,15154,False,1207,86898,-19.58,-12.52,0.0,-0.0,-0.04,0.37,11.41
3,1,0,0.0,15154,False,1207,140040,-0.16,-13.65,0.0,-0.0,-0.04,0.37,11.41
4,1,0,0.0,15154,False,1207,442451,-23.55,12.99,0.0,-0.0,-0.04,0.37,11.41


In [6]:
#Clean data set
tracking.drop(['speed', 'ball_z', 'ball_speed', 'period'], axis = 1, inplace = True)

#Merge datasets 
sBall = tracking.merge(eventsPass, on='frame_idx')

#Add distance column for contested pass
sBall['distance'] = np.sqrt((sBall['ball_x'] - sBall['x'])**2 + (sBall['ball_y'] - sBall['y'])**2)

#Only keep contested passes by the other team 
sBall = sBall[sBall['distance'] < 5]
sBall = sBall[sBall['team_id_pass'] != sBall['team_id']]

#Remove extra player coordinates for the contested pass to only have one entry for the pass event
sBall = sBall.drop_duplicates(subset='frame_idx', keep='first')

sBall.head()

Unnamed: 0,frame_idx,game_clock,last_touch,live,team_id,player_id,x,y,ball_x,ball_y,...,start_x,start_y,end_x,end_y,pass_length,pass_angle,aligned_clock,next_end_x,next_end_y,distance
37,130,5.2,1207,True,15154,480287,-2.1,27.51,-5.49,27.04,...,38.5,87.8,68.1,67.1,34.1,5.9,5.2,36.9,87.4,3.422426
47,175,7.0,15154,True,1207,55711,23.36,9.91,22.93,12.97,...,29.2,27.3,64.1,22.2,36.8,6.2,7.0,68.1,67.1,3.090065
79,342,13.68,1207,True,15154,119089,-31.36,18.93,-32.44,21.05,...,23.6,81.1,43.9,95.1,23.3,0.4,13.68,78.6,20.0,2.379244
100,494,19.76,1207,True,15154,213665,-14.2,28.59,-16.49,32.48,...,33.4,95.3,67.1,82.5,36.4,6.0,19.76,33.4,95.3,4.514
145,1909,76.36,1207,True,15154,119089,4.1,-8.09,2.97,-12.05,...,54.2,35.8,76.7,61.6,29.4,0.6,76.360001,52.5,46.1,4.11807


## 1.3 Condition 3: 180 Degree Contested Pass

In [7]:
#Method that calculates the angle of two lines
def ang(lineA, lineB):
    # Get nicer vector form
    vA = [(lineA[0][0]-lineA[1][0]), (lineA[0][1]-lineA[1][1])]
    vB = [(lineB[0][0]-lineB[1][0]), (lineB[0][1]-lineB[1][1])]
    # Get dot prod
    dot_prod = dot(vA, vB)
    # Get magnitudes
    magA = dot(vA, vA)**0.5
    magB = dot(vB, vB)**0.5
    # Get cosine value
    cos_ = dot_prod/magA/magB
    # Get angle in radians and then convert to degrees
    angle = math.acos(dot_prod/magB/magA)
    # Basically doing angle <- angle mod 360
    ang_deg = math.degrees(angle)%360

    if ang_deg-180>=0:
        # As in if statement
        return 360 - ang_deg
    else: 

        return ang_deg
  
#Call the method to calculate angle from the two passes
sBall['sbAngle'] = ang(((sBall['start_x'] , sBall['start_y']),(sBall['end_x'] , sBall['end_y'])) ,((sBall['end_x'] , sBall['end_y']),(sBall['next_end_x'] , sBall['next_end_y'])))

#Remove rows that are outside of 180 degree threshold
sBall = sBall[sBall['sbAngle'] < 180]

sBall.head()

Unnamed: 0,frame_idx,game_clock,last_touch,live,team_id,player_id,x,y,ball_x,ball_y,...,start_x,start_y,end_x,end_y,pass_length,pass_angle,aligned_clock,next_end_x,next_end_y,distance
79,342,13.68,1207,True,15154,119089,-31.36,18.93,-32.44,21.05,...,23.6,81.1,43.9,95.1,23.3,0.4,13.68,78.6,20.0,2.379244
145,1909,76.36,1207,True,15154,119089,4.1,-8.09,2.97,-12.05,...,54.2,35.8,76.7,61.6,29.4,0.6,76.360001,52.5,46.1,4.11807
217,2773,110.92,1207,True,15154,95321,-16.63,16.13,-17.66,18.63,...,36.9,77.0,74.4,79.5,39.4,0.0,110.919998,31.5,70.1,2.703868
319,5776,231.04,1207,True,15154,177928,15.77,-31.9,12.75,-30.93,...,62.0,5.5,89.2,41.1,37.4,0.7,231.039993,62.0,5.5,3.171955
377,6277,251.08,15154,True,1207,140040,30.13,-28.47,31.98,-30.13,...,22.4,91.9,43.4,91.9,22.1,0.0,251.080002,22.4,91.9,2.485578


## 1.4 Final Data Preperation

In [8]:
#Cleaned for final deliverable
final = sBall.filter(['game_clock', 'team_id_pass', 'player_id_pass', 'end_x', 'end_y'], axis = 1)

final.rename(columns = { 'game_clock' : 'Game Clock', 'team_id_pass' : 'Team', 
       'player_id_pass' : 'Player', 'end_x' : 'Location X', 'end_y' : 'Location Y'
}, inplace = True)

final = final.reset_index(drop=True)

## 1.5 Final Deliverable 

In [9]:
display(final)

Unnamed: 0,Game Clock,Team,Player,Location X,Location Y
0,13.68,1207,244766,43.9,95.1
1,76.36,1207,86898,76.7,61.6
2,110.92,1207,442451,74.4,79.5
3,231.04,1207,119644,89.2,41.1
4,251.08,15154,213213,43.4,91.9
5,254.68,15154,177928,63.5,90.4
6,364.88,1207,119644,41.8,31.9
7,374.32,1207,442451,60.8,69.5
8,461.64,15154,213213,63.8,92.8
9,817.8,1207,86898,67.2,29.7
