In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Use matplotlib's 'ggplot' style sheet for all figures in this notebook.
plt.style.use('ggplot')
# Let pandas display up to 500 columns so wide frames are not truncated.
pd.set_option('display.max_columns', 500)


from src import plotting_player_location_frame

In [62]:
# Load the frame-by-frame tracking data; the CSV's first column becomes the index.
df_tracking = pd.read_csv('../data/SecondSpectrum_data.csv', index_col=0)

In [63]:
# Load the players table; the CSV's first column becomes the index.
# NOTE(review): this path has no `.csv` extension, unlike the other data
# files loaded in this notebook — confirm the filename is intentional.
df_players = pd.read_csv('../data/players_csv', index_col=0)


In [64]:
# Load the Opta event data; the CSV's first column becomes the index.
df_opta = pd.read_csv('../data/opta_data.csv', index_col=0)

In [65]:
# Load the marking data; the CSV's first column becomes the index.
df_marking = pd.read_csv('../data/marking_data.csv', index_col=0)

In [66]:
# Report the dimensions of each loaded frame; DataFrame.shape is a
# (rows, columns) tuple, so it is unpacked straight into the template.
print('Size of frame by frame data          : {} rows and {} columns'.format(*df_tracking.shape))
print('Size of Opta data                    : {} rows and {} columns'.format(*df_opta.shape))
print('Size of Marking data.                : {} rows and {} columns'.format(*df_marking.shape))

Size of frame by frame data          : 143165 rows and 10 columns
Size of Opta data                    : 1849 rows and 25 columns
Size of Marking data.                : 2538 rows and 64 columns


An initial look shows that the Marking data and Opta data have far fewer rows than the frame-by-frame tracking data, although they have more features, which give more context to the play and the current situation on the field.

To understand the features of the tracking, Marking and Opta data, take a look at the PDFs included in the repo:
* `/Intern Project/2020_01_14-2S-MLS - Tracking and Metadata Output Formats_v0.1.pdf` for tracking data
* `/Intern Project/MLS - Insight Feed Output Format.pdf` for opta and marking data 

One thing to note:
* Opta data and Marking data both contain event objects; each event is either a marking event or an Opta event
* If an event is both Opta and marking, it is included in both datasets

## Focus on Opta Data

In [67]:
# (rows, columns) of the Opta frame.
df_opta.shape

(1849, 25)

In [68]:
# Per-column dtypes and non-null counts for the Opta frame.
df_opta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1849 entries, 0 to 1848
Data columns (total 25 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               1849 non-null   int64  
 1   eventId          1849 non-null   int64  
 2   typeId           1849 non-null   int64  
 3   periodId         1849 non-null   int64  
 4   timeMin          1849 non-null   int64  
 5   timeSec          1849 non-null   int64  
 6   contestantId     1849 non-null   object 
 7   opContestantId   1849 non-null   int64  
 8   outcome          1849 non-null   int64  
 9   x                1849 non-null   float64
 10  y                1849 non-null   float64
 11  timeStamp        1849 non-null   object 
 12  lastModified     1849 non-null   object 
 13  qualifier        1849 non-null   object 
 14  game_id          1849 non-null   object 
 15  period           1849 non-null   int64  
 16  alignedFrameIdx  1849 non-null   int64  
 17  alignedClock  

In [69]:
# The only columns with missing values are keyPass, assist and markingId; opPlayerId, playerId and playerName each have fewer than 20 missing values

In [70]:
# Preview the first five rows of the Opta frame.
df_opta.head()

Unnamed: 0,id,eventId,typeId,periodId,timeMin,timeSec,contestantId,opContestantId,outcome,x,y,timeStamp,lastModified,qualifier,game_id,period,alignedFrameIdx,alignedClock,sequence_idx,markingId,opPlayerId,playerId,playerName,assist,keyPass
0,2190065675,2,32,1,0,0,77o8sorlijzpr0t4s7231zzxp,1581,1,0.0,0.0,2020-03-08T23:11:20.936Z,2020-03-10T14:52:48Z,"[{'id': 2361331631, 'qualifierId': 127, 'value...",79421577-d3b0-4ff7-95b8-8dbbaa29d49b,1,0,0.0,0,,,,,,
1,2190065679,2,32,1,0,0,1oogd0fpwledjm2ycsuv7afze,15154,1,0.0,0.0,2020-03-08T23:11:20.936Z,2020-03-10T14:52:49Z,"[{'id': 2361331637, 'qualifierId': 127, 'value...",79421577-d3b0-4ff7-95b8-8dbbaa29d49b,1,0,0.0,0,,,,,,
2,2190065689,3,1,1,0,1,77o8sorlijzpr0t4s7231zzxp,1581,1,49.6,49.9,2020-03-08T23:11:22.093Z,2020-03-10T15:02:04Z,"[{'id': 2361331667, 'qualifierId': 141, 'value...",79421577-d3b0-4ff7-95b8-8dbbaa29d49b,1,1,0.04,0,54eb45e2-1933-45b8-99d2-87afdd7e440d,55817.0,e5d62a8i9q48nxddohhp80r11,D. Váleri,,
3,2190065697,4,1,1,0,3,77o8sorlijzpr0t4s7231zzxp,1581,1,35.3,41.6,2020-03-08T23:11:24.459Z,2020-03-08T23:11:27Z,"[{'id': 2361331721, 'qualifierId': 213, 'value...",79421577-d3b0-4ff7-95b8-8dbbaa29d49b,1,58,2.32,0,68b4e18d-cb3f-437b-a72d-0a51569b1093,96622.0,b4etfnmz4dzoa4r9g0403catx,D. Chará,,
4,2190065701,5,1,1,0,6,77o8sorlijzpr0t4s7231zzxp,1581,1,34.8,11.0,2020-03-08T23:11:27.180Z,2020-03-08T23:11:30Z,"[{'id': 2361331741, 'qualifierId': 213, 'value...",79421577-d3b0-4ff7-95b8-8dbbaa29d49b,1,110,4.4,0,8dff78d4-53ce-4378-a674-94724c6f8d6c,131840.0,7yuie6z5q0g0mans9pb5mdxhx,C. Duvall,,


In [71]:
# optaId values of the marking rows whose shooterId is not null.
shot_optaid = df_marking.loc[df_marking['shooterId'].notna(), 'optaId']

In [72]:
# alignedFrameIdx of the Opta event at index label 48, restricted to events
# whose id appears in shot_optaid.
# NOTE(review): 48 is a hard-coded index label — confirm which shot it is
# meant to select.
df_opta.loc[df_opta['id'].isin(shot_optaid), 'alignedFrameIdx'].loc[48]

3524

In [73]:
# Presumably plots player locations for frame 3524, the alignedFrameIdx value
# returned by the lookup in the previous cell.
# NOTE(review): the recorded run of this call raised
# `TypeError: string indices must be integers` — the helper is defined in
# `src`, so confirm what argument type it expects.
plotting_player_location_frame(3524)

TypeError: string indices must be integers