In [166]:
import pandas as pd
import numpy as np
import json
import csv
import os
from os.path import exists

# Retrieving the data

In [167]:
# This would be the parameters to receive in the final function
gameId = "2017020001"
path = "data/" # "C:/UdeM-ws/DataScience-A22/IFT6758-Project/data"

# Preparing the variables with the file-names
file_name = path + "play-by-play-" + gameId + ".json"
output_csv = path + "play-by-play-" + gameId + ".csv"

# The types of events we're interested in  (referring to 'eventTypeId' from NHL live data):
play_types = ['SHOT', 'GOAL']   

In [168]:
# Open the input data-file, with the json retrieved from NHL api
with open(file_name, 'r') as f :
  data = json.load(f)

In [169]:
events = data['liveData']['plays']['allPlays']

# Filtering the events (shots and goals)

In [170]:
# Filtering SHOTs and GOALs
shots_data = filter(  lambda x: x['result']['eventTypeId'] in play_types, events)

pcolumns = ['eventIdx', 'eventTypeId', 'eventId', 'period', 'periodType', 'periodTime', 'dateTime', 'teamId', 'teamName', 'x', 'y', 'shotType', 'emptyNet', 'strength', 'shooter', 'goalie']

if exists(output_csv):
  os.remove(output_csv)
with open(output_csv, 'a', newline='') as f:
  writer = csv.writer(f)
  writer.writerow(pcolumns)
  for x in shots_data :
    # Retrieving nested json data...
    x_shooter = ""
    x_goalie = ""
    # x_shotType = ""
    x_emptyNet = ""
    x_strength = ""

    if (x['result']['eventTypeId'] == 'GOAL') :
      x_strength = x['result']['strength']['name']
      x_emptyNet = x['result']['emptyNet']

    for player in x['players'] :
      if player['playerType'] == "Shooter" :
        x_shooter = player['player']['fullName']
      if player['playerType'] == "Goalie" :
        x_goalie = player['player']['fullName']


    row_x = [x['about']['eventIdx'], 
          x['result']['eventTypeId'],           
          x['about']['eventId'],
          x['about']['period'],
          x['about']['periodType'], 
          x['about']['periodTime'], 
          x['about']['dateTime'],
          x['team']['id'],
          x['team']['name'],
          x['coordinates']['x'],
          x['coordinates']['y'],
          x['result']['secondaryType'],
          x_emptyNet,
          x_strength,
          x_shooter,
          x_goalie  ]  #])
    writer.writerow(row_x)
 

# Retrieving events (format csv)... into dataFrame

## 1. small snippet of the final dataframe

In [172]:
# Parameter will be the filepath to the csv file with the events
fp = output_csv    # the output file from previous part.

pd_events = pd.read_csv(fp, parse_dates=[2])

pd_events.head(10)

Unnamed: 0,eventIdx,eventTypeId,eventId,period,periodType,periodTime,dateTime,teamId,teamName,x,y,shotType,emptyNet,strength,shooter,goalie
0,5,SHOT,54,1,REGULAR,00:38,2017-10-04T23:18:01Z,52,Winnipeg Jets,-36.0,-28.0,Wrist Shot,,,Josh Morrissey,Frederik Andersen
1,7,SHOT,55,1,REGULAR,00:49,2017-10-04T23:18:12Z,52,Winnipeg Jets,-75.0,1.0,Wrist Shot,,,Shawn Matthias,Frederik Andersen
2,11,SHOT,58,1,REGULAR,01:03,2017-10-04T23:19:04Z,52,Winnipeg Jets,-73.0,10.0,Backhand,,,Bryan Little,Frederik Andersen
3,16,SHOT,62,1,REGULAR,01:46,2017-10-04T23:20:20Z,10,Toronto Maple Leafs,80.0,-3.0,Wrist Shot,,,Eric Fehr,Steve Mason
4,24,SHOT,68,1,REGULAR,03:42,2017-10-04T23:23:13Z,52,Winnipeg Jets,-44.0,-21.0,Snap Shot,,,Patrik Laine,Frederik Andersen
5,25,SHOT,69,1,REGULAR,03:50,2017-10-04T23:23:21Z,52,Winnipeg Jets,-38.0,-5.0,Slap Shot,,,Dustin Byfuglien,Frederik Andersen
6,27,SHOT,71,1,REGULAR,04:30,2017-10-04T23:24:01Z,52,Winnipeg Jets,-47.0,-23.0,Slap Shot,,,Tyler Myers,Frederik Andersen
7,28,SHOT,73,1,REGULAR,04:39,2017-10-04T23:24:17Z,52,Winnipeg Jets,-62.0,21.0,Wrist Shot,,,Nikolaj Ehlers,Frederik Andersen
8,29,SHOT,74,1,REGULAR,04:49,2017-10-04T23:24:20Z,52,Winnipeg Jets,-36.0,-18.0,Wrist Shot,,,Jacob Trouba,Frederik Andersen
9,31,SHOT,75,1,REGULAR,05:11,2017-10-04T23:24:43Z,10,Toronto Maple Leafs,60.0,7.0,Snap Shot,,,Patrick Marleau,Steve Mason


## 2. Discussion... how could we add the actual srength of players on the ice ?

To do so, we could include the strength of players on ice, including in the events extraction cycle those that affects such info, but only to keep count of each team stregth (not to include in the output csv or dataframe): 
1. Assign the total strength for each team, at the beginning of the game (5), as "global variables"
2. include events with TypeId = 'PENALTY' in the events-filter
3. during the events iteration, consider Penalty events only to affect the strength global variables (according to the rules of the game) :
... when a penalty appears, affect the strength for the team indicated in such event, keeping record of the type of end for it to apply for the next events, that is: keep the time to finish the penalty (e.g., 2 min) and if it also could end with a goal (e.g., Minor penalty) or for the full time indicated
4. when reading events, first verify if the strength variables should be updated (e.g., time fulfilled)... if the event is a goal, verify if apply to adjust the strength variables just after recording such event with the prior strength
... to do this in the appropiate way it is necessary to know all the rules about penalties for different cases, e.g.: 
- the penaltyMinutes are of real time or playTime (which can be stopped during the game)?
- the penalty time goes through different periods (that is, the player in penalty starts the next period under penalty if the penalty time wasn't fulfilled at the end of prior period) or the penalty is automatically ended with the end of a period?

Features to keep track of each PENALTY:
x.['result']['eventTypeId'] == 'PENALTY'
x.['result']['penaltySeverity']
x.['result']['penaltyMinutes']
x.['team']['name']
x.['team']['id']
x.['about']['period']
x.['about']['periodTime']


## 3. Discussion... 3 additional features to consider