In [119]:
import json  # https://docs.python.org/3/library/json.html
import pandas as pd

# Load the raw game data. The context manager closes the file handle as soon as
# parsing finishes, and the explicit encoding keeps the read independent of the
# platform's default locale (JSON interchange files are UTF-8).
with open('data.json', encoding='utf-8') as data_file:
    data_json = json.load(data_file)

In [120]:
# Extract the play-by-play data: the records stored under the "pbp" key are
# expanded into a DataFrame.
# Approach: https://towardsdatascience.com/how-to-convert-json-into-a-pandas-dataframe-100b2ae1e0d8
pbp_json = data_json["pbp"]
pbp_df = pd.json_normalize(data_json, record_path=['pbp'])

# A fixed seed makes the previewed rows identical on every Restart & Run All.
pbp_df.sample(10, random_state=0)


Unnamed: 0,gt,clock,s1,s2,lead,tno,period,periodType,pno,player,...,shirtNumber,firstName,familyName,internationalFirstName,internationalFamilyName,firstNameInitial,familyNameInitial,internationalFirstNameInitial,internationalFamilyNameInitial,scoreboardName
41,01:32,01:32:00,66,71,-5,1,4,REGULAR,6,J. White,...,14,Jack,White,Jack,White,J,W,J,W,J. White
183,02:02,02:02:00,48,57,-9,2,3,REGULAR,4,J. Bairstow,...,21,Jarred,Bairstow,Jarred,Bairstow,J,B,J,B,J. Bairstow
490,03:49,03:49:00,11,9,2,1,1,REGULAR,5,S. Ili,...,51,Shea,Ili,Shea,Ili,S,I,S,I,Shili_
201,03:52,03:52:00,46,55,-9,2,3,REGULAR,11,J. Adams,...,14,Josh,Adams,Josh,Adams,J,A,J,A,J. Adams
110,06:26,06:26:00,63,64,-1,2,4,REGULAR,9,F. Krslovic,...,20,Fabijan,Krslovic,Fabijan,Krslovic,F,K,F,K,F. Krslovic
543,07:57,07:57:00,3,0,3,1,1,REGULAR,6,J. White,...,14,Jack,White,Jack,White,J,W,J,W,J. White
203,04:08,04:08:00,46,52,-6,1,3,REGULAR,6,J. White,...,14,Jack,White,Jack,White,J,W,J,W,J. White
50,02:04,02:04:00,64,71,-7,2,4,REGULAR,12,J. Magette,...,2,Josh,Magette,Josh,Magette,J,M,J,M,J. Magette
482,03:26,03:26:00,11,11,0,1,1,REGULAR,10,J. Lual-Acuil Jr,...,0,Jo,Lual-Acuil Jr,Jo,Lual-Acuil Jr,J,L,J,L,J. Lual-Acuil Jr
266,08:57,08:57:00,40,45,-5,2,3,REGULAR,12,J. Magette,...,2,Josh,Magette,Josh,Magette,J,M,J,M,J. Magette


In [121]:
# Materialise the column labels as a plain Python list and display them.
pbp_cols = [*pbp_df.columns]
pbp_cols

['gt',
 'clock',
 's1',
 's2',
 'lead',
 'tno',
 'period',
 'periodType',
 'pno',
 'player',
 'success',
 'actionType',
 'actionNumber',
 'previousAction',
 'qualifier',
 'subType',
 'scoring',
 'shirtNumber',
 'firstName',
 'familyName',
 'internationalFirstName',
 'internationalFamilyName',
 'firstNameInitial',
 'familyNameInitial',
 'internationalFirstNameInitial',
 'internationalFamilyNameInitial',
 'scoreboardName']

We convert some data types.

* `gt` and `clock_time`. We used [Timestamp](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html), the Pandas version of Datetime.
  * One could also consider using [Timedelta](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_timedelta.html).
  * `clock` uses `MM:SS:CC`, where `CC` is hundredths of a second. The `%f` directive reads these digits as a fraction of a second, so `00:05:10` becomes `00:00:05.100`, which is correct.
  * We can use `.dt.time` on a `datetime` to extract just the time.
  * If needed, we can later pass `errors="coerce"` to get `NaT` instead of an exception for values that fail to parse.

In [128]:
def _parse_game_clock(values: pd.Series, fmt: str) -> pd.Series:
    """Convert a column of clock strings into ``datetime.time`` objects.

    Parameters
    ----------
    values : pd.Series
        Column holding clock readings as strings laid out according to ``fmt``.
    fmt : str
        ``strptime``-style format of the strings, e.g. ``"%M:%S"``.

    Returns
    -------
    pd.Series
        The parsed times. When ``values`` contains no strings at all (the
        column was already converted by an earlier run of this cell) it is
        returned unchanged.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_datetime`` when a string does not match ``fmt``.
    """
    # Guard against re-running the cell: once converted, the column holds
    # datetime.time objects, and parsing those again with a string format
    # either raises or silently mis-reads them.
    if not any(isinstance(value, str) for value in values):
        return values
    return pd.to_datetime(values, format=fmt).dt.time


# `gt` is MM:SS; `clock` is MM:SS:CC, where %f reads the trailing digits as a
# fraction of a second (so "10" means 0.10 s).
pbp_df['gt'] = _parse_game_clock(pbp_df['gt'], "%M:%S")
pbp_df['clock'] = _parse_game_clock(pbp_df['clock'], "%M:%S:%f")

# Both columns show up as `object` dtype because they now hold datetime.time values.
pbp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 566 entries, 0 to 565
Data columns (total 27 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   gt                              566 non-null    object
 1   clock                           566 non-null    object
 2   s1                              566 non-null    int64 
 3   s2                              566 non-null    int64 
 4   lead                            566 non-null    int64 
 5   tno                             566 non-null    int64 
 6   period                          566 non-null    int64 
 7   periodType                      566 non-null    object
 8   pno                             566 non-null    int64 
 9   player                          566 non-null    object
 10  success                         566 non-null    int64 
 11  actionType                      566 non-null    object
 12  actionNumber                    566 non-null    in

In [129]:
# Preview the first five rows to check the converted `gt` and `clock` columns.
pbp_df.head(n=5)

Unnamed: 0,gt,clock,s1,s2,lead,tno,period,periodType,pno,player,...,shirtNumber,firstName,familyName,internationalFirstName,internationalFamilyName,firstNameInitial,familyNameInitial,internationalFirstNameInitial,internationalFamilyNameInitial,scoreboardName
0,00:00:00,00:00:00,73,76,-3,0,4,REGULAR,0,,...,,,,,,,,,,
1,00:00:00,00:00:00,73,76,-3,0,4,REGULAR,0,,...,,,,,,,,,,
2,00:00:05,00:00:05.100000,73,76,-3,1,4,REGULAR,10,J. Lual-Acuil Jr,...,0.0,Jo,Lual-Acuil Jr,Jo,Lual-Acuil Jr,J,L,J,L,J. Lual-Acuil Jr
3,00:00:06,00:00:06.800000,71,76,-5,2,4,REGULAR,11,J. Adams,...,14.0,Josh,Adams,Josh,Adams,J,A,J,A,J. Adams
4,00:00:11,00:00:11.600000,71,76,-5,2,4,REGULAR,14,M. McIntosh,...,0.0,Mikyle,Mcintosh,MiKyle,McIntosh,M,M,M,M,M. Mcintosh
