# BSS: Basketball Statistic System

This system tries to replicate the [euRobasketAu](https://github.com/jgalowe/euRobasketAu) R scripts in Python.

It scrapes the data and then converts the raw numbers into _advanced stats_.

The data is provided live by [Genius Sports](https://developer.geniussports.com/). The documentation for the Basketball feed can be found [here](https://developer.geniussports.com/livestats/tvfeed/index_basketball.html).

Messages are sent as JSON structures encoded in UTF-8.

An example of a raw JSON file:

https://fibalivestats.dcd.shared.geniussports.com/data/2087737/data.json

In [1]:
# Let's first load all required packages...
import json  # https://docs.python.org/3/library/json.html
import os
import pandas as pd
import numpy as np
import datetime


# Load constants
from config import *
import tools

In [2]:
# Load the play-by-play data of the game under analysis.
# (742430 is another game id that was used previously; the assignment was dead
# code because it was overwritten on the very next line.)
game_id = 2087737

pbp_df = tools.get_raw_pbp_fibalivestats(game_id)

# Show the first events only; the full frame has several hundred rows.
pbp_df.head(15)

Game data loaded from local file: data-2087737.json


Unnamed: 0,team_name,team_short_name,clock,s1,s2,lead,tno,period,periodType,pno,player,success,actionType,actionNumber,previousAction,qualifier,subType,scoring
563,,,00:10:00,0,0,0,0,1,REGULAR,0,,1,jumpball,4,,[],startperiod,0
564,,,00:10:00,0,0,0,0,1,REGULAR,0,,1,period,2,,[],start,0
565,,,00:10:00,0,0,0,0,1,REGULAR,0,,1,game,1,,[],start,0
561,Melbourne United,United,00:09:56,0,0,0,1,1,REGULAR,10,J. Lual-Acuil Jr,1,jumpball,7,4.0,[],lost,0
562,Tasmania JackJumpers,JackJumpers,00:09:56,0,0,0,2,1,REGULAR,9,F. Krslovic,1,jumpball,6,4.0,[],won,0
560,Tasmania JackJumpers,JackJumpers,00:09:42,0,0,0,2,1,REGULAR,9,F. Krslovic,0,3pt,9,,[],jumpshot,1
559,Melbourne United,United,00:09:38,0,0,0,1,1,REGULAR,6,J. White,1,rebound,10,9.0,[],defensive,0
558,Melbourne United,United,00:09:31,0,0,0,1,1,REGULAR,6,J. White,0,3pt,11,,[],jumpshot,1
557,Tasmania JackJumpers,JackJumpers,00:09:27,0,0,0,2,1,REGULAR,9,F. Krslovic,1,rebound,12,11.0,[],defensive,0
556,Tasmania JackJumpers,JackJumpers,00:09:22,0,0,0,2,1,REGULAR,11,J. Adams,0,3pt,13,,[],pullupjumpshot,1


In [3]:
import re

# Sanity check: no player name should contain a digit or a comma
# (an empty result means every name is clean).
# The pattern is a raw string because '\d' in a plain literal is an invalid
# escape sequence; the character class matches a digit OR a comma in one pass.
# na=False keeps the mask strictly boolean even if a name were missing.
pbp_df.loc[pbp_df['player'].str.contains(r'[\d,]', na=False)]


Unnamed: 0,team_name,team_short_name,clock,s1,s2,lead,tno,period,periodType,pno,player,success,actionType,actionNumber,previousAction,qualifier,subType,scoring


In [4]:
# Column labels of the play-by-play frame as a plain Python list.
pbp_cols = pbp_df.columns.tolist()
pbp_cols

['team_name',
 'team_short_name',
 'clock',
 's1',
 's2',
 'lead',
 'tno',
 'period',
 'periodType',
 'pno',
 'player',
 'success',
 'actionType',
 'actionNumber',
 'previousAction',
 'qualifier',
 'subType',
 'scoring']

In [5]:
# Summarise the play-by-play frame: index range, column dtypes and non-null counts.
pbp_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 566 entries, 563 to 1
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   team_name        566 non-null    object
 1   team_short_name  566 non-null    object
 2   clock            566 non-null    object
 3   s1               566 non-null    int64 
 4   s2               566 non-null    int64 
 5   lead             566 non-null    int64 
 6   tno              566 non-null    int64 
 7   period           566 non-null    int64 
 8   periodType       566 non-null    object
 9   pno              566 non-null    int64 
 10  player           566 non-null    object
 11  success          566 non-null    int64 
 12  actionType       566 non-null    object
 13  actionNumber     566 non-null    int64 
 14  previousAction   566 non-null    object
 15  qualifier        566 non-null    object
 16  subType          566 non-null    object
 17  scoring          566 non-null    in

In [6]:
# Read the raw game JSON; team names and starters are extracted from it below.
game_json = tools.get_json_data(game_id)

# Each entry of team_names unpacks into a (full name, short name) pair:
# entry 0 is used for team 1, entry 1 for team 2.
team_names = tools.get_team_names(game_json)
(team_name_1, team_short_name_1) = team_names[0]
(team_name_2, team_short_name_2) = team_names[1]

print(f"Game {team_name_1} ({team_short_name_1}) vs {team_name_2} ({team_short_name_2})")

# Starting players of team 1 and team 2 (displayed below as a set of names).
starters_1, starters_2 = (tools.get_starters(game_json, tno) for tno in (1, 2))

starters_1

Game data loaded from local file: data-2087737.json
Game Melbourne United (United) vs Tasmania JackJumpers (JackJumpers)


{'C. Agada', 'J. Lual-Acuil Jr', 'J. White', 'M. Dellavedova', 'S. Ili'}

In [13]:
# Let's do some stats with the starting team
team = frozenset(starters_1)

# Compute the stints of team 1 here instead of relying on a `stints1` variable
# that is only defined by a cell further down the notebook: without this line
# the cell raises NameError on a fresh kernel (Restart & Run All).
# NOTE(review): assumes tools.pbp_stints_extract is deterministic and does not
# modify pbp_df, so calling it again later yields the same mapping - confirm.
stints1 = tools.pbp_stints_extract(pbp_df, starters_1, 1)

# Keep only the play-by-play rows inside the time intervals stored for the
# starting lineup, then show the ones from the 4th period.
pbp_stints_df = tools.pbp_get_ranges_df(pbp_df, stints1[team])
pbp_stints_df.loc[pbp_stints_df['period'] == 4]


  return pbp_df[pbp_get_ranges_mask(pbp_df, time_intervals)]


Unnamed: 0,team_name,team_short_name,clock,s1,s2,lead,tno,period,periodType,pno,player,success,actionType,actionNumber,previousAction,qualifier,subType,scoring
55,Melbourne United,United,00:02:42,64,71,-7,1,4,REGULAR,9,Y. Baba,1,substitution,690,,[],out,0
56,Melbourne United,United,00:02:42,64,71,-7,1,4,REGULAR,13,M. Dellavedova,1,substitution,691,,[],in,0
57,Tasmania JackJumpers,JackJumpers,00:02:42,64,71,-7,2,4,REGULAR,11,J. Adams,1,freethrow,689,,"[fastbreak, fromturnover]",2of2,1
58,Tasmania JackJumpers,JackJumpers,00:02:42,64,70,-6,2,4,REGULAR,11,J. Adams,1,freethrow,688,,"[fastbreak, fromturnover]",1of2,1
59,Tasmania JackJumpers,JackJumpers,00:02:42,64,69,-5,2,4,REGULAR,11,J. Adams,1,foulon,687,686.0,[],,0
60,Melbourne United,United,00:02:42,64,69,-5,1,4,REGULAR,16,C. Agada,1,foul,686,,"[shooting, 2freethrow]",personal,0
54,Melbourne United,United,00:02:24,64,71,-7,1,4,REGULAR,13,M. Dellavedova,0,2pt,695,,[pointsinthepaint],pullupjumpshot,1
52,Melbourne United,United,00:02:21,64,71,-7,1,4,REGULAR,6,J. White,0,2pt,697,,"[2ndchance, pointsinthepaint]",layup,1
53,Melbourne United,United,00:02:21,64,71,-7,1,4,REGULAR,6,J. White,1,rebound,696,695.0,[],offensive,0
51,Tasmania JackJumpers,JackJumpers,00:02:17,64,71,-7,2,4,REGULAR,14,M. McIntosh,1,rebound,698,697.0,[],defensive,0


In [11]:

# Stints of each team, derived from its starting lineup and the play-by-play.
start_lineup1 = tools.get_starters(game_json, 1)
stints1 = tools.pbp_stints_extract(pbp_df, start_lineup1, 1)

start_lineup2 = tools.get_starters(game_json, 2)
stints2 = tools.pbp_stints_extract(pbp_df, start_lineup2, 2)

# Add one stint column per team; each call returns the stints table and a
# play-by-play frame extended with the new column.
stints1_df, pbp2_df = tools.pbp_add_stint_col(pbp_df, stints1, "stint1")
stints2_df, pbp3_df = tools.pbp_add_stint_col(pbp2_df, stints2, "stint2")

# Keep only the events that count for statistics. Both masks are built from the
# frame being filtered (pbp3_df) and combined, instead of masking an already
# filtered frame with a Series indexed like the unfiltered one, which only
# works through implicit index alignment.
is_stat_action = ~pbp3_df['actionType'].isin(ACT_NON_STATS)
is_stat_subtype = ~pbp3_df['subType'].isin(ACTSSUB_NON_STATS)
pbp_stats_df = pbp3_df.loc[is_stat_action & is_stat_subtype]

pbp_stats_df

Unnamed: 0,team_name,team_short_name,clock,s1,s2,lead,tno,period,periodType,pno,player,success,actionType,actionNumber,previousAction,qualifier,subType,scoring,stint1,stint2
561,Melbourne United,United,00:09:56,0,0,0,1,1,REGULAR,10,J. Lual-Acuil Jr,1,jumpball,7,4,[],lost,0,1,1
562,Tasmania JackJumpers,JackJumpers,00:09:56,0,0,0,2,1,REGULAR,9,F. Krslovic,1,jumpball,6,4,[],won,0,1,1
560,Tasmania JackJumpers,JackJumpers,00:09:42,0,0,0,2,1,REGULAR,9,F. Krslovic,0,3pt,9,,[],jumpshot,1,1,1
559,Melbourne United,United,00:09:38,0,0,0,1,1,REGULAR,6,J. White,1,rebound,10,9,[],defensive,0,1,1
558,Melbourne United,United,00:09:31,0,0,0,1,1,REGULAR,6,J. White,0,3pt,11,,[],jumpshot,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,Melbourne United,United,00:00:19.200000,71,75,-4,1,4,REGULAR,5,S. Ili,1,foul,760,,[2freethrow],personal,0,12,19
5,Melbourne United,United,00:00:14.100000,71,76,-5,1,4,REGULAR,9,Y. Baba,0,3pt,768,,[],jumpshot,1,12,19
4,Tasmania JackJumpers,JackJumpers,00:00:11.600000,71,76,-5,2,4,REGULAR,14,M. McIntosh,1,rebound,769,768,[],defensive,0,12,19
3,Tasmania JackJumpers,JackJumpers,00:00:06.800000,71,76,-5,2,4,REGULAR,11,J. Adams,1,turnover,771,,[],badpass,0,12,19


In [24]:
# Inspect a single event. `.loc` selects by index label (not by position), so
# this is the row whose index label is 4.
pbp_stats_df.loc[4]

# teams

team_name          Tasmania JackJumpers
team_short_name             JackJumpers
clock                   00:00:11.600000
s1                                   71
s2                                   76
lead                                 -5
tno                                   2
period                                4
periodType                      REGULAR
pno                                  14
player                      M. McIntosh
success                               1
actionType                      rebound
actionNumber                        769
previousAction                      768
qualifier                            []
subType                       defensive
scoring                               0
stint1                               12
stint2                               19
Name: 4, dtype: object

In [25]:
# Collect the actions found in the play-by-play data via the project helper
# (presumably the distinct actionType/subType combinations - confirm in tools).
actions = tools.pbp_get_actions(pbp_df)

actions

Unnamed: 0_level_0,subType
actionType,Unnamed: 1_level_1
2pt,layup
2pt,pullupjumpshot
2pt,drivinglayup
2pt,turnaroundjumpshot
2pt,floatingjumpshot
2pt,dunk
2pt,jumpshot
2pt,stepbackjumpshot
2pt,hookshot
2pt,fadeaway


In [29]:

# Count the events of every (actionType, subType) pair per team and per
# `stint1` value; combinations that never occur are filled with 0.
# Reshaping references:
# https://pandas.pydata.org/docs/user_guide/reshaping.html
# https://nikgrozev.com/2015/07/01/reshaping-in-pandas-pivot-pivot-table-stack-and-unstack-explained-with-pictures/
row_keys = ['team_short_name', 'stint1']
column_keys = ['actionType', 'subType']
pivot_stat = pd.pivot_table(
    pbp_stats_df,
    index=row_keys,
    columns=column_keys,
    values='actionNumber',
    aggfunc=len,
    fill_value=0,
)

# Flatten the two-level column labels into "<actionType>_<subType>" strings
# (an empty subType leaves a trailing underscore, e.g. "steal_").
flat_labels = []
for level_values in pivot_stat.columns:
    flat_labels.append('_'.join(level_values))
pivot_stat.columns = flat_labels
pivot_stat

Unnamed: 0_level_0,Unnamed: 1_level_0,2pt_drivinglayup,2pt_dunk,2pt_fadeaway,2pt_floatingjumpshot,2pt_hookshot,2pt_jumpshot,2pt_layup,2pt_pullupjumpshot,2pt_stepbackjumpshot,2pt_turnaroundjumpshot,...,jumpball_won,rebound_defensive,rebound_offensive,rebound_offensivedeadball,steal_,timeout_full,turnover_badpass,turnover_ballhandling,turnover_offensive,turnover_outofbounds
team_short_name,stint1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
JackJumpers,1,2,0,0,0,0,2,3,2,0,0,...,1,7,4,0,4,0,0,1,2,0
JackJumpers,2,3,0,0,0,2,1,1,1,0,0,...,0,4,4,0,0,1,0,0,0,0
JackJumpers,3,0,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
JackJumpers,4,0,0,0,0,0,0,1,0,0,1,...,0,0,0,0,2,0,0,0,0,0
JackJumpers,5,0,0,0,0,0,0,4,0,0,0,...,0,0,2,0,1,0,0,1,0,0
JackJumpers,6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
JackJumpers,7,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
JackJumpers,8,0,0,0,1,0,0,0,0,0,0,...,0,1,1,0,1,0,0,0,0,0
JackJumpers,9,0,0,1,0,0,0,1,0,0,0,...,0,2,0,0,0,0,0,0,0,0
JackJumpers,10,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [30]:
# Write the row index to the file: pivot_stat is indexed by
# (team_short_name, stint1), so exporting with index=False would leave only the
# counts, with nothing identifying which team and stint each row belongs to.
pivot_stat.to_csv("stint1_stats.csv")

In [27]:
# TODO: attach the stint metadata (id, lineup, intervals) to the per-stint stats, e.g.:
# pd.merge(pivot_stat, stints1_df, left_on="stint1", right_on="id", how="left")




Unnamed: 0,"(2pt, drivinglayup)","(2pt, dunk)","(2pt, fadeaway)","(2pt, floatingjumpshot)","(2pt, hookshot)","(2pt, jumpshot)","(2pt, layup)","(2pt, pullupjumpshot)","(2pt, stepbackjumpshot)","(2pt, turnaroundjumpshot)",...,"(substitution, in)","(substitution, out)","(timeout, full)","(turnover, badpass)","(turnover, ballhandling)","(turnover, offensive)","(turnover, outofbounds)",id,lineup,intervals
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,,,
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1.0,"(J. White, C. Agada, J. Lual-Acuil Jr, M. Dell...","[(1, 00:10:00, 00:05:53), (2, 00:10:00, 00:07:..."
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,15.0,"(M. Peatling, Y. Baba, C. Agada, J. Lual-Acuil...","[(3, 00:01:46, 00:00:00), (4, 00:10:00, 00:09:..."
3,2,0,0,0,0,2,3,2,0,0,...,5,5,0,0,1,2,0,1.0,"(J. White, C. Agada, J. Lual-Acuil Jr, M. Dell...","[(1, 00:10:00, 00:05:53), (2, 00:10:00, 00:07:..."
4,3,0,0,0,2,1,1,1,0,0,...,5,5,1,0,0,0,0,2.0,"(J. White, Y. Baba, C. Agada, J. Lual-Acuil Jr...","[(1, 00:05:53, 00:05:05), (2, 00:07:36, 00:04:..."
5,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,3.0,"(J. White, Y. Baba, B. Newley, J. Lual-Acuil J...","[(1, 00:05:05, 00:04:29)]"
6,0,0,0,0,0,0,1,0,0,1,...,4,4,0,0,0,0,0,4.0,"(M. Peatling, B. Newley, Y. Baba, J. Lual-Acui...","[(1, 00:04:29, 00:03:26)]"
7,0,0,0,0,0,0,4,0,0,0,...,0,0,0,0,1,0,0,5.0,"(M. Peatling, Y. Baba, B. Newley, A. Hukporti,...","[(1, 00:03:26, 00:01:16)]"
8,0,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,6.0,"(J. White, B. Newley, C. Agada, A. Hukporti, S...","[(1, 00:01:16, 00:00:55.700000)]"
9,1,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,7.0,"(J. White, C. Agada, M. Dellavedova, A. Hukpor...","[(1, 00:00:55.700000, 00:00:00), (2, 00:10:00,..."
