In [0]:
# Install the CRAN packages used in this notebook.
# Every package is requested from the same explicitly named mirror, so the
# call also works in non-interactive sessions where no default CRAN mirror
# has been configured.
cran_mirror <- "https://cran.stat.unipd.it/"
cran_pkgs <- c("devtools", "vcd", "plotly", "lsr", "tidygraph", "CINNA")
install.packages(cran_pkgs, repos = cran_mirror)

In [0]:
# Install BasketballAnalyzeR from the GitHub repository sndmrc/BasketballAnalyzeR.
devtools::install_github(repo = "sndmrc/BasketballAnalyzeR")

In [0]:
 library(BasketballAnalyzeR)

# CHAPTER 2 
# Data and Basic Statistical Analyses 


In [0]:
# Run PbPmanipulation() on the PbP.BDB data and keep the result as PbP.
PbP <- PbPmanipulation(data = PbP.BDB)

In [0]:
rm(list = ls())

# Four factors for the teams whose Tadd$team code is in the list below;
# the same row positions are taken from Tbox and Obox.
team_codes <- c("BOS", "CLE", "GSW", "HOU")
team_rows <- which(is.element(Tadd$team, team_codes))
ff_selected <- fourfactors(Tbox[team_rows, ], Obox[team_rows, ])
plot(ff_selected)

In [0]:
library(gridExtra)

# Four factors for all rows of Tbox/Obox. The value returned by plot() is
# indexed as a list, and only its first two elements are stacked in one column.
ff_all <- fourfactors(Tbox, Obox)
ff_plots <- plot(ff_all)
grid.arrange(grobs = ff_plots[1:2], ncol = 1)

In [0]:
rm(list = ls())

# Team box scores extended with the PTS and TOV columns of Obox and with
# Tadd$Conference (as CONF); only the rows with CONF equal to "W" are kept.
team_stats <- data.frame(
  Tbox,
  PTS.O = Obox$PTS,
  TOV.O = Obox$TOV,
  CONF = Tadd$Conference
)
west_stats <- subset(team_stats, CONF == "W")
bar_labels <- c("Steals", "Blocks", "Defensive Rebounds")
barline(
  data = west_stats,
  id = "Team",
  bars = c("STL", "BLK", "DREB"),
  line = "TOV.O",
  order.by = "PTS.O",
  labels.bars = bar_labels
)

# Houston Rockets rows of Pbox with MIN of at least 500: P2p, P3p and FTp
# as bars, MIN as the line, ordered by PM.
rockets <- subset(Pbox, Team == "Houston Rockets" & MIN >= 500)
barline(
  data = rockets,
  id = "Player",
  bars = c("P2p", "P3p", "FTp"),
  line = "MIN",
  order.by = "PM",
  labels.bars = c("2P%", "3P%", "FT%"),
  title = "Houston Rockets"
)

In [0]:
rm(list = ls())

# Players to compare; rows of Pbox are kept when Player matches one of them.
pg_names <- c(
  "Russell Westbrook", "Stephen Curry", "Chris Paul",
  "Kyrie Irving", "Damian Lillard", "Kyle Lowry",
  "John Wall", "Rajon Rondo", "Kemba Walker"
)
Pbox.PG <- subset(Pbox, Player %in% pg_names)

# Per-minute statistics. with() evaluates the column names inside Pbox.PG
# without touching the search path, so nothing has to be detached afterwards
# and no global variable can mask a column.
X <- with(
  Pbox.PG,
  data.frame(P2M, P3M, FTM, REB = OREB + DREB, AST, STL, BLK) / MIN
)

# Radial profiles, first with std = FALSE and then with std = TRUE.
radialprofile(data = X, title = Pbox.PG$Player, std = FALSE)
radialprofile(data = X, title = Pbox.PG$Player, std = TRUE)

In [0]:
rm(list = ls())

# Rows of Pbox with MIN of at least 500, and their AST, TOV and PTS per minute.
# with() replaces attach()/detach(): the columns are resolved inside Pbox.sel
# and the search path is left untouched.
Pbox.sel <- subset(Pbox, MIN >= 500)
X <- with(Pbox.sel, data.frame(AST, TOV, PTS) / MIN)
mypal <- colorRampPalette(c("blue", "yellow", "red"))

# All selected rows, labelled by row number (seq_len() is safe for 0 rows).
scatterplot(X, data.var = c("AST", "TOV"), z.var = "PTS",
            labels = seq_len(nrow(X)), palette = mypal)

# Same plot with the San Antonio Spurs rows passed as `subset`,
# first on the full range and then with an explicit `zoom`.
SAS <- which(Pbox.sel$Team == "San Antonio Spurs")
scatterplot(X, data.var = c("AST", "TOV"), z.var = "PTS",
            labels = Pbox.sel$Player, palette = mypal,
            subset = SAS)
scatterplot(X, data.var = c("AST", "TOV"), z.var = "PTS",
            labels = Pbox.sel$Player, palette = mypal,
            subset = SAS, zoom = c(0.20, 0.325, 0.05, 0.10))

In [0]:
rm(list = ls())

# Team-level bubble plot: P2p on x, P3p on y, FTp as colour and the sum of
# P2A, P3A and FTA as bubble size.
# with() is used instead of attach()/detach() so the columns are looked up
# inside Tbox only and the search path is never modified.
X <- with(Tbox, data.frame(T = Team, P2p, P3p, FTp, AS = P2A + P3A + FTA))
labs <- c("2-point shots (% made)",
          "3-point shots (% made)",
          "free throws (% made)",
          "Total shots attempted")
bubbleplot(X, id = "T", x = "P2p", y = "P3p", col = "FTp",
           size = "AS", labels = labs)

# Player-level bubble plot for the Golden State Warriors and Cleveland
# Cavaliers rows of Pbox with MIN of at least 500: DREB, STL and BLK per
# minute, with MIN as bubble size.
Pbox.GSW.CC <- subset(
  Pbox,
  (Team == "Golden State Warriors" | Team == "Cleveland Cavaliers") &
    MIN >= 500
)
X <- with(
  Pbox.GSW.CC,
  data.frame(ID = Player, Team, V1 = DREB / MIN, V2 = STL / MIN,
             V3 = BLK / MIN, V4 = MIN)
)
labs <- c("Defensive Rebounds", "Steals", "Blocks",
          "Total minutes played")
bubbleplot(X, id = "ID", x = "V1", y = "V2", col = "V3",
           size = "V4", text.col = "Team", labels = labs,
           title = "GSW and CC during the regular season",
           text.legend = TRUE, text.size = 3.5, scale = FALSE)

In [0]:
rm(list = ls())

# Oklahoma City Thunder rows of Pbox with MIN of at least 500.
okc <- subset(Pbox, Team == "Oklahoma City Thunder" & MIN >= 500)

# Variability of P3p sized by P3A: first with the default weighting,
# then with weight = TRUE (the second result overwrites the first).
vrb1 <- variability(data = okc, data.var = "P3p", size.var = "P3A")
vrb1 <- variability(data = okc, data.var = "P3p", size.var = "P3A",
                    weight = TRUE)

# Same analysis on three percentage columns with their attempt columns.
pct_cols <- c("P2p", "P3p", "FTp")
att_cols <- c("P2A", "P3A", "FTA")
vrb2 <- variability(data = okc, data.var = pct_cols, size.var = att_cols,
                    weight = TRUE)
plot(vrb2, title = "Variability diagram - OKC")

In [0]:
rm(list = ls())
library(gridExtra)

# Inequality of PTS for two single teams, plotted side by side.
Pbox.BN <- subset(Pbox, Team == "Brooklyn Nets")
ineqBN <- inequality(Pbox.BN$PTS, nplayers = 8)
Pbox.MB <- subset(Pbox, Team == "Milwaukee Bucks")
ineqMB <- inequality(Pbox.MB$PTS, nplayers = 8)
p1 <- plot(ineqBN, title = "Brooklyn Nets")
p2 <- plot(ineqMB, title = "Milwaukee Bucks")
grid.arrange(p1, p2, nrow = 1)

# Gini value of PTS for every team listed in Tbox.
# The argument is spelled out as `nplayers` (rather than relying on partial
# matching of `npl`), and seq_len() keeps the loop from running when
# Tbox has no rows.
no.teams <- nrow(Tbox)
INEQ <- array(0, no.teams)
for (k in seq_len(no.teams)) {
  Teamk <- Tbox$Team[k]
  Pbox.sel <- subset(Pbox, Team == Teamk)
  index <- inequality(Pbox.sel$PTS, nplayers = 8)
  INEQ[k] <- index$Gini
}

# Gini value against team PTS, coloured by Tadd$Conference.
dts <- data.frame(INEQ, PTS = Tbox$PTS,
                  CONF = Tadd$Conference)
mypal <- colorRampPalette(c("blue", "red"))
scatterplot(dts, data.var = c("INEQ", "PTS"), z.var = "CONF",
            labels = Tbox$Team, palette = mypal,
            repel_labels = TRUE)

In [0]:
rm(list = ls())
PbP <- PbPmanipulation(PbP.BDB)

# `==` is required here: with a single `=`, subset() receives `team` as an
# unused named argument and silently returns every row of PbP.
PbP.GSW <- subset(PbP, team == "GSW")

lineup <- c("Stephen Curry", "Kevin Durant",
            "Klay Thompson", "Draymond Green",
            "Zaza Pachulia")

# Keep the rows in which all five lineup names appear in columns 4 to 13
# (presumably the on-court player columns - confirm against the data).
filt5 <- apply(PbP.GSW[, 4:13], 1, function(x) {
  sum(as.character(x) %in% lineup) == 5
})
subPbP.GSW <- PbP.GSW[filt5, ]

# Sum of `points` per lineup member within those rows; NA points are ignored.
PTS5 <- vapply(lineup, function(x) {
  filt <- subPbP.GSW$player == x
  sum(subPbP.GSW$points[filt], na.rm = TRUE)
}, numeric(1))

inequality(PTS5, nplayers = 5)

In [0]:
rm(list = ls())
PbP <- PbPmanipulation(PbP.BDB)

# Rows where team is "GSW" and oppTeam is "DET".
PbP.GSW.DET <- subset(PbP, team == "GSW" & oppTeam == "DET")

lineup <- c("Stephen Curry", "Kevin Durant",
            "Klay Thompson", "Draymond Green",
            "Zaza Pachulia")

# Keep the rows in which all five lineup names appear in columns 4 to 13
# (presumably the on-court player columns - confirm against the data).
filt5 <- apply(PbP.GSW.DET[, 4:13], 1, function(x) {
  sum(as.character(x) %in% lineup) == 5
})
subPbP.GSW.DET <- PbP.GSW.DET[filt5, ]

# Sum of `points` per lineup member within those rows; NA points are ignored.
# TRUE is written out (T can be reassigned) and vapply() fixes the result
# type to one number per player.
PTS5 <- vapply(lineup, function(x) {
  filt <- subPbP.GSW.DET$player == x
  sum(subPbP.GSW.DET$points[filt], na.rm = TRUE)
}, numeric(1))

# `nplayers` is spelled out instead of relying on partial argument matching.
inequality(PTS5, nplayers = 5)

In [0]:
rm(list = ls())
PbP <- PbPmanipulation(PbP.BDB)

# Kevin Durant's rows with rescaled coordinates:
# xx = original_x / 10 and yy = original_y / 10 - 41.75.
kd_shots <- transform(
  subset(PbP, player == "Kevin Durant"),
  xx = original_x / 10,
  yy = original_y / 10 - 41.75
)

# Plain scatter of the shots.
shotchart(data = kd_shots, x = "xx", y = "yy",
          type = NULL, scatter = TRUE)
# Scatter with `result` as the z variable.
shotchart(data = kd_shots, x = "xx", y = "yy", z = "result",
          type = NULL, scatter = TRUE)
# Sector chart of `playlength`, with the points drawn on top.
shotchart(data = kd_shots, x = "xx", y = "yy", z = "playlength",
          num.sect = 5, type = "sectors", scatter = TRUE)
# Sector chart of `playlength` without points, passing the `result` column.
shotchart(data = kd_shots, x = "xx", y = "yy", z = "playlength",
          num.sect = 5, type = "sectors", scatter = FALSE, result = "result")

# CHAPTER 3 
# Discovering Patterns in Data 

In [0]:
rm(list = ls())
library(vcd)

PbP <- PbPmanipulation(PbP.BDB)
PbP.GSW <- subset(PbP, team == "GSW")

# Event types that are left out of the analysis.
ev <- c("ejection", "end of period", "jump ball",
        "start of period", "unknown", "violation",
        "timeout", "sub", "foul", "turnover")

# A logical mask is used instead of negative indices from which():
# `x[-which(cond), ]` returns zero rows when nothing matches `cond`.
event.unsel <- PbP.GSW$event_type %in% ev
PbP.GSW.ev <- PbP.GSW[!event.unsel, ]

# Cross-tabulate opponent and event type. with() resolves the columns inside
# the data frame without attach(), and the result is not stored in `T`, which
# would overwrite R's shorthand for TRUE.
event_table <- with(PbP.GSW.ev, table(oppTeam, event_type, exclude = ev))
assocstats(event_table)

In [0]:
rm(list = ls())
library(dplyr)
library(lsr)
library(tibble)

FF <- fourfactors(Tbox, Obox)

# Team statistics taken from Tbox plus ORtg and DRtg taken from FF.
# Columns are referenced through with() and `$` instead of attaching both
# objects, so it is explicit where each variable comes from and the search
# path is not modified.
X <- data.frame(
  with(Tbox, data.frame(PTS, P2M, P3M, FTM, REB = OREB + DREB, AST,
                        STL, BLK)),
  ORtg = FF$ORtg,
  DRtg = FF$DRtg
)
Playoff <- Tadd$Playoff

# For every column of X: group means by Playoff (rounded to 1 decimal) and
# the first value returned by etaSquared() for a one-way aov, times 100.
# The result is transposed into one row per variable, renamed, and sorted
# by decreasing eta2.
eta <- sapply(X, function(Y) {
  cm <- round(tapply(Y, Playoff, mean), 1)
  eta2 <- etaSquared(aov(Y ~ Playoff))[1] * 100
  c(cm, round(eta2, 2))
}) %>%
  t() %>%
  as.data.frame() %>%
  rename(No = N, Yes = Y, eta2 = V3) %>%
  rownames_to_column("rownm") %>%
  arrange(-eta2) %>%
  column_to_rownames("rownm")

In [0]:
rm(list = ls())

# AST and TOV per minute for the rows of Pbox with MIN of at least 500.
# with() replaces attach()/detach() so the search path is left untouched.
data <- subset(Pbox, MIN >= 500)
X <- with(data, data.frame(AST, TOV) / MIN)

# Pearson correlation, the same on ranks, Spearman via `method`,
# and the full correlation matrix.
cor(X$AST, X$TOV)
cor(rank(X$AST), rank(X$TOV))
cor(X$AST, X$TOV, method = "spearman")
cor(X)

In [0]:
rm(list = ls())

# Player box scores joined with Tadd on Team, restricted to MIN >= 500.
data <- merge(Pbox, Tadd, by = "Team")
data <- subset(data, MIN >= 500)

# Per-minute statistics plus the Playoff column from the merged data.
# with() and `$` replace attach()/detach() so the search path is untouched.
X <- with(
  data,
  data.frame(PTS, P3M, P2M, REB = (OREB + DREB), AST,
             TOV, STL, BLK) / MIN
)
X <- data.frame(X, Playoff = data$Playoff)

# Correlation analysis on the eight numeric columns, then a scatterplot
# matrix of the same columns with Playoff as the z variable.
corrmatrix <- corranalysis(X[, 1:8], threshold = 0.5)
plot(corrmatrix)
scatterplot(X, data.var = 1:8, z.var = "Playoff",
            diag = list(continuous = "blankDiag"))

In [0]:
rm(list = ls())

# Rows of Pbox with MIN of at least 1500. The same row positions are used
# for both the statistics and the player labels, so the two always stay
# aligned (which() also drops rows where MIN is NA from both).
sel_rows <- which(Pbox$MIN >= 1500)

# with() replaces attach()/detach(); the search path is not modified.
data <- with(
  Pbox,
  data.frame(PTS, P3M, P2M, REB = OREB + DREB,
             AST, TOV, STL, BLK)
)[sel_rows, ]
id <- Pbox$Player[sel_rows]

# MDS map of the selected players, labelled by player name.
mds <- MDSmap(data)
plot(mds, labels = id)

# Highlight five players, first on the full map and then zoomed in.
selp <- which(id == "Al Horford" | id == "Kyle Korver" |
                id == "Myles Turner" | id == "Kyle Kuzma" |
                id == "Andrew Wiggins")
plot(mds, labels = id, subset = selp, col.subset = "tomato")
plot(mds, labels = id, subset = selp, col.subset = "tomato",
     zoom = c(0, 3, 0, 2))

# Maps for four variables, with level.plot = FALSE and with contour = TRUE.
plot(mds, z.var = c("P2M", "P3M", "AST", "REB"),
     level.plot = FALSE, palette = topo.colors)
plot(mds, z.var = c("P2M", "P3M", "AST", "REB"),
     contour = TRUE, palette = topo.colors)

In [0]:
rm(list = ls())

# Assist network built from the rows of PbP with team equal to "GSW".
PbP <- PbPmanipulation(PbP.BDB)
PbP.GSW <- subset(PbP, team == "GSW")
netdata <- assistnet(PbP.GSW)
netdata
#RNGkind(sample.kind="Rounding")
set.seed(7)
plot(netdata)
plot(netdata, layout = "circle", edge.thr = 20)

# One flag per row: TRUE when "Draymond Green" appears in any of the
# columns a1, h1, ..., a5, h5.
cols <- paste0(c("a", "h"), rep(1:5, each = 2))
has_dg <- apply(PbP.GSW[, cols], 1, "%in%", x = "Draymond Green")

# Network and shot averages for the rows without him.
PbP.GSW.DG0 <- PbP.GSW[!has_dg, ]
netdata.DG0 <- assistnet(PbP.GSW.DG0)
set.seed(1)
plot(netdata.DG0)
PbP.GSW.DG0 <- subset(PbP.GSW.DG0, ShotType == "2P" | ShotType == "3P")
p0 <- mean(PbP.GSW.DG0$points)
pl0 <- mean(PbP.GSW.DG0$playlength)

# Shot averages for the rows with him.
PbP.GSW.DG1 <- PbP.GSW[has_dg, ]
PbP.GSW.DG1 <- subset(PbP.GSW.DG1, ShotType == "2P" | ShotType == "3P")
p1 <- mean(PbP.GSW.DG1$points)
pl1 <- mean(PbP.GSW.DG1$playlength)

# Full network again, with node colour and size mapped to node statistics.
plot(netdata, layout = "circle", edge.thr = 20,
     node.col = "FGPTS_AST", node.size = "ASTPTS")
plot(netdata, layout = "circle", edge.thr = 20,
     node.col = "FGPTS", node.size = "FGPTS_ASTp")

In [0]:
# Pull the assist table and the node statistics out of netdata, name the
# first column of the node statistics "Player", and join them with Pbox.
TAB <- netdata$assistTable
X <- netdata$nodeStats
names(X)[1] <- "Player"
data <- merge(X, Pbox, by = "Player")

# FGM against FGM_ASTp, coloured by MIN, with repelled player labels.
heat_palette <- colorRampPalette(c("blue", "yellow", "red"))
scatterplot(
  data,
  data.var = c("FGM", "FGM_ASTp"),
  z.var = "MIN",
  labels = data$Player,
  palette = heat_palette,
  repel_labels = TRUE
)


In [0]:
# Players with MIN above 984 and the matching sub-table of assists.
# NOTE(review): `sel` holds row positions of `data` but is applied to TAB;
# this assumes both are in the same player order - confirm.
sel <- which(data$MIN > 984)
tab <- TAB[sel, sel]
no.pl <- nrow(tab)

# One inequality analysis per player on the table row (stored in GiniM/pM)
# and one on the table column (stored in GiniR/pR).
pR <- pM <- vector(no.pl, mode = "list")
GiniM <- array(NA, no.pl)
GiniR <- array(NA, no.pl)
# seq_len() keeps the loop from running when no player is selected, and
# `nplayers` is spelled out instead of relying on partial argument matching.
for (pl in seq_len(no.pl)) {
  ineqplM <- inequality(tab[pl, ], nplayers = no.pl)
  GiniM[pl] <- ineqplM$Gini
  ineqplR <- inequality(tab[, pl], nplayers = no.pl)
  GiniR[pl] <- ineqplR$Gini
  player_name <- rownames(tab)[pl]
  pM[[pl]] <- plot(ineqplM, title = player_name)
  pR[[pl]] <- plot(ineqplR, title = player_name)
}

library(gridExtra)
grid.arrange(grobs = pM, nrow = 2)
grid.arrange(grobs = pR, nrow = 2)

In [0]:
library(vcd)
# Association statistics for the assist sub-table `tab`.
tab_assoc <- assocstats(tab)
tab_assoc

In [0]:
# Node statistics of the selected players together with the two Gini vectors.
XX <- data.frame(X[sel, ], GiniM, GiniR)

# Bubble plot: GiniM on x, GiniR on y, FGM_AST as colour, AST as size.
labs <- c(
  "Gini Index for assists made",
  "Gini Index for assists received",
  "Assists received",
  "Assists made"
)
bubbleplot(
  XX,
  id = "Player",
  x = "GiniM",
  y = "GiniR",
  col = "FGM_AST",
  size = "AST",
  labels = labs,
  text.size = 4
)

In [0]:
library(tidygraph)
library(igraph)
library(CINNA)

# Convert the assist network with as_tbl_graph(), then reset its class to
# plain "igraph" before calling the centrality functions.
net1 <- as_tbl_graph(netdata$assistNet)
class(net1) <- "igraph"

# Centrality measures of the network.
centr_degree(net1)
alpha_centrality(net1)
closeness(net1, mode = "all")
betweenness(net1)
calculate_centralities(net1)

In [0]:
rm(list = ls())
PbP <- PbPmanipulation(PbP.BDB)

# Rows with a non-empty `result`, split by whether team is "GSW" or not.
gsw_shots <- subset(PbP, team == "GSW" & result != "")
opp_shots <- subset(PbP, team != "GSW" & result != "")

# Density plots of 2P shots for the GSW rows, one per variable.
densityplot(data = gsw_shots, shot.type = "2P",
            var = "periodTime", best.scorer = TRUE)
densityplot(data = gsw_shots, shot.type = "2P",
            var = "totalTime", best.scorer = TRUE)
densityplot(data = gsw_shots, shot.type = "2P",
            var = "playlength", best.scorer = TRUE)
densityplot(data = gsw_shots, shot.type = "2P",
            var = "shot_distance", best.scorer = TRUE)

# The same four plots for the non-GSW rows.
densityplot(data = opp_shots, shot.type = "2P",
            var = "periodTime", best.scorer = TRUE)
densityplot(data = opp_shots, shot.type = "2P",
            var = "totalTime", best.scorer = TRUE)
densityplot(data = opp_shots, shot.type = "2P",
            var = "playlength", best.scorer = TRUE)
densityplot(data = opp_shots, shot.type = "2P",
            var = "shot_distance", best.scorer = TRUE)

# Field shots of two single players: play length and shot distance.
KD <- subset(PbP, player == "Kevin Durant" & result != "")
SC <- subset(PbP, player == "Stephen Curry" & result != "")
densityplot(data = KD, shot.type = "field", var = "playlength")
densityplot(data = KD, shot.type = "field", var = "shot_distance")
densityplot(data = SC, shot.type = "field", var = "playlength")
densityplot(data = SC, shot.type = "field", var = "shot_distance")

In [0]:
rm(list = ls())

# Rescaled coordinates for every row:
# xx = original_x / 10 and yy = original_y / 10 - 41.75.
PbP <- PbPmanipulation(PbP.BDB)
PbP$xx <- PbP$original_x / 10
PbP$yy <- PbP$original_y / 10 - 41.75
klay <- subset(PbP, player == "Klay Thompson")

# The three density chart types on their own.
shotchart(data = klay, x = "xx", y = "yy", type = "density-polygons")
shotchart(data = klay, x = "xx", y = "yy", type = "density-raster")
shotchart(data = klay, x = "xx", y = "yy", type = "density-hexbin")

# The same chart types with extra options: points on top of the polygons,
# semi-transparent tomato points on the raster, and 50 bins with the "bwr"
# palette for the hexbin.
shotchart(data = klay, x = "xx", y = "yy",
          type = "density-polygons", scatter = TRUE)
shotchart(data = klay, x = "xx", y = "yy",
          type = "density-raster", scatter = TRUE,
          pt.col = "tomato", pt.alpha = 0.1)
shotchart(data = klay, x = "xx", y = "yy",
          type = "density-hexbin", nbins = 50, palette = "bwr")

In [0]:
rm(list = ls())

# Per-minute statistics for the rows of Pbox with MIN of at least 500.
# with() replaces attach()/detach(): the columns are resolved inside `data`
# and the search path is left untouched.
data <- subset(Pbox, MIN >= 500)
X <- with(data, data.frame(PTS, P3M, P2M, REB = OREB + DREB, AST) / MIN)

# Scatterplot matrix with density panels below and on the diagonal.
scatterplot(X, data.var = 1:5,
            lower = list(continuous = "density"),
            diag = list(continuous = "densityDiag"))