In [None]:
# Load the packages required to run the report algorithms
library(lubridate)
library(magrittr)
library(tidyverse)
library(plotly)
library(DT)

In [7]:
### Input Variables
# Everything in this cell is a report setting: which two teams are compared, which
# period is analysed, and which colors the charts use.
# Edit team, opponent, startDate and endDate before running the report.
team <- "New Zealand"
opponent <- "Australia"
startDate <- "2014-01-01"
endDate <- "2019-12-31"

# Calendar years of the two boundary dates (used by the report headings)
startYear <- year(startDate)
endYear <- year(endDate)

# Chart colors, named by side: the first color belongs to `team`, the second to `opponent`.
# Edit the hex values to change how the report visualizations look.
pal <- setNames(c("#A369EC", "#FF8267"), c(team, opponent))


In [8]:
# Download the complete aggregated team dataset
data <- read.csv("https://ludis-rugby.s3.amazonaws.com/clean-data/CLEAN_CompleteTeam.csv")

# Normalise column types: match dates become Date objects and the two
# team-name columns become plain character vectors
data[["Date"]] <- as.Date(data[["Date"]])
for (nameColumn in c("Team", "Opponent")) {
  data[[nameColumn]] <- as.character(data[[nameColumn]])
}


# Team Matchup History
{{team}} v. {{opponent}}, {{startYear}} - {{endYear}}

In [9]:
### Match Details Algorithm
# Builds a table of match details for every match in the global `data` where
# Team is `team` and Opponent is `opponent`, limited to a date range.
# - team:      name of the team, as a string
# - opponent:  name of the opponent, as a string
# - startDate: oldest date in the range, as a string (inclusive)
# - endDate:   most recent date in the range, as a string (inclusive)
# Returns a data frame with the columns Team, Opponent, Outcome, Date, Tournament,
# Location and Referee, sorted newest match first and with row names 1..n.
# When nothing matches, an error message string is returned instead of a table.
matchStats <- function(team, opponent, startDate, endDate){

  # Keep only the rows recorded for `team` playing against `opponent`
  headToHead <- filter(data, Team == team & Opponent == opponent)

  # Guard: this pairing never appears in the dataset
  if(nrow(headToHead) == 0){
    return("Error. No data on this team matchup.")
  }

  # Narrow the pairing down to the requested date range
  inRange <- filter(headToHead, Date >= as.Date(startDate) & Date <= as.Date(endDate))

  # Guard: the pairing exists, but not inside the requested date range
  if(nrow(inRange) == 0){
    return("Error. There are not matches between these teams in this date range.")
  }

  # Keep just the columns shown in the output table
  matchDetails <- select(inRange, Team, Opponent, Outcome, Date, Tournament, Location, Referee)

  # Most recent match first, then renumber the rows from 1
  newestFirst <- matchDetails[order(matchDetails$Date, decreasing = TRUE), ]
  rownames(newestFirst) <- seq_len(nrow(newestFirst))

  newestFirst
}


In [10]:
### Chart Visualizations Algorithm
# Builds the set of plotly charts that compare a team and an opponent over all of
# their matches in the global `data` that fall inside a date range.
# - team:      name of the team, as a string
# - opponent:  name of the opponent, as a string
# - startDate: oldest date in the range, as a string (inclusive)
# - endDate:   most recent date in the range, as a string (inclusive)
# Returns an unnamed list of seven plotly charts, in this order:
#   1. carriesVis    - one pie: each side's share of total carries
#   2. scrumsVis     - two pies: mean scrums won vs. lost, one pie per side
#   3. lineoutsVis   - two pies: mean lineouts won vs. lost, one pie per side
#   4. rucksVis      - two pies: mean rucks won vs. lost, one pie per side
#   5. tacklesVis    - two pies: mean tackles vs. tackles missed, one pie per side
#   6. attackVis     - horizontal grouped bars: total clean breaks, offloads,
#                      defenders beaten and passes per side
#   7. disciplineVis - horizontal grouped bars: total yellow cards, red cards and
#                      penalties conceded per side
# When nothing matches, an error message string is returned instead of the list.
# Besides `data`, the function reads the global `pal` for the carries pie colors.
matchStatVis <- function(team, opponent, startDate, endDate){
  # Collect the rows of both sides of the matchup: rows where Team is `team` carry the
  # team's statistics, rows where Team is `opponent` carry the opponent's statistics
  dataset <- data %>%
    filter((Team == team & Opponent == opponent) | (Team == opponent & Opponent == team))

  # If there are no matches between the team and opponent passed in, an error message is returned
  if(nrow(dataset) < 1){
    return("Error. No data on this team matchup.")
  }

  # Keep only the matches that fall between the start and end date passed in
  dataset <- dataset %>%
    filter(Date >= as.Date(startDate) & Date <= as.Date(endDate))

  # If there are no matches between the start and end date passed in, an error message is returned
  if(nrow(dataset) < 1){
    return("Error. There are not matches between these teams in this date range.")
  }

  # Colors shared by every chart except the carries pie (which uses the global `pal`)
  teamColor <- "#A369EC"
  opponentColor <- "#FF8267"
  lostColor <- "black"

  # Values of one statistic column restricted to the rows of one side
  sideValues <- function(column, side){
    dataset[[column]][which(dataset$Team == side)]
  }

  # Mean of one statistic column for one side, ignoring missing values
  sideMean <- function(column, side){
    mean(sideValues(column, side), na.rm = TRUE)
  }

  # Totals of several statistic columns for one side, ignoring missing values.
  # USE.NAMES = FALSE keeps the result an unnamed vector, like a plain c(sum(...), ...)
  sideTotals <- function(columns, side){
    vapply(columns,
           function(column) sum(sideValues(column, side), na.rm = TRUE),
           numeric(1),
           USE.NAMES = FALSE)
  }

  # Caption annotation placed under one of the two side-by-side pies
  sideCaption <- function(side, xCenter){
    list(text = paste(side),
         x = xCenter,
         y = -0.025,
         xanchor = "center",
         yanchor = "middle",
         showarrow = FALSE,
         font = list(size = 12))
  }

  # Two side-by-side pies (team on the left, opponent on the right) that compare the
  # mean of a "won/made" column with the mean of a "lost/missed" column for each side
  pairedPies <- function(wonColumn, lostColumn, sliceLabels, chartTitle){
    plot_ly(dataset,
            labels = sliceLabels,
            values = c(sideMean(wonColumn, team), sideMean(lostColumn, team)),
            type = "pie",
            marker = list(colors = c(teamColor, lostColor)),
            domain = list(x = c(0, 0.45)),
            name = team) %>%
      add_trace(labels = sliceLabels,
                values = c(sideMean(wonColumn, opponent), sideMean(lostColumn, opponent)),
                type = "pie",
                marker = list(colors = c(opponentColor, lostColor)),
                domain = list(x = c(0.55, 1)),
                name = opponent) %>%
      # one layout() call per caption
      layout(annotations = sideCaption(team, 0.225)) %>%
      layout(annotations = sideCaption(opponent, 0.775)) %>%
      layout(title = list(text = chartTitle,
                          font = list(size = 14),
                          x = 0.1),
             showlegend = FALSE) %>%
      config(displaylogo = FALSE)
  }

  # Horizontal grouped bar chart with one bar per statistic and side, showing totals.
  # The opponent trace does not repeat `y`; only the first trace supplies the labels.
  groupedBars <- function(columns, barLabels){
    plot_ly(x = sideTotals(columns, team),
            y = barLabels,
            name = team,
            type = 'bar',
            marker = list(color = teamColor),
            orientation = "h") %>%
      add_trace(x = sideTotals(columns, opponent),
                name = opponent,
                type = 'bar',
                marker = list(color = opponentColor),
                orientation = "h") %>%
      layout(xaxis = list(title = "Count"),
             barmode = 'group',
             showlegend = FALSE) %>%
      config(displaylogo = FALSE)
  }

  # Creates a pie chart that details the share of carries by each team
  # NOTE(review): `color = ~Team` inside `marker` is kept as it was; confirm it has any effect on a pie trace
  carriesVis <- plot_ly(dataset,
                        labels = c(team, opponent),
                        values = sideTotals("Carries", c(team, opponent)[1]) %>%
                          c(sideTotals("Carries", opponent)),
                        type = 'pie',
                        marker = list(colors = pal, color = ~Team)) %>%
    layout(title = list(text = paste("Share of Carries"),
                        font = list(size = 14),
                        x = 0.1),
           showlegend = FALSE) %>%
    config(displaylogo = FALSE)

  # Won/lost (or made/missed) pie pairs for the set piece, the ruck and the tackle
  scrumsVis <- pairedPies("Scrums.Won", "Scrums.Lost",
                          c("Scrums Won", "Scrums Lost"), "Scrum Success")
  lineoutsVis <- pairedPies("Lineouts.Won", "Lineouts.Lost",
                            c("Lineouts Won", "Lineouts Lost"), "Lineout Success")
  rucksVis <- pairedPies("Rucks.Won", "Rucks.Lost",
                         c("Rucks Won", "Rucks Lost"), "Ruck Success")
  tacklesVis <- pairedPies("Tackles", "Tackles.Missed",
                           c("Tackles", "Tackles Missed"), "Tackle Success")

  # Totals of clean breaks, offloads, defenders beaten and passes by each team, side by side
  attackVis <- groupedBars(c("Clean.Breaks", "Offloads", "Defenders.Beaten", "Passes"),
                           c("Clean Breaks", "Offloads", "Defenders Beaten", "Passes"))

  # Totals of yellow cards, red cards and penalties conceded by each team, side by side
  disciplineVis <- groupedBars(c("Yellow.Cards", "Red.Cards", "Penalties.Conceded"),
                               c("Yellow Cards", "Red Cards", "Penalties Conceded"))

  # Returns all of the visualizations in a list (callers index it by position)
  return(list(carriesVis, scrumsVis, lineoutsVis, rucksVis, tacklesVis, attackVis, disciplineVis))
}

In [11]:
# Run the matchStats algorithm using the input variables to get the table of match details
matchup <- matchStats(team = team, opponent = opponent, startDate = startDate, endDate = endDate)

# Get the number of games between the team and opponent during this time period from the table.
# matchStats returns an error message string instead of a table when nothing matches;
# nrow() of a string is NULL, so that case is counted as zero games.
numgames <- if (is.data.frame(matchup)) nrow(matchup) else 0L


### {{team}} and {{opponent}} have played {{numgames}} games from {{startYear}} to {{endYear}}.
{{team}} has won x games. {{opponent}} has won x games.

In [12]:
# Display the match details table (or the error message string) held in `matchup`
matchup


Unnamed: 0_level_0,Team,Opponent,Outcome,Date,Tournament,Location,Referee
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<date>,<fct>,<fct>,<fct>
1,New Zealand,Australia,31-0,2019-08-16,Internationals,Eden Park,Jaco Peyper
2,New Zealand,Australia,26-47,2019-08-09,The Rugby Championship,Optus Stadium,Jérôme Garcès
3,New Zealand,Australia,37-20,2018-10-26,Internationals,Nissan Stadium,Romain Poite
4,New Zealand,Australia,40-12,2018-08-24,The Rugby Championship,Eden Park,Wayne Barnes
5,New Zealand,Australia,38-13,2018-08-17,The Rugby Championship,ANZ Stadium,Jaco Peyper
6,New Zealand,Australia,18-23,2017-10-20,Internationals,Suncorp Stadium,Wayne Barnes
7,New Zealand,Australia,35-29,2017-08-25,The Rugby Championship,Forsyth Barr Stadium,Nigel Owens
8,New Zealand,Australia,54-34,2017-08-18,The Rugby Championship,ANZ Stadium,Wayne Barnes
9,New Zealand,Australia,37-10,2016-10-21,Internationals,Eden Park,Nigel Owens
10,New Zealand,Australia,29-9,2016-08-26,The Rugby Championship,Westpac Stadium,Romain Poite


In [13]:
# Build the set of chart visualizations for the teams and period chosen in the input variables
matchupStats <- matchStatVis(
  team = team,
  opponent = opponent,
  startDate = startDate,
  endDate = endDate
)


In [None]:
# Display every chart in the order matchStatVis returns them:
# carries, scrums, lineouts, rucks, tackles, attack, discipline
matchupStats[[1]]
matchupStats[[2]]
matchupStats[[3]]
matchupStats[[4]]
matchupStats[[5]]
matchupStats[[6]]
matchupStats[[7]]


### Attack

In [None]:
# Attack charts: attack bar chart (6), share of carries pie (1), ruck success pies (4)
matchupStats[[6]]
matchupStats[[1]]
matchupStats[[4]]


### Defense

In [None]:
# Defense chart: tackle success pies (5)
matchupStats[[5]]


### Set Piece

In [None]:
# Set piece charts: scrum success pies (2), lineout success pies (3)
matchupStats[[2]]
matchupStats[[3]]


### Discipline

In [None]:
# Discipline chart: cards and penalties conceded bar chart (7)
matchupStats[[7]]
