Skip to content

Commit

Permalink
App v1
Browse files Browse the repository at this point in the history
  • Loading branch information
kellobri committed Nov 6, 2018
0 parents commit 9591444
Show file tree
Hide file tree
Showing 8 changed files with 290 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.Rhistory
.Rproj.user/
80 changes: 80 additions & 0 deletions app.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# Shiny in production course
#

library(shiny)
library(shinythemes)
library(glue)
library(lime)
library(billboarder)

pred <- readRDS("lime_prediction_results.RDS")

ui <- fluidPage(
theme = shinytheme("simplex"),
titlePanel("RStudio Conf 2019 - Shiny in Production Workshop"),

sidebarLayout(
sidebarPanel(
selectInput("student", "Student Lookup:", choices = application_data$ID),
htmlOutput("risk"),
hr(),
htmlOutput("major"),
htmlOutput("minor"),
hr(),
HTML('<center><img src="rstudio.png"></center>'),
HTML('<center><p>Shiny in Production Workshop 2019</p></center>')
),

# Show a plot of the generated distribution
mainPanel(
tabsetPanel(
tabPanel("LIME Feature Plot", billboarderOutput("limeStudent")),
tabPanel("Cachable Plot", HTML('<center><p>TBD</p></center>')),
tabPanel("Data Drill Down", HTML('<center><p>TBD</p></center>'))
)
)
)
)

server <- function(input, output) {

obs_row <- reactive({
obs_num <- application_data %>% filter(ID == !!input$student)
obs_num$ID
})

output$risk <- renderText({
risk_pred <- application_data %>% filter(ID == !!input$student)
if (risk_pred$predict == 'No'){
amt <- 'Low Risk'
} else { amt <- 'Elevated Risk'}
glue('<h4 style="text-align:center; font-weight:bold; color:#7b8a8b;">Assessment: {amt}</h4>') %>% HTML
})

output$major <- renderText({
glue('<h4 style="font-weight:bold;">Major: </h4><p>{application_data[obs_row(),]$major}</p>') %>% HTML
})

output$minor <- renderText({
glue('<h4 style="font-weight:bold;">Minor: </h4><p>{application_data[obs_row(),]$minor}</p>') %>% HTML
})

output$limeStudent <- renderBillboarder({
#plot_features(pred[[obs_row()]])
prediction_data <- pred[[obs_row()]] %>% select(feature_desc, feature_weight, risk_predictor)
billboarder() %>%
bb_barchart(
data = prediction_data,
mapping = bbaes(x = feature_desc, y = feature_weight, group = risk_predictor),
rotated = TRUE,
stacked = TRUE
) %>%
bb_colors_manual('Low Risk' = "#417fe2", 'High Risk' = '#7f1c2e') %>%
bb_title(text = glue('Feature Contributions to Student Performance Risk'))
})

}

# Run the application
shinyApp(ui = ui, server = server)
13 changes: 13 additions & 0 deletions app1.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
49 changes: 49 additions & 0 deletions data-prep/fake-data-gen-steps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@


# Categorical Variables

majors <- c('shiny','rmarkdown','plumber','sparklyr','tidyverse')
minors <- c('leaflet', 'tidyr', 'rgl', 'htmlwidgets', 'Rcpp', 'keras', 'tibbletime',
'devtools', 'dplyr', 'lubridate', 'stringr', 'reticulate', 'ggplot2', 'carat',
'recipes', 'DT', 'httr', 'jsonlite', 'testthat', 'roxygen2', 'readxl', 'packrat',
'forcats', 'broom', 'purrr')

#high_lookup <- high_lookup[1:32,]
#low_lookup <- low_lookup[1:32,]

create_low_risk <- function(x){
eval(parse(text=low_lookup[x,2]))
}

low_vals <- lapply(1:nrow(low_lookup), create_low_risk)

list_low <- replicate(1200, lapply(1:nrow(low_lookup), create_low_risk), simplify=FALSE)
lf <- lapply(list_low, unlist)
low_frame <- as.data.frame(lf, stringsAsFactors = F)
low_frame <- as.data.frame(t(low_frame))
low_frame$risk <- "No"
names(low_frame) <- t(low_lookup[,1])

create_high_risk <- function(x){
eval(parse(text=high_lookup[x,2]))
}

high_vals <- lapply(1:nrow(high_lookup), create_high_risk)

list_high <- replicate(300, lapply(1:nrow(high_lookup), create_high_risk), simplify=FALSE)
hf <- lapply(list_high, unlist)
high_frame <- as.data.frame(hf, stringsAsFactors = F)
high_frame <- as.data.frame(t(high_frame))
high_frame$risk <- "Yes"
names(high_frame) <- t(high_lookup[,1])


rstudio_students <- rbind(low_frame, high_frame)
rownames(rstudio_students) <- NULL
rstudio_students$student_id <- c(1:1500)

write.csv(rstudio_students, "~/Downloads/rstudio-student-data.csv")
#colnames(rstudio_student_data)[colnames(rstudio_student_data)=="X34"] <- "risk"


skim(rstudio_students)
146 changes: 146 additions & 0 deletions data-prep/model-build-steps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
library(h2o)
library(tidyverse)
library(readxl)
library(lime)
library(recipes)

# Import Data: rstudio_students.csv
View(rstudio_student_data)

students_tbl <- rstudio_student_data %>%
mutate_if(is.character, as.factor) %>%
select(student_id, risk, major, minor, everything())

recipe_cleanup <- students_tbl %>%
recipe(formula = risk ~ .) %>%
step_rm(student_id) %>%
step_zv(all_predictors()) %>%
step_center(all_numeric()) %>%
step_scale(all_numeric()) %>%
prep(data = students_tbl)


student_bake <- bake(recipe_cleanup, newdata = students_tbl)

h2o.init()

rstudio_h2o <- as.h2o(student_bake)

h2o_split <- h2o.splitFrame(rstudio_h2o, ratios = c(0.7, 0.15), seed = 222)

train_h2o <- h2o.assign(h2o_split[[1]], "train" ) # 70%
valid_h2o <- h2o.assign(h2o_split[[2]], "valid" ) # 15%
test_h2o <- h2o.assign(h2o_split[[3]], "test" ) # 15%

y <- "risk"
x <- setdiff(names(train_h2o), c('risk'))

automl_models_h2o <- h2o.automl(
x = x,
y = y,
training_frame = train_h2o,
leaderboard_frame = valid_h2o,
max_runtime_secs = 60
)

automl_leader <- automl_models_h2o@leader

# Make Predictions on Test Data
risk_predictions <- h2o.predict(
object = automl_leader,
newdata = test_h2o)

risk_pred <- as.data.frame(risk_predictions)
risk_pred <- tibble::rowid_to_column(risk_pred, "ID")
test_data <- as.data.frame(test_h2o)
test_data <- tibble::rowid_to_column(test_data, "ID")

application_data <- left_join(risk_pred, test_data)

# LIME explainer

explainer <- lime::lime(
application_data[,-c(1:5)],
model = automl_leader,
bin_continuous = FALSE
)

obs <- 1 # This is the observation (employee position in test data set) to explain
set.seed(222)

explanation <- lime::explain(
x = application_data[obs,-c(1:5)],
explainer = explainer,
n_labels = 1,
n_features = 6,
n_permutations = 1000,
kernel_width = 1
)

plot_features(explanation)


# ------ #

create_pred_vis <- function(obs){
gen_lime_exp <- function(obs){
single_explanation <- as.data.frame(test_h2o) %>%
slice(obs) %>%
select(-risk) %>%
lime::explain(
explainer = explainer,
n_labels = 1,
n_features = 6,
n_permutations = 1000,
kernel_width = 1
) %>%
as.tibble()
return(single_explanation)
}

explanation <- gen_lime_exp(obs)

type_pal <- c('Supports', 'Contradicts')
explanation$type <- factor(ifelse(sign(explanation$feature_weight) ==
1, type_pal[1], type_pal[2]), levels = type_pal)
description <- paste0(explanation$case, "_", explanation$label)
desc_width <- max(nchar(description)) + 1
description <- paste0(format(description, width = desc_width),
explanation$feature_desc)
explanation$description <- factor(description, levels = description[order(abs(explanation$feature_weight))])
explanation$case <- factor(explanation$case, unique(explanation$case))

explanation_plot_df <- explanation %>%
mutate(risk_predictor = case_when(
(label == 'Yes' & type == 'Supports') | (label == 'No' & type == 'Contradicts') ~ 'High Risk',
(label == 'Yes' & type == 'Contradicts') | (label == 'No' & type == 'Supports') ~ 'Low Risk'
)) %>%
arrange(-abs(feature_weight)) %>%
head(20)

return(explanation_plot_df)
}

#create_pred_vis(1)

# Runs for about 45 minutes
# Produces a Large list
pred <- lapply(1:nrow(application_data), create_pred_vis)

saveRDS(pred, "lime_prediction_results.RDS")
saveRDS(application_data, "application_data.RDS")



#####
#####

billboarder() %>%
bb_barchart(
data = pred[[1]],
mapping = bbaes(x = feature_desc, y = feature_weight, group = risk_predictor),
rotated = TRUE,
stacked = TRUE
) %>%
bb_colors_manual('No' = "#95a5a6", 'Yes' = '#2C3E50') %>%
bb_title(text = glue('Feature Contributions to Student Dropout Risk'))
Binary file added data/application_data.RDS
Binary file not shown.
Binary file added data/lime_prediction_results.RDS
Binary file not shown.
Binary file added www/rstudio.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 9591444

Please sign in to comment.