Commit

further cleanup/formatting
Sigrid Keydana committed Dec 4, 2018
1 parent c0b2ab0 commit a587c2a
Showing 13 changed files with 337 additions and 265 deletions.
1_structured_data/1_heterogeneous_data_1.Rmd (10 changes: 6 additions & 4 deletions)
@@ -187,12 +187,14 @@ Now go to `1_structureddata_quizzes.rmd` where you will be asked to use the Functional API …

# Quiz 1

- On the same prediction task, use the Keras Functional API such that:
+ On the same prediction task, now use the Keras Functional API.

- - each categorical variable has its own input
- - while the continuous inputs share one single input
+ Use all of the continuous variables and have them share a single input.

- We've copied some common code for you from above so you don't have to copy-paste the individual chunks.
+ In addition, add a categorical variable of your choice (this will need to get its own input).
+ In the quiz, you'll be asked to indicate the variable you chose and the accuracy you obtained.

+ We've copied some common, reusable code for you from above so you don't have to copy-paste the individual chunks.
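For orientation, a minimal sketch of the kind of two-input model this quiz asks for could look as follows; the input shapes, the layer sizes, and the choice of `workclass` as the categorical variable are illustrative assumptions, not the required solution.

```r
library(keras)

# Sketch only: shapes, units, and the choice of `workclass` are assumptions.
input_continuous <- layer_input(shape = 6)  # all continuous predictors share this input
input_workclass <- layer_input(shape = 8)   # one-hot-encoded categorical variable gets its own input

dense_continuous <- input_continuous %>% layer_dense(units = 64)
dense_workclass <- input_workclass %>% layer_dense(units = 64)

output <- layer_concatenate(list(dense_continuous, dense_workclass)) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")

model <- keras_model(
  inputs = list(input_continuous, input_workclass),
  outputs = output
)
model %>% compile(loss = "binary_crossentropy", optimizer = "adam", metrics = "accuracy")
```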

```{r}
library(keras)
1_structured_data/1_heterogeneous_data_1_with_exercises.Rmd (94 changes: 13 additions & 81 deletions)
@@ -187,12 +187,15 @@ Now go to `1_structureddata_quizzes.rmd` where you will be asked to use the Functional API …

# Quiz 1

- On the same prediction task, use the Keras Functional API such that:
+ On the same prediction task, now use the Keras Functional API.

- - each categorical variable has its own input
- - while the continuous inputs share one single input
+ Use all of the continuous variables and have them share a single input.

+ In addition, add a categorical variable of your choice (this will need to get its own input).
+ In the quiz, you'll be asked to indicate the variable you chose and the accuracy you obtained.

+ We've copied some common, reusable code for you from above so you don't have to copy-paste the individual chunks.

- We've copied some common code for you from above so you don't have to copy-paste the individual chunks.

```{r}
library(keras)
@@ -244,36 +247,14 @@ Now please continue from here.
```{r}
input_continuous <- layer_input(shape = 6)
input_workclass <- layer_input(shape = 8)
- input_education <- layer_input(shape = 17)
- input_marital_status <- layer_input(shape = 8)
- input_occupation <- layer_input(shape = 15)
- input_relationship <- layer_input(shape = 7)
- input_race <- layer_input(shape = 6)
- input_sex <- layer_input(shape = 3)
- input_native_country <- layer_input(shape = 42)
dense_continuous <- input_continuous %>% layer_dense(units = 64)
dense_workclass <- input_workclass %>% layer_dense(units = 64)
- dense_education <- input_education %>% layer_dense(units = 64)
- dense_marital_status <- input_marital_status %>% layer_dense(units = 64)
- dense_occupation <- input_occupation %>% layer_dense(units = 64)
- dense_relationship <- input_relationship %>% layer_dense(units = 64)
- dense_race <- input_race %>% layer_dense(units = 64)
- dense_sex <- input_sex %>% layer_dense(units = 64)
- dense_native_country <- input_native_country %>% layer_dense(units = 64)
output <- layer_concatenate(
list(
dense_continuous,
- dense_workclass,
- dense_education,
- dense_marital_status,
- dense_occupation,
- dense_relationship,
- dense_race,
- dense_sex,
- dense_native_country
- )
+ dense_workclass)
) %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 64, activation = "relu") %>%
@@ -285,14 +266,7 @@ output <- layer_concatenate(
model <- keras_model(
inputs = list(
input_continuous,
- input_workclass,
- input_education,
- input_marital_status,
- input_occupation,
- input_relationship,
- input_race,
- input_sex,
- input_native_country
+ input_workclass
),
outputs = output
)
@@ -302,14 +276,7 @@ model %>% compile(loss = "binary_crossentropy", optimizer = "adam", metrics = "accuracy")
model %>% fit(
x = list(
x_train_continuous,
- workclass,
- education,
- marital_status,
- occupation,
- relationship,
- race,
- sex,
- native_country
+ workclass
),
y = y_train,
epochs = 20,
@@ -374,35 +341,14 @@ c(workclass, education, marital_status, occupation, relationship, race, sex, native_country…
input_continuous <- layer_input(shape = 5)
input_workclass <- layer_input(shape = 8)
- input_education <- layer_input(shape = 17)
- input_marital_status <- layer_input(shape = 8)
- input_occupation <- layer_input(shape = 15)
- input_relationship <- layer_input(shape = 7)
- input_race <- layer_input(shape = 6)
- input_sex <- layer_input(shape = 3)
- input_native_country <- layer_input(shape = 42)
dense_continuous <- input_continuous %>% layer_dense(units = 64)
dense_workclass <- input_workclass %>% layer_dense(units = 64)
- dense_education <- input_education %>% layer_dense(units = 64)
- dense_marital_status <- input_marital_status %>% layer_dense(units = 64)
- dense_occupation <- input_occupation %>% layer_dense(units = 64)
- dense_relationship <- input_relationship %>% layer_dense(units = 64)
- dense_race <- input_race %>% layer_dense(units = 64)
- dense_sex <- input_sex %>% layer_dense(units = 64)
- dense_native_country <- input_native_country %>% layer_dense(units = 64)
common <- layer_concatenate(
list(
dense_continuous,
- dense_workclass,
- dense_education,
- dense_marital_status,
- dense_occupation,
- dense_relationship,
- dense_race,
- dense_sex,
- dense_native_country
+ dense_workclass
)
) %>%
layer_dropout(rate = 0.5) %>%
@@ -419,14 +365,7 @@ output_age <- common %>% layer_dense(units = 1, name = "output_age")
model <- keras_model(
inputs = list(
input_continuous,
- input_workclass,
- input_education,
- input_marital_status,
- input_occupation,
- input_relationship,
- input_race,
- input_sex,
- input_native_country
+ input_workclass
),
outputs = list(
output_salary,
@@ -442,14 +381,7 @@ model %>% compile(
model %>% fit(
x = list(
x_train_continuous,
- workclass,
- education,
- marital_status,
- occupation,
- relationship,
- race,
- sex,
- native_country
+ workclass
),
y = list(y_train_salary, y_train_age),
epochs = 20,
1_structured_data/structureddata_quizzes.Rmd (54 changes: 40 additions & 14 deletions)
@@ -12,25 +12,40 @@ knitr::opts_chunk$set(echo = FALSE)

## Using the Keras Functional API

- The quiz starts from the `heterogeneous_data_1.Rmd` notebook.
+ The quiz starts from the `1_heterogeneous_data_1.Rmd` notebook.

+ __Note that we've copied common chunks (chunks you can re-use) to the end of the notebook so you don't have to start from scratch__.

### Quiz 1

- On the same prediction task, use the Keras Functional API such that:
+ On the same prediction task, now use the Keras Functional API.

- - each categorical variable has its own input
- - the continuous inputs share one single input
+ Use all of the continuous variables and have them share a single input.

- Train for 20 epochs just as we did before.
+ In addition, add a categorical variable of your choice (this will need to get its own input).

+ Note: All answers in this quiz will be indicated correct as we just want to see the actual accuracy you obtained.

```{r quiz1}
quiz(
+ question("Which categorical variable did you choose?",
+ answer("workclass"),
+ answer("education"),
+ answer("marital_status"),
+ answer("occupation"),
+ answer("relationship"),
+ answer("sex"),
+ answer("race"),
+ answer("native_country")
+ ),
question("In which region is the final accuracy on the validation split?",
- answer("between 0.7 and 0.8"),
- answer("between 0.8 and 0.9", correct = TRUE),
- answer("between 0.9 and 0.95")
+ answer("lower than 0.8"),
+ answer("between 0.8 and 0.82"),
+ answer("between 0.82 and 0.83"),
+ answer("between 0.83 and 0.84"),
+ answer("between 0.84 and 0.85"),
+ answer("between 0.85 and 0.86"),
+ answer("higher than 0.86")
)
)
```
@@ -45,22 +60,22 @@ Train for 20 epochs just as we did before.

```{r quiz2}
quiz(
question("What can you say regarding accuracy on salary prediction on the one hand, and mean squared error on age on the other hand?",
answer("accuracy on salary is about the same as before, MSE on age is around 50"),
answer("accuracy on salary is noticeably worse, MSE on age is around 50"),
answer("accuracy on salary is noticeably worse, MSE on age is around 100", correct = TRUE)
question("What can you say regarding accuracy on salary prediction on the one hand, and mean squared error on age on the other hand? We're referring to the validation set.",
answer("accuracy on salary is about the same as before, MSE on age is < 100"),
answer("accuracy on salary is noticeably worse, MSE on age is < 100"),
answer("accuracy on salary is noticeably worse, MSE on age is > 100", correct = TRUE)
)
)
```
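This question refers to the two-output model built in the notebook: a shared trunk with one output for salary and one for age. As a reminder of the general pattern, the compile step for such a model might look roughly like this; the output layer names and the absence of loss weights are assumptions, since the notebook's own compile call is collapsed in this view.

```r
library(keras)

# Sketch only: output layer names and the lack of loss_weights are assumptions.
model %>% compile(
  optimizer = "adam",
  loss = list(output_salary = "binary_crossentropy", output_age = "mse"),
  metrics = list(output_salary = "accuracy")
)
```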


## Embeddings for uncovering relationships

- All quizzes in this section start from the `embeddings_so.Rmd` notebook.
+ All quizzes in this section start from the `2_embeddings_so.Rmd` notebook.

### Quiz 1

- We just ran the embeddings model in `embeddings_so.Rmd`.
+ We just ran the embeddings model in `2_embeddings_so.Rmd`.

Find the cosine similarity between the embedded representation of `R` and
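
Whichever items you compare, the cosine similarity between two embedding vectors can be computed directly from the embedding layer's weight matrix. A small sketch follows; the layer name `"embedding"` and the index variables are assumptions about the notebook's setup.

```r
library(keras)

# Sketch only: the layer name and the index variables are assumptions.
# Each row of the weight matrix is the embedding of one integer-encoded item.
embedding_matrix <- get_weights(get_layer(model, "embedding"))[[1]]

cosine_similarity <- function(a, b) {
  sum(a * b) / (sqrt(sum(a^2)) * sqrt(sum(b^2)))
}

# r_index and other_index are hypothetical integer codes of the two items to compare
cosine_similarity(embedding_matrix[r_index, ], embedding_matrix[other_index, ])
```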

@@ -121,6 +136,17 @@ Some concrete ideas:

Feel free to try whatever you like best / comes to mind!

+ ```{r quiz5}
+ quiz(
+ question("Were you able to improve accuracy on the validation set? If so, in what area did you land?",
+ answer("No."),
+ answer("higher than 0.966"),
+ answer("higher than 0.97"),
+ answer("higher than 0.975")
+ )
+ )
+ ```

## Embeddings on the Census Income dataset (optional)

### Quiz 1