In [12]:
### Run this cell before continuing. 
library(tidyverse)
library(repr)
library(tidymodels)
options(repr.matrix.max.rows = 6)

In [13]:
redwine <- read_csv2("winequality-red.csv")
redwine

Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.

Parsed with column specification:
cols(
  `fixed acidity` = [32mcol_number()[39m,
  `volatile acidity` = [31mcol_character()[39m,
  `citric acid` = [31mcol_character()[39m,
  `residual sugar` = [32mcol_number()[39m,
  chlorides = [31mcol_character()[39m,
  `free sulfur dioxide` = [32mcol_number()[39m,
  `total sulfur dioxide` = [32mcol_double()[39m,
  density = [31mcol_character()[39m,
  pH = [32mcol_number()[39m,
  sulphates = [31mcol_character()[39m,
  alcohol = [32mcol_number()[39m,
  quality = [32mcol_double()[39m
)

“2 parsing failures.
 row                  col               expected actual                  file
1296 total sulfur dioxide no trailing characters     .5 'winequality-red.csv'
1297 total sulfur dioxide no trailing characters     .5 'winequality-red.csv'
”


fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
<dbl>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<dbl>
74,0.7,0,19,0.076,11,34,0.9978,351,0.56,94,5
78,0.88,0,26,0.098,25,67,0.9968,32,0.68,98,5
78,0.76,0.04,23,0.092,15,54,0.997,326,0.65,98,5
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
63,0.51,0.13,23,0.076,29,40,0.99574,342,0.75,11,6
59,0.645,0.12,2,0.075,32,44,0.99547,357,0.71,102,5
6,0.31,0.47,36,0.067,18,42,0.99549,339,0.66,11,6


In [14]:
redwine_good <- redwine %>%
filter(quality > 5) %>%
mutate(quality =  "good")

redwine_bad <- redwine %>%
filter (quality <= 5) %>%
mutate(quality = "bad") 

redwine <- rbind (redwine_good, redwine_bad)

redwine <- redwine %>%
rename(volatile_acidity = "volatile acidity",
       fixed_acidity = "fixed acidity",
       citric_acid = "citric acid",
       residual_sugar = "residual sugar",
       free_sulfur_dioxide = "free sulfur dioxide",
       total_sulfur_dioxide = "total sulfur dioxide")

redwine <- redwine %>%
mutate(volatile_acidity = as.double(volatile_acidity),
      citric_acid = as.double(citric_acid),
      chlorides = as.double(chlorides),
      density = as.double(density),
      sulphates = as.double(sulphates),
      quality = as.factor(quality))

redwine

fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
112,0.28,0.56,19,0.075,17,60,0.9980,316,0.58,98,good
73,0.65,0.00,12,0.065,15,21,0.9946,339,0.47,10,good
78,0.58,0.02,2,0.073,9,18,0.9968,336,0.57,95,good
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
66,0.725,0.20,78,0.073,29,79,0.99770,329,0.54,92,bad
62,0.600,0.08,2,0.090,32,44,0.99490,345,0.58,105,bad
59,0.645,0.12,2,0.075,32,44,0.99547,357,0.71,102,bad


In [20]:
redwine_recipe <- recipe(quality ~., data = redwine) %>%
step_scale(all_predictors()) %>%
step_center(all_predictors())%>%
prep()%>%
bake(redwine)

redwine_recipe

fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1.29548030,-1.3840105,1.483689,-0.1826498,-0.2648775,0.02323997,0.4125250,0.66406945,0.1971277,-0.4610361,-0.04317968,good
-0.06965678,0.6823394,-1.391037,-0.3482822,-0.4773483,-0.08143810,-0.7729729,-1.13741353,0.4357874,-1.1099770,-0.04317968,good
0.10536080,0.2914083,-1.288368,-0.5848997,-0.3073717,-0.39547234,-0.8641650,0.02825193,0.4046578,-0.5200308,-0.04317968,good
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
-0.3146814,1.1011941,-0.3643491,1.2133940,-0.30737169,0.6513084,0.99007528,0.5051151,0.3320223,-0.6970146,-0.04317968,bad
-0.4546954,0.4031029,-0.9803619,-0.5848997,0.05382858,0.8083256,-0.07383309,-0.9784591,0.4980464,-0.4610361,-0.04317968,bad
-0.5597060,0.6544157,-0.7750243,-0.5848997,-0.26487754,0.8083256,-0.07383309,-0.6764458,0.6225645,0.3058939,-0.04317968,bad


In [22]:
set.seed(2054)

#training dataset (60% of the total dataset is used as the training set)

fruit_split <- initial_split(redwine_recipe, prop = 0.6, strata = quality) 
fruit_train <- training(fruit_split)
fruit_test <- testing(fruit_split)

fruit_train

fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
-0.06965678,0.6823394,-1.391037,-0.3482822,-0.47734829,-0.0814381,-0.7729729,-1.13741353,0.4357874,-1.1099770,-0.04317968,good
0.10536080,0.2914083,-1.288368,-0.5848997,-0.30737169,-0.3954723,-0.8641650,0.02825193,0.4046578,-0.5200308,-0.04317968,good
0.35038540,-1.3840105,1.483689,-0.2063116,0.09632273,0.9653427,1.7196124,0.08123672,-2.7394240,0.5418724,-0.04317968,good
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
-0.3146814,1.1011941,-0.3643491,1.2133940,-0.30737169,0.6513084,0.99007528,0.5051151,0.3320223,-0.6970146,-0.04317968,bad
-0.4546954,0.4031029,-0.9803619,-0.5848997,0.05382858,0.8083256,-0.07383309,-0.9784591,0.4980464,-0.4610361,-0.04317968,bad
-0.5597060,0.6544157,-0.7750243,-0.5848997,-0.26487754,0.8083256,-0.07383309,-0.6764458,0.6225645,0.3058939,-0.04317968,bad
