# Vinho Verde: Wine Quality Analysis

In [3]:
library(tidyverse)
library(digest)
library(repr)
library(tidymodels)
options(repr.matrix.max.rows = 6)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.0.0 ──

[32m✔[39m [34mbroom       [39m 1.0.0     [32m✔[39m [34mrsample     [39m 1.0.0
[32m✔[39m [34mdials       [39m 1.0.0     [32m✔[39m [34mtune        [39m 1.0.0
[32m✔[39m [34minfer       [39m 1.0.2     [32m✔[39m [34mworkflows   [39m 1.0.0
[32m✔

# Introduction

In [4]:
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_red <- read_csv2(url)

[36mℹ[39m Using [34m[34m"','"[34m[39m as decimal and [34m[34m"'.'"[34m[39m as grouping mark. Use `read_delim()` for more control.

“One or more parsing issues, see `problems()` for details”
[1mRows: [22m[34m1599[39m [1mColumns: [22m[34m12[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ";"
[31mchr[39m (5): volatile acidity, citric acid, chlorides, density, sulphates
[32mdbl[39m (2): total sulfur dioxide, quality

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [5]:
colnames(wine_red) <- make.names(colnames(wine_red))
wine_red <- wine_red |>
            mutate(volatile.acidity = as.double(volatile.acidity),
                  citric.acid = as.double(citric.acid),
                  chlorides = as.double(chlorides),
                  alcohol = as.double(alcohol),
                  sulphates = as.double(sulphates)) |>
            select(-fixed.acidity, -free.sulfur.dioxide, -total.sulfur.dioxide, -density)

wine_red

volatile.acidity,citric.acid,residual.sugar,chlorides,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.70,0.00,19,0.076,351,0.56,94,5
0.88,0.00,26,0.098,32,0.68,98,5
0.76,0.04,23,0.092,326,0.65,98,5
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
0.510,0.13,23,0.076,342,0.75,11,6
0.645,0.12,2,0.075,357,0.71,102,5
0.310,0.47,36,0.067,339,0.66,11,6


In [6]:
wine_split <- initial_split(wine_red, prop = 0.75, strata = quality)
wine_train <- training(wine_split)
wine_test <- testing(wine_split)

wine_train

volatile.acidity,citric.acid,residual.sugar,chlorides,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.70,0.00,19,0.076,351,0.56,94,5
0.88,0.00,26,0.098,32,0.68,98,5
0.76,0.04,23,0.092,326,0.65,98,5
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
0.37,0.43,23,0.063,317,0.81,112,7
0.36,0.30,18,0.074,324,0.70,114,8
0.56,0.17,17,0.065,344,0.68,1055,7


In [10]:
means <- map_df(wine_train, mean)
means

volatile.acidity,citric.acid,residual.sugar,chlorides,pH,sulphates,alcohol,quality
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.5261603,0.2737229,25.93155,0.08898164,296.8589,0.6583639,1766277000000.0,5.631886
