In [1]:
library(tidyverse)
library(repr)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [6]:
dataset_url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
col_names <- c("id", "RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe", "glass_type")
glass_data_raw <- read.table(dataset_url, sep =",", header = FALSE, col.names = col_names) |>
    mutate(glass_type = as_factor(glass_type))
glass_data_raw

glass_type_names <- c("building_windows_float_processed", "building_windows_non_float_processed", "vehicle_windows_float_processed", "containers", "tableware", "headlamps")
glass_data_processed <- glass_data |> select(-RI)
levels(glass_data_processed$glass_type) <- glass_type_names
glass_data_processed

id,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,glass_type
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0,0.00,1
2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0,0.00,1
3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0.00,1
4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0.00,1
5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0.00,1
6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,1
7,1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0,0.00,1
8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0.00,1
9,1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0,0.00,1
10,1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0,0.11,1


id,Na,Mg,Al,Si,K,Ca,Ba,Fe,glass_type
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1,13.64,4.49,1.10,71.78,0.06,8.75,0,0.00,building_windows_float_processed
2,13.89,3.60,1.36,72.73,0.48,7.83,0,0.00,building_windows_float_processed
3,13.53,3.55,1.54,72.99,0.39,7.78,0,0.00,building_windows_float_processed
4,13.21,3.69,1.29,72.61,0.57,8.22,0,0.00,building_windows_float_processed
5,13.27,3.62,1.24,73.08,0.55,8.07,0,0.00,building_windows_float_processed
6,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,building_windows_float_processed
7,13.30,3.60,1.14,73.09,0.58,8.17,0,0.00,building_windows_float_processed
8,13.15,3.61,1.05,73.24,0.57,8.24,0,0.00,building_windows_float_processed
9,14.04,3.58,1.37,72.08,0.56,8.30,0,0.00,building_windows_float_processed
10,13.00,3.60,1.36,72.99,0.57,8.40,0,0.11,building_windows_float_processed


In [30]:
glass_split <- initial_split(glass_data_processed, prop = 0.75, strata = glass_type)
glass_training <- training(glass_split)
glass_testing <- testing(glass_split)

In [31]:
glass_summary <- glass_training |>
                        group_by(glass_type) |>
                        mutate(count = 1) |>
                        summarize(count = sum(count),
                                  avg_Na = mean(Na), avg_Mg = mean(Mg), 
                                  avg_Al = mean(Al), avg_Si = mean(Si), 
                                  avg_K = mean(K), avg_Ca = mean(Ca), 
                                  avg_Ba = mean(Ba), avg_Fe = mean(Fe)) 
glass_summary

glass_type,count,avg_Na,avg_Mg,avg_Al,avg_Si,avg_K,avg_Ca,avg_Ba,avg_Fe
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
building_windows_float_processed,51,13.25882,3.5721569,1.167843,72.65667,0.4458824,8.716667,0.01529412,0.06254902
building_windows_non_float_processed,55,13.13436,3.0234545,1.384909,72.656,0.5221818,9.038364,0.01218182,0.062
vehicle_windows_float_processed,13,13.44,3.5261538,1.233846,72.51538,0.4184615,8.692308,0.01153846,0.04615385
containers,11,12.82909,0.9145455,1.979091,72.39182,1.1381818,10.286364,0.22181818,0.07181818
tableware,8,14.6775,1.25125,1.345,73.03875,0.0,9.5775,0.0,0.0
headlamps,21,14.54381,0.5538095,2.167619,72.93619,0.2757143,8.315714,1.12714286,0.01
