In [3]:
library(tidyverse)
library(tidymodels)
# for data reproducibility
set.seed(3)

# reading & wrangling
heart_disease <- read_csv("data/processed.cleveland.data",
                          col_names = c("age", "sex", "chest_pain_type", "resting_blood_sugar", "cholestorol", "fasting_blood_sugar", "electrocardio_results", "max_heart_rate", "exercise_induced_angina", "ST_depression", "ST_peak_slope", "major_vessels", "thal", "diagnosis"))

heart_disease[c("diagnosis")][which(heart_disease$diagnosis > 1), ] <- 1

heart_disease <- heart_disease %>%
    mutate(major_vessels = as.numeric(major_vessels)) %>%
    mutate(thal = as.numeric(thal)) 

heart_disease <- heart_disease %>%
    mutate(diagnosis = as.factor(ifelse(diagnosis == 1, "Presence", "Absence")))

heart_disease <- na.omit(heart_disease)

# splitting
heart_disease_split <- initial_split(heart_disease, prop = 0.8, strata = diagnosis)
heart_disease_train <- training(heart_disease_split)
heart_disease_test <- testing(heart_disease_split)

# modelling
knn_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = 10) %>%
    set_engine("kknn") %>%
    set_mode("classification")
# knn_spec

knn_recipe <- recipe(diagnosis ~ age, sex, chest_pain_type, resting_blood_sugar, cholestorol, max_heart_rate, thal, data = heart_disease_train) %>%
    step_scale(all_predictors()) %>%
    step_center(all_predictors())
# knn_recipe

knn_workflow <- workflow() %>%
    add_recipe(knn_recipe) %>%
    add_model(knn_spec) 
# knn_workflow

knn_fit <- knn_workflow %>%
    fit(data = heart_disease_train)
# knn_fit

new_patient = tibble(age = 63, sex = 0, chest_pain_type = 3, resting_blood_sugar = 200, cholestorol = 300, fasting_blood_sugar = 0, electrocardio_results = 2, max_heart_rate = 180, exercise_induced_angina = 0, ST_depression = 0.9, ST_peak_slope = 3, major_vessels = 2, thal = 6)

prediction <- predict(knn_fit, new_patient)

prediction

Parsed with column specification:
cols(
  age = [32mcol_double()[39m,
  sex = [32mcol_double()[39m,
  chest_pain_type = [32mcol_double()[39m,
  resting_blood_sugar = [32mcol_double()[39m,
  cholestorol = [32mcol_double()[39m,
  fasting_blood_sugar = [32mcol_double()[39m,
  electrocardio_results = [32mcol_double()[39m,
  max_heart_rate = [32mcol_double()[39m,
  exercise_induced_angina = [32mcol_double()[39m,
  ST_depression = [32mcol_double()[39m,
  ST_peak_slope = [32mcol_double()[39m,
  major_vessels = [31mcol_character()[39m,
  thal = [31mcol_character()[39m,
  diagnosis = [32mcol_double()[39m
)

“Problem with `mutate()` input `major_vessels`.
[34mℹ[39m NAs introduced by coercion
[34mℹ[39m Input `major_vessels` is `as.numeric(major_vessels)`.”
“NAs introduced by coercion”
“Problem with `mutate()` input `thal`.
[34mℹ[39m NAs introduced by coercion
[34mℹ[39m Input `thal` is `as.numeric(thal)`.”
“NAs introduced by coercion”


.pred_class
<fct>
Presence
