In [None]:
# Load the packages this notebook relies on.
# tidyverse supplies the data-import, wrangling and plotting functions;
# tidymodels supplies the resampling, recipe, model and tuning functions.
# NOTE(review): repr is loaded but nothing in the visible cells calls it
# directly - presumably it is used for notebook display options; confirm.
library(tidyverse)
library(repr)
library(tidymodels)

# Fix the random-number seed so the random steps that follow
# (e.g. the train/test split) give the same result on every run.
set.seed(144)

In [None]:
# Download the HTRU_2 data set; col_names = FALSE tells read_csv not to
# treat the first row as a header, so default column names are generated.
pulsar <-
  'https://raw.githubusercontent.com/susanyliu/dsci100-project144/main/data/HTRU_2.csv' |>
  read_csv(col_names = FALSE)

In [None]:
# Give the nine columns descriptive names: four statistics for the
# "Profile" group, four for the "DM" group, then the class label.
names(pulsar) <- c(
  "Profile_mean", "Profile_stdev", "Profile_skewness", "Profile_kurtosis",
  "DM_mean", "DM_stdev", "DM_skewness", "DM_kurtosis",
  "class"
)

In [None]:
# Store the class label as a factor so it is treated as a categorical
# outcome rather than a number.
pulsar <- mutate(pulsar, class = as_factor(class))

# Show the first rows to check the column names and types.
pulsar |> head()

In [None]:
# Split the data 75% / 25% into training and testing sets, stratifying on
# class so both sets keep a similar class balance.
pulsar_split <- pulsar |>
  initial_split(prop = 0.75, strata = class)

pulsar_train <- pulsar_split |> training()
pulsar_test <- pulsar_split |> testing()

In [None]:
# Seed the RNG so the random steps in this cell are reproducible.
set.seed(1)

# 10-fold cross-validation on the training set, stratified by class.
pulsar_vfold <- vfold_cv(pulsar_train, v = 10, strata = class)

# Preprocessing recipe: predict class from the four Profile statistics plus
# DM_mean, then scale and center every predictor.
# Formula terms must be joined with `+`; comma-separated names would be
# passed to recipe() as separate arguments instead of becoming predictors.
pulsar_recipe <- recipe(
  class ~ Profile_mean + Profile_stdev + Profile_skewness +
    Profile_kurtosis + DM_mean,
  data = pulsar_train
) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# K-nearest-neighbours classifier. `neighbors` is left as tune() so its
# value is chosen by the grid search below.
knn_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = tune()) |>
  set_engine("kknn") |>
  set_mode("classification")

# Fit the workflow on every fold for a grid of 10 candidate settings and
# collect the cross-validated performance metrics.
knn_results <- workflow() |>
  add_recipe(pulsar_recipe) |>
  add_model(knn_spec) |>
  tune_grid(resamples = pulsar_vfold, grid = 10) |>
  collect_metrics()

# Keep only the accuracy rows for plotting against the number of neighbours.
accuracies <- knn_results |>
  filter(.metric == "accuracy")

In [None]:
# Plot the accuracy estimate (the `mean` column) against the number of
# neighbours, as points joined by a line.
cross_val_plot <- ggplot(accuracies, aes(x = neighbors, y = mean)) +
  geom_point() +
  geom_line() +
  labs(x = "Neighbors", y = "Accuracy Estimate") +
  theme(text = element_text(size = 20))

cross_val_plot