In [1]:
#load libraries
library(tidyverse)
library(repr)
library(tidymodels)
options(repr.matrix.max.rows = 10)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.0.0 ──

[32m✔[39m [34mbroom       [39m 1.0.0     [32m✔[39m [34mrsample     [39m 1.0.0
[32m✔[39m [34mdials       [39m 1.0.0     [32m✔[39m [34mtune        [39m 1.0.0
[32m✔[39m [34minfer       [39m 1.0.2     [32m✔[39m [34mworkflows   [39m 1.0.0
[32m✔

In [41]:
#Add columns
unscaled_data <- read_csv("pulsar_data.csv", 
                        col_names = c("mean_integrated_profile", 
                                      "stand_dev_integrated_profile", 
                                      "exc_kurtosis_integrated_profile", 
                                      "skew_integrated_profile",
                                      "mean_dmsnr", 
                                      "stand_dev_dmsnr", 
                                      "exc_kurtosis_dmsnr", 
                                      "skew_dmsnr", "class")) 

unscaled_data <- unscaled_data |>
    mutate(class = as_factor(class))
#pulsar_data 

#Class proportions in pulsar dataset (Imbalanced)
nrow(unscaled_data)
pulsar_proportions <- unscaled_data |>
    group_by(class) |>
    summarize(n = n()) |>
    mutate(percent = 100*n/nrow(pulsar_data))
pulsar_proportions

pulsar_train <- unscaled_data |>
    select(class, skew_dmsnr, skew_integrated_profile) 
#pulsar_train

pulsar_split <- initial_split(unscaled_data, prop = 0.75, strata = class)
pulsar_train <- training(pulsar_split)
pulsar_test <- testing(pulsar_split) 

pulsar_recipe <- recipe(class ~ ., data = pulsar_train) |>
    step_scale(all_predictors()) |>
    step_center(all_predictors()) |>
    prep()
#pulsar_recipe

pulsar_data <- bake(pulsar_recipe, pulsar_train)
#scaled_pulsar

knn_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = 5) |>
  set_engine("kknn") |>
  set_mode("classification")
#knn_spec

knn_fit <- workflow() |>
  add_recipe(pulsar_recipe) |>
  add_model(knn_spec) |>
  fit(data = pulsar_train)
#knn_fit

pulsar_test_predictions <- predict(knn_fit, pulsar_test) |>
    bind_cols(pulsar_test)
#pulsar_test_predictions

pulsar_test_predictions |>
  metrics(truth = class, estimate = .pred_class) |>
  filter(.metric == "accuracy")
pulsar_test_predictions

[1mRows: [22m[34m17898[39m [1mColumns: [22m[34m9[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[32mdbl[39m (9): mean_integrated_profile, stand_dev_integrated_profile, exc_kurtosis...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


class,n,percent
<fct>,<int>,<dbl>
0,16259,121.12791
1,1639,12.21039


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
accuracy,binary,0.9765363


.pred_class,mean_integrated_profile,stand_dev_integrated_profile,exc_kurtosis_integrated_profile,skew_integrated_profile,mean_dmsnr,stand_dev_dmsnr,exc_kurtosis_dmsnr,skew_dmsnr,class
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
0,140.5625,55.68378,-0.23457141,-0.699648398,3.1998328,19.110426,7.975532,74.2422249,0
0,103.0156,39.34165,0.32332837,1.051164429,3.1212375,21.744669,7.735822,63.1719091,0
0,138.1797,51.52448,-0.03185233,0.046797173,6.3302676,31.576347,5.155940,26.1433102,0
0,109.6406,49.01765,0.13763583,-0.256699775,1.5083612,12.072901,13.367926,223.4384192,0
0,136.0938,51.69100,-0.04590893,-0.271816393,9.3428094,38.096400,4.345438,18.6736485,0
0,105.4453,41.13997,0.14265380,0.320419676,3.5518395,20.755017,7.739552,68.5197706,0
0,117.3672,53.90861,0.25795344,-0.405049077,6.0183946,24.766123,4.807783,25.5226156,0
0,106.6484,56.36718,0.37835507,-0.266371607,2.4364549,18.405371,9.378660,96.8602254,0
0,112.7188,50.30127,0.27939095,-0.129010712,8.2817726,37.810012,4.691827,21.2762098,0
0,142.0547,53.87316,-0.47077269,-0.125946417,4.4230769,27.083513,6.681658,45.9440301,0
