In [None]:
library(tidyverse)
library(repr)
library(tidymodels)
source("cleanup.R")

In [None]:
# Fix the RNG seed so the random steps that follow (data splitting,
# resampling) give the same result on every run.
set.seed(1)

# Load the players data, keep only the columns of interest, drop every row
# that has a missing value in any of them, and turn the response
# (`subscribe`) into a factor so it can be used for classification.
url_pl <- "https://raw.githubusercontent.com/takemil8088/ind-porject/refs/heads/main/players.csv"
players <- read_csv(url_pl) |>
  select(experience, subscribe, played_hours, gender, Age) |>
  # drop_na() with no arguments checks all remaining columns, i.e. exactly
  # the five selected above.
  drop_na() |>
  mutate(subscribe = as_factor(subscribe))
players

In [None]:
# Hold out a quarter of the rows for the final evaluation. The split is
# stratified on the response (`subscribe`).
split <- initial_split(players, strata = subscribe, prop = 0.75)

# Extract the two partitions from the split object.
test <- testing(split)
train <- training(split)

In [None]:
# Baseline: a k = 3 nearest-neighbour classifier, evaluated by
# cross-validation on the training set only.

# 5-fold cross-validation, stratified on the response.
vfold <- vfold_cv(train, v = 5, strata = subscribe)

# Preprocessing: scale, then center, both predictors.
# NOTE: this variable shares its name with the recipe() function; R still
# resolves `recipe(...)` calls to the function, but the two are easy to
# confuse when reading.
recipe <- recipe(subscribe ~ played_hours + Age, data = train) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# K-nearest-neighbours classifier with a fixed k of 3 and the "rectangular"
# weighting function.
knn_spec <- nearest_neighbor(neighbors = 3, weight_func = "rectangular") |>
  set_engine("kknn") |>
  set_mode("classification")

# Recipe + model bundled once, then reused for both resampling schemes below.
baseline_wf <- workflow() |>
  add_recipe(recipe) |>
  add_model(knn_spec)

# Despite its name, `knn_fit` holds the summarised 5-fold metrics, not a
# fitted model.
knn_fit <- collect_metrics(fit_resamples(baseline_wf, resamples = vfold))

knn_fit

# Repeat the evaluation with 10 folds.
p_vfold <- vfold_cv(train, v = 10, strata = subscribe)

vfold_metrics <- collect_metrics(
  fit_resamples(baseline_wf, resamples = p_vfold)
)

vfold_metrics

In [None]:
# Model specification with the number of neighbours left as a tuning
# parameter.
knn_spe <- nearest_neighbor(neighbors = tune(), weight_func = "rectangular") |>
  set_engine("kknn") |>
  set_mode("classification")

# Candidate values of k: 1, 6, 11, ..., 96.
k_vals <- tibble(neighbors = seq(from = 1, to = 100, by = 5))

# Evaluate every candidate k on the 10-fold resamples (`p_vfold`), using the
# preprocessing recipe (`recipe`) defined in an earlier cell.
tuning_wf <- workflow() |>
  add_recipe(recipe) |>
  add_model(knn_spe)

knn_results <- collect_metrics(
  tune_grid(tuning_wf, resamples = p_vfold, grid = k_vals)
)

# Keep only the accuracy rows; these are used to choose k.
accuracies <- filter(knn_results, .metric == "accuracy")

accuracies

In [None]:
# Sort the candidate k values by mean cross-validated accuracy, highest
# first, and take the number of neighbours from the top row.
ranked_accuracies <- arrange(accuracies, desc(mean))
best_k <- ranked_accuracies |>
  slice_head(n = 1) |>
  pull(neighbors)
best_k

In [None]:
# Final model: refit on the whole training set using the selected k.

# Preprocessing: scale, then center, both predictors.
precipe <- recipe(subscribe ~ played_hours + Age, data = train) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# K-nearest-neighbours classifier using `best_k` neighbours.
knn_sp <- nearest_neighbor(neighbors = best_k, weight_func = "rectangular") |>
  set_engine("kknn") |>
  set_mode("classification")

# Bundle recipe and model, then fit on the training data.
final_wf <- workflow() |>
  add_recipe(precipe) |>
  add_model(knn_sp)

knn_fitp <- fit(final_wf, data = train)

knn_fitp

In [None]:
# Predict on the held-out test set and attach the predictions to the test
# rows so they can be compared with the true labels.
p_predictions <- bind_cols(predict(knn_fitp, test), test)

# Test-set accuracy.
test_metrics <- metrics(p_predictions, truth = subscribe, estimate = .pred_class)
filter(test_metrics, .metric == "accuracy")

# Show the order of the factor levels of `subscribe`; the order determines
# which level an `event_level` argument refers to.
levels(p_predictions$subscribe)

In [None]:
# Precision on the test set, treating the second factor level of `subscribe`
# as the event of interest.
precision(
  p_predictions,
  truth = subscribe,
  estimate = .pred_class,
  event_level = "second"
)

In [None]:
# Recall on the test set, treating the second factor level of `subscribe`
# as the event of interest.
recall(
  p_predictions,
  truth = subscribe,
  estimate = .pred_class,
  event_level = "second"
)