In [16]:
library(tidyverse)
library(repr)
library(tidymodels)
options(repr.matrix.max.rows = 6)

In [None]:
knn_spec <- nearest_neighbor(weight_func = 'rectangular', neighbors = 3) %>%
    set_engine('kknn') %>%
    set_mode('classification')

player_data_recipe <- recipe(goat_rank ~ current_rank + best_rank + turned_pro + elo_rank + best_elo_rank + peak_elo_rank + titles, data = tidy_player_stats) %>%
                        step_scale(all_predictors()) %>%
                        step_center(all_predictors())

player_data_fit <- workflow() %>%
        add_model(knn_spec) %>%
        add_recipe(player_data_recipe) %>%
        fit(data = tidy_player_stats)
player_data_fit

new_player <- tibble(current_rank = 5, best_rank = 10, turned_pro = 2004, elo_rank = 50, best_elo_rank = 30, peak_elo_rank = 2000, titles = 1)
    fruit_predicted <- predict(player_data_fit, new_player)
    fruit_predicted

In [17]:
player_stats <- read_csv("data/player_stats.csv")

colnames(player_stats) <- c(X1 = "index",
                       Age = "age",
                       Country = "country",
                       Plays = "plays",
                       Wikipedia = "wiki",
                       "Current Rank" = "current_rank",
                       "Best Rank" = "best_rank",
                       Name = "name",
                       Backhand = "backhand",
                       "Prize Money" = "prize_money", 
                       Height = "height",
                       "Favorite Surface" = "fav_surface",
                       "Turned Pro" = "turned_pro",
                       Seasons = "seasons",
                       Active = "active",
                       "Current Elo Rank" = "elo_rank",
                       "Best Elo Rank" = "best_elo_rank",
                       "Peak Elo Rank" = "peak_elo_rank",
                       "Last Appearance" = "last_app",
                       Titles = "titles",
                       "GOAT Rank" = "goat_rank",
                       "Best Season" = "best_season",
                       Retired = "retired",
                       Masters = "masters",
                       Birthplace = "birthplace",
                       Residence = "res",
                       Weight = "weight",
                       Coach = "coach",
                       Facebook = "facebook",
                       Twitter = "twitter",
                       Nicknames = "nicknames",
                       "Grand Slams" = "grand_slams",
                       "Davis Cups" = "davis_cups",
                       "Web Site" = "website",
                       "Team Cups" = "team_cups",
                       Olympics = "olympics",
                       "Weeks at No. 1" = "weeks_at_top",
                       "Tour Finals" = "tour_finals")

player_stats
colnames(player_stats)

“Missing column names filled in: 'X1' [1]”
Parsed with column specification:
cols(
  .default = col_character(),
  X1 = [32mcol_double()[39m,
  `Turned Pro` = [32mcol_double()[39m,
  Seasons = [32mcol_double()[39m,
  Titles = [32mcol_double()[39m,
  `Best Season` = [32mcol_double()[39m,
  Retired = [32mcol_double()[39m,
  Masters = [32mcol_double()[39m,
  `Grand Slams` = [32mcol_double()[39m,
  `Davis Cups` = [32mcol_double()[39m,
  `Team Cups` = [32mcol_double()[39m,
  Olympics = [32mcol_double()[39m,
  `Weeks at No. 1` = [32mcol_double()[39m,
  `Tour Finals` = [32mcol_double()[39m
)

See spec(...) for full column specifications.



index,age,country,plays,wiki,current_rank,best_rank,name,backhand,prize_money,⋯,facebook,twitter,nicknames,grand_slams,davis_cups,website,team_cups,olympics,weeks_at_top,tour_finals
<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
0,26 (25-04-1993),Brazil,Right-handed,Wikipedia,378 (97),363 (04-11-2019),Oscar Jose Gutierrez,,,⋯,,,,,,,,,,
1,18 (22-12-2001),United Kingdom,Left-handed,Wikipedia,326 (119),316 (14-10-2019),Jack Draper,Two-handed,"$59,040",⋯,,,,,,,,,,
2,32 (03-11-1987),Slovakia,Right-handed,Wikipedia,178 (280),44 (14-01-2013),Lukas Lacko,Two-handed,"US$3,261,567",⋯,,,,,,,,,,
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋱,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
497,23 (14-03-1996),Netherlands,Left-handed,Wikipedia,495 (60),342 (05-08-2019),Gijs Brouwer,,,⋯,,,,,,,,,,
498,24 (17-05-1995),Ukraine,,Wikipedia,419 (81),419 (20-01-2020),Vladyslav Orlov,,,⋯,,,,,,,,,,
499,22 (26-03-1997),Tunisia,Left-handed,Wikipedia,451 (69),408 (24-12-2018),Aziz Dougaz,Two-handed,"$61,984",⋯,,,,,,,,,,


In [23]:
tidy_player_stats <- player_stats %>%
    filter(active == "Yes") %>%
    filter(!is.na(goat_rank)) %>%
    select(current_rank, best_rank, turned_pro, seasons, elo_rank, best_elo_rank, peak_elo_rank, goat_rank, titles) %>%
    arrange(best_rank)
tidy_player_stats

current_rank,best_rank,turned_pro,seasons,elo_rank,best_elo_rank,peak_elo_rank,goat_rank,titles
<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>
3 (6590),1 (02-02-2004),1998,22,3 (2328),1 (11-08-2003),2550 (05-03-2007),1 (929),103
2 (9720),1 (04-07-2011),2003,17,2 (2390),1 (21-03-2011),2629 (01-02-2016),2 (819),77
128 (422),1 (07-11-2016),2005,15,7 (2185),2 (19-04-2009),2500 (21-11-2016),12 (317),46
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
19 (1860),8 (16-07-2018),2007,14,28 (2007),9 (26-08-2012),2148 (26-08-2012),71 (47),15
9 (2630),9 (04-11-2019),2005,11,16 (2080),9 (15-07-2019),2135 (17-10-2016),151 (18),9
12 (2310),9 (15-07-2019),2004,16,29 (2004),14 (15-04-2014),2112 (20-04-2014),151 (18),9


In [35]:
tidy_player_stats_mutated <- tidy_player_stats %>%
    mutate(current_rank = current_rank)
    #mutate(current_rank = as.numeric(current_rank)) %>%
    #mutate(best_rank = as.numeric(best_rank)) %>%
    #mutate(elo_rank = as.numeric(elo_rank)) %>%
    #mutate(best_elo_rank = as.numeric(best_elo_rank)) %>%
    #mutate(peak_current_rank = as.numeric(current_rank))
tidy_player_stats_mutated

current_rank,best_rank,turned_pro,seasons,elo_rank,best_elo_rank,peak_elo_rank,goat_rank,titles
<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>
3 (6590),1 (02-02-2004),1998,22,3 (2328),1 (11-08-2003),2550 (05-03-2007),1 (929),103
2 (9720),1 (04-07-2011),2003,17,2 (2390),1 (21-03-2011),2629 (01-02-2016),2 (819),77
128 (422),1 (07-11-2016),2005,15,7 (2185),2 (19-04-2009),2500 (21-11-2016),12 (317),46
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
19 (1860),8 (16-07-2018),2007,14,28 (2007),9 (26-08-2012),2148 (26-08-2012),71 (47),15
9 (2630),9 (04-11-2019),2005,11,16 (2080),9 (15-07-2019),2135 (17-10-2016),151 (18),9
12 (2310),9 (15-07-2019),2004,16,29 (2004),14 (15-04-2014),2112 (20-04-2014),151 (18),9
