**Part 1: Wrangling our datasets**
Taking a look at both of the provided datasets and wrangling them so that they are tidy.  
Load our R packages

In [None]:
library(tidyverse)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(forcats)
library(tidymodels)

**Reading our datasets**

In [None]:
# Read the player-level table and display it.
players <- read_csv(file = "data/players.csv")
players

# Read the session-level table and display it.
sessions <- read_csv(file = "data/sessions.csv")
sessions

Focusing on the first dataset (players), I want to select the variables that align with my question and goal.

In [None]:
# Keep only the columns used in the analysis, drop rows with a missing age or
# zero played hours, and store experience as a factor.
players_select <- players |>
    select(experience, hashedEmail, played_hours, Age) |>
    # Test for missing values explicitly with is.na(); comparing against the
    # string "NA" only drops missing rows as a side effect of NA propagation.
    filter(!is.na(Age)) |>
    filter(played_hours != 0.0) |>
    mutate(experience = as.factor(experience))
players_select

In [None]:
# Median played hours for each experience level.
# Note: the summary column is called `avg_play` but it holds a median.
avg_playtime <- players_select |>
  group_by(experience) |>
  summarize(avg_play = median(played_hours))
avg_playtime

# Number of players in each experience level.
experience_count <- players_select |>
  count(experience, name = "count")
experience_count

In [None]:
# Bar chart of the per-experience playtime summary, with bars ordered by
# increasing `avg_play`.
# NOTE(review): the labels say "Average", but `avg_play` is computed with
# median() in avg_playtime -- confirm the intended wording.
experience_bar <- ggplot(
  avg_playtime,
  aes(x = fct_reorder(experience, avg_play), y = avg_play, fill = experience)
) +
  geom_col() +
  scale_fill_brewer(palette = "BrBG") +
  labs(
    title = "Average Playtime for Different Players",
    x = "Minecraft Experience",
    y = "Average Playtime (in hours)",
    fill = "Minecraft Experience"
  )
experience_bar

In [None]:
# Scatter plot of hours played against age, coloured by experience level.
# Points are semi-transparent so overlapping observations remain visible.
age_experience_plot <- ggplot(
  players_select,
  aes(x = Age, y = played_hours, colour = experience)
) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Hours Played vs Age",
    x = "Age (years)",
    y = "Hours Played",
    colour = "Minecraft Experience"
  )
age_experience_plot

In [None]:
# Fix the random seed so the train/test split is reproducible.
set.seed(13)

# 75% / 25% split, stratified on the response variable played_hours.
players_split <- players_select |>
  initial_split(prop = 0.75, strata = played_hours)

players_training <- players_split |> training()
players_testing <- players_split |> testing()

In [None]:
# K-nearest-neighbours regression specification using the kknn engine.
# "rectangular" weighting gives every neighbour an equal vote; the number of
# neighbours is left as tune() so it can be chosen in a later tuning step.
players_knn <- nearest_neighbor(weight_func = "rectangular", neighbors = tune()) |>
      set_engine("kknn") |>
      set_mode("regression")

# Preprocessing recipe: predict played_hours from experience and Age.
# Age must be joined with `+` inside the formula; passed as a separate
# argument it is not treated as a predictor.
# Scaling and centering are restricted to numeric predictors because
# `experience` is a factor, and step_scale()/step_center() only accept
# numeric columns.
players_recipe <- recipe(played_hours ~ experience + Age, data = players_training) |>
      step_scale(all_numeric_predictors()) |>
      step_center(all_numeric_predictors())