In [None]:
### Run this cell before continuing.
library(tidyverse)
#library(repr)
#library(infer)
#library(cowplot)
#options(repr.matrix.max.rows = 6)
source("cleanup.R")

In [None]:
# Read the two raw tables from the working directory: one row per player,
# and one row per play session.
players <- read_csv(file = "players.csv")
sessions <- read_csv(file = "sessions.csv")

# Show the last few rows of each table as a quick sanity check of the import.
players |> tail()
sessions |> tail()

Clean the session data, summarise it into usable per-player totals, and combine it with the player data.

In [None]:
# Collapse the session log into one row per player (keyed by hashedEmail):
# total hours played, mean session length in hours, and number of sessions.
#
# start_time / end_time are parsed as day-month-year hour:minute strings.
# Session length is requested explicitly in hours. Subtracting two date-times
# gives a difftime whose unit R chooses from the data (secs, mins, hours or
# days), so dividing the bare number by 60 is only right when that automatic
# unit happens to be minutes.
clean_sessions <- sessions |>
    mutate(start_dt = lubridate::dmy_hm(start_time),
           end_dt = lubridate::dmy_hm(end_time),
           session_time_in_hrs = as.numeric(
               difftime(end_dt, start_dt, units = "hours")
           )) |>
    group_by(hashedEmail) |>
    summarise(total_session_time_hrs = round(sum(session_time_in_hrs), 2),
              average_session_time_hrs = round(mean(session_time_in_hrs), 2),
              num_of_sessions = n(),
              .groups = "drop") |>
    # sum() and mean() are called without na.rm, so a player with any session
    # whose start or end time is missing gets NA in both columns and is
    # dropped here. Both columns must be present for the row to be kept.
    filter(!is.na(total_session_time_hrs),
           !is.na(average_session_time_hrs))

# Keep only players that have session summaries (inner join on hashedEmail),
# then retain the columns used for analysis. inner_join() keeps the result a
# tibble and preserves the row order of `players`.
clean_players <- players |>
    inner_join(clean_sessions, by = "hashedEmail") |>
    select(experience, subscribe, Age,
           total_session_time_hrs, average_session_time_hrs, num_of_sessions)

clean_players

Plot the proportion of subscribers by experience level and by number of sessions.

In [None]:
# Size of figures rendered in the notebook output.
options(repr.plot.width = 10, repr.plot.height = 10)

# 100%-stacked bar chart: within each experience level, the share of players
# in each `subscribe` category.
#
# The levels are reordered to the progression named in the x-axis title;
# a character column would otherwise be drawn in alphabetical order.
# NOTE(review): level names are taken from the axis title - confirm they match
# the data. fct_relevel() only warns about names it cannot find and leaves the
# remaining levels in place, so no rows are lost if one is misspelled.
experience_vs_sub <- clean_players |>
    mutate(experience = fct_relevel(experience,
                                    "Beginner", "Amateur", "Regular",
                                    "Veteran", "Pro")) |>
    ggplot(aes(x = experience, fill = subscribe)) +
    geom_bar(position = "fill") +
    labs(x = "Experience Level of Players (Beginner, Amateur, Regular, Veteran, Pro)",
         y = "Subscribed Percentage (0.00 - 1.00)",
         fill = "Subscribed/Not Subscribed") +
    ggtitle("Experience vs Proportion of Subscribers") +
    scale_fill_brewer(palette = "Set1") +
    theme_minimal()

# 100%-stacked histogram: players are binned by their number of sessions
# (15 bins) and each bin is split by `subscribe` category.
session_freq_vs_sub <- ggplot(clean_players,
                              aes(x = num_of_sessions, fill = subscribe)) +
    geom_histogram(bins = 15, position = "fill") +
    scale_fill_brewer(palette = "Set1") +
    labs(title = "Number of Sessions vs Proportion of Subscribers",
         x = "Number of Sessions",
         y = "Proportion of Players",
         fill = "Subscribed Status") +
    theme_minimal()

# Render both figures in the cell output.
experience_vs_sub
session_freq_vs_sub