In [None]:
library(tidyverse)
library(lubridate)
library(tidymodels)
library(ggplot2)
library(readxl)

In [None]:
# Load the raw inputs from the working directory: an Excel workbook of
# player records and a CSV log of play sessions.
players <- read_excel(path = "players.xlsx")
sessions <- read_csv(file = "sessions.csv")

In [None]:
# Data cleaning and feature extraction.
# Parse the day-month-year "start_time"/"end_time" strings into date-times,
# keep only rows where both timestamps parsed, then derive the calendar
# features used by the analyses.
sessions_clean <- sessions |>
  mutate(
    start_datetime = dmy_hm(start_time),
    end_datetime = dmy_hm(end_time)
  ) |>
  filter(!(is.na(start_datetime) | is.na(end_datetime))) |>
  mutate(
    hour_of_day = hour(start_datetime),
    # Labelled weekday factor, with the week starting on Monday.
    day_of_week = wday(start_datetime, label = TRUE, week_start = 1),
    date = date(start_datetime)
  )

# Show the cleaned table.
sessions_clean

In [None]:
# Hourly activity: number of sessions that start in each hour of the day,
# ordered from the busiest hour to the quietest.
hourly_activity <- count(sessions_clean, hour_of_day, sort = TRUE)
print(hourly_activity)

In [None]:
# Daily activity: number of sessions that start on each weekday,
# ordered from the busiest day to the quietest.
daily_activity <- count(sessions_clean, day_of_week, sort = TRUE)
print(daily_activity)

In [None]:
# Sampling grid for the concurrency analysis. It runs from the hour
# containing the earliest session start to the hour boundary at or after the
# latest session end, in 2-hour steps.
# NOTE(review): with a 2-hour step starting on a whole hour, hour-level
# summaries built from this grid only see every other hour of the day --
# confirm this is intended.
time_points <- seq(
  from = floor_date(min(sessions_clean[["start_datetime"]]), unit = "hour"),
  to = ceiling_date(max(sessions_clean[["end_datetime"]]), unit = "hour"),
  by = "2 hours"
)

In [None]:
# For every sampled instant, count the sessions in progress at that moment
# (start <= instant <= end, both bounds inclusive), and tag the instant with
# its hour of day and Monday-first weekday label.
simultaneous_analysis <- tibble(datetime = time_points) |>
  mutate(
    player_count = map_dbl(
      datetime,
      \(instant) sum(
        sessions_clean$start_datetime <= instant &
          sessions_clean$end_datetime >= instant
      )
    ),
    hour = hour(datetime),
    day = wday(datetime, label = TRUE, week_start = 1)
  )
print(simultaneous_analysis)

In [None]:
# Summarise concurrent-session counts by hour of day, busiest hour first.
#   avg_players      - mean concurrent count over the samples at that hour
#   max_players      - largest concurrent count seen at that hour
#   peak_probability - share of that hour's samples whose count exceeds the
#                      overall 75th percentile of all sampled counts
#
# The peak threshold is computed once over the whole table, before grouping.
# Evaluating quantile() inside the grouped summarize() would compare each hour
# against its own 75th percentile, capping the share at roughly 0.25 for
# every hour and making hours impossible to compare.
peak_simultaneous_hours <- simultaneous_analysis |>
  mutate(
    # names = FALSE drops the "75%" label so the comparison yields a plain
    # logical vector.
    is_peak = player_count > quantile(player_count, 0.75, names = FALSE)
  ) |>
  group_by(hour) |>
  summarize(
    avg_players = mean(player_count),
    max_players = max(player_count),
    peak_probability = mean(is_peak)
  ) |>
  arrange(desc(avg_players))
print(peak_simultaneous_hours)

In [None]:
# Session counts for every weekday / start-hour combination, ordered from
# the busiest combination to the quietest.
hour_day_activity <- count(
  sessions_clean,
  day_of_week,
  hour_of_day,
  sort = TRUE
)
print(hour_day_activity)

In [None]:
# Bar chart of the number of sessions starting in each hour of the day.
# The hour is converted to a factor so each hour gets its own discrete bar.
hourly_plot <- hourly_activity |>
  ggplot(aes(x = factor(hour_of_day), y = n)) +
  geom_col(fill = "blue", alpha = 0.7) +
  labs(
    title = "Gaming sessions by hour of day",
    x = "Hour of day in 24 hour format",
    y = "No. of sessions"
  ) +
  # theme() only applies named elements; an unnamed element_text() sets
  # nothing. Naming it `text` applies the size to all text in the plot.
  theme(text = element_text(size = 20))

hourly_plot

In [None]:
# Bar chart of the number of sessions starting on each day of the week.
daily_plot <- daily_activity |>
  ggplot(aes(x = factor(day_of_week), y = n)) +
  geom_col(fill = "red", alpha = 0.7) +
  labs(
    title = "Gaming sessions by Day of Week",
    x = "Day of Week",
    y = "No. of sessions"
  ) +
  # theme() only applies named elements; an unnamed element_text() sets
  # nothing. Naming it `text` applies the size to all text in the plot.
  theme(text = element_text(size = 20))

daily_plot

In [None]:
# Session counts per weekday / start-hour combination, left in count()'s
# default order for plotting.
hour_day_heatmap <- sessions_clean |>
  count(day_of_week, hour_of_day)

# Heatmap with start hour on the x axis and weekday on the y axis; tile
# colour runs from white (few sessions) to red (many sessions).
heatmap_plot <- hour_day_heatmap |>
  ggplot(aes(x = factor(hour_of_day), y = day_of_week, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  labs(
    title = "Activity: Hour vs Day of Week",
    x = "Hour of Day",
    y = "Day of Week",
    fill = "Sessions"
  ) +
  # theme() only applies named elements; an unnamed element_text() sets
  # nothing. Naming it `text` applies the size to all text in the plot.
  theme(text = element_text(size = 20))
heatmap_plot

In [None]:
# Bar chart of the average number of concurrent sessions for each sampled
# hour of the day.
peak_simultaneous_plot <- peak_simultaneous_hours |>
  ggplot(aes(x = factor(hour), y = avg_players)) +
  geom_col(fill = "green", alpha = 0.7) +
  labs(
    title = "Average Simultaneous Players by Hour",
    x = "Hour of Day",
    y = "Average Simultaneous Players"
  ) +
  # theme() only applies named elements; an unnamed element_text() sets
  # nothing. Naming it `text` applies the size to all text in the plot.
  theme(text = element_text(size = 20))

peak_simultaneous_plot