In [None]:
set.seed(1234)
library(readr)
library(tidyverse)
library(modelr)
library(bayestestR)
library(caret)
library(ez)
library(apa)
library(readr)
library(RColorBrewer)
library(scales)
library(afex)
library(ARTool)
library(fBasics)
library(car)
library(grid)
library(gridExtra)
library(cowplot)
library(ggsignif)
library(tseries)
library(Kendall)
library(stats)
library(ez)
library(lme4)
library(report)
library(MASS)
options(warn = -1)
library(dplyr)
library(emmeans)
library(nlme)

# Shared plot theme: black-and-white base with grid lines and panel border
# removed, large text, and every x-axis element hidden.
apatheme <- theme_bw() +
  theme(
    text = element_text(size = 30),
    plot.title = element_text(hjust = 0.5),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    strip.background = element_rect(fill = "#cccccc"),
    legend.justification = c(0, 0),
    legend.background = element_blank(),
    axis.line = element_line(),
    # The x axis is suppressed entirely: line, title, ticks and tick labels.
    axis.line.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

In [None]:
# Seven-colour palette for plots with several categories.
plot_colors <- c(
  "#F8766D", "#C49A00", "#53B400", "#00C094",
  "#00B6EB", "#A58AFF", "#FB61D7"
)

# Two-colour palette used throughout for fills, lines and points.
two_color <- c("#04bcc4", "#fc746c")

In [None]:
#' Flag values lying outside Tukey's fences.
#'
#' @param dv Numeric vector. Missing values are ignored when the quartiles
#'   and the interquartile range are computed.
#' @param Tukey_crit Multiplier applied to the interquartile range when
#'   building the fences (default 1.5).
#' @return Numeric vector with one entry per element of `dv`: 1 when the value
#'   is above the upper or below the lower fence, 0 otherwise, and `NA` where
#'   `dv` is `NA`.
tukey_detect <- function(dv, Tukey_crit = 1.5) {
  quartiles <- quantile(dv, probs = c(0.25, 0.75), na.rm = TRUE)
  spread <- IQR(dv, na.rm = TRUE)
  lower_fence <- quartiles[[1]] - Tukey_crit * spread
  upper_fence <- quartiles[[2]] + Tukey_crit * spread
  as.numeric(dv > upper_fence | dv < lower_fence)
}

In [None]:
# Read data_processed_noclose.csv and turn its column names into
# syntactically valid R names.
df_noClose <- read_csv("./data_processed_noclose.csv")
names(df_noClose) <- make.names(names(df_noClose))

In [None]:
# Read data_processed.csv and turn its column names into syntactically valid
# R names.
df <- read_csv("./data_processed.csv")
names(df) <- make.names(names(df))

In [None]:
# Read app_usage.csv and turn its column names into syntactically valid R
# names.
df_app_usage <- read_csv("./app_usage.csv")
names(df_app_usage) <- make.names(names(df_app_usage))

In [None]:
# Read app_categories.csv and turn its column names into syntactically valid
# R names.
df_app_categories <- read_csv("./app_categories.csv")
names(df_app_categories) <- make.names(names(df_app_categories))

In [None]:
# Hourly aggregates: overall table and the per-result table, both with
# column names made syntactically valid.
df_entries_per_user_hours <- read_csv("./data_users_remaining_hours.csv")
names(df_entries_per_user_hours) <- make.names(names(df_entries_per_user_hours))

df_entries_per_user_hours_result <- read_csv("./data_users_remaining_hours_result.csv")
names(df_entries_per_user_hours_result) <- make.names(names(df_entries_per_user_hours_result))

In [None]:
# Daily aggregates: overall table and the per-result table, both with column
# names made syntactically valid.
df_entries_per_user_days <- read_csv("./data_users_remaining_days.csv")
names(df_entries_per_user_days) <- make.names(names(df_entries_per_user_days))

df_entries_per_user_days_result <- read_csv("./data_users_remaining_days_result.csv")
names(df_entries_per_user_days_result) <- make.names(names(df_entries_per_user_days_result))

In [None]:
# Weekly aggregates: overall table and the per-result table, both with column
# names made syntactically valid.
df_entries_per_user_weeks <- read_csv("./data_users_remaining_weeks.csv")
names(df_entries_per_user_weeks) <- make.names(names(df_entries_per_user_weeks))

df_entries_per_user_weeks_result <- read_csv("./data_users_remaining_weeks_result.csv")
names(df_entries_per_user_weeks_result) <- make.names(names(df_entries_per_user_weeks_result))

# Descriptive Statistics about Users

In [None]:
# Descriptive statistics on how long each user was recorded, at week, day,
# hour, minute and second resolution. For every resolution the per-user
# maximum of the matching *SinceStart column is summarised, and a cutoff of
# mean + how_many_sds * SD is stored; later cells use these cutoffs as axis
# limits and row filters.
how_many_sds <- 2

# NOTE(review): the user count assumes userIndex is zero-based and gap-free.
print(paste("Number of unique users: ", max(df_noClose$userIndex)+1))
print(paste("Total number of interactions: ", length(df_noClose$userIndex)))
print(paste("Avg. Interactions per user: ", length(df_noClose$resolution)/(max(df_noClose$userIndex)+1)))

# ---- Weeks ----
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_weeks = max(weeksSinceStart)) %>%
  summarise(avg_max_weeks = mean(max_weeks),
            sd_max_weeks = sd(max_weeks),
            max_max_weeks = max(max_weeks),
            min_max_weeks = min(max_weeks))

print(paste("Average total length (weeks): ", result$avg_max_weeks))
print(paste("SD total length (weeks): ", result$sd_max_weeks))
print(paste("Min total length (weeks): ", result$min_max_weeks))
print(paste("Max total length (weeks): ", result$max_max_weeks))

cutoff_max_weeks <- result$avg_max_weeks + how_many_sds*result$sd_max_weeks
mean_max_weeks <- result$avg_max_weeks

# Five longest-recorded users (kept for interactive inspection).
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_weeks = max(weeksSinceStart)) %>%
  arrange(desc(max_weeks)) %>%
  head(5)

# print(result)

# Five shortest-recorded users (kept for interactive inspection).
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_weeks = max(weeksSinceStart)) %>%
  arrange(desc(max_weeks)) %>%
  tail(5)

# print(result)

# ---- Days ----
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_days = max(daysSinceStart)) %>%
  summarise(avg_max_days = mean(max_days),
            sd_max_days = sd(max_days))

print(paste("Average total length (days): ", result$avg_max_days))
print(paste("SD total length (days): ", result$sd_max_days))

cutoff_max_days <- result$avg_max_days + how_many_sds*result$sd_max_days

result <- df %>%
  group_by(userIndex) %>%
  summarise(max_days = max(daysSinceStart)) %>%
  arrange(desc(max_days)) %>%
  head(5)

# print(paste("Head:", result))

# Fixed: this summary previously took max(minutesSinceStart) while naming the
# column max_days, so the "tail" was expressed in minutes rather than days.
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_days = max(daysSinceStart)) %>%
  arrange(desc(max_days)) %>%
  tail(5)

# print(paste("Tail:", result))

# ---- Hours ----
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_hours = max(hoursSinceStart)) %>%
  summarise(avg_max_hours = mean(max_hours),
            sd_max_hours = sd(max_hours))

print(paste("Average total length (hours): ", result$avg_max_hours))
print(paste("SD total length (hours): ", result$sd_max_hours))

cutoff_max_hours <- result$avg_max_hours + how_many_sds*result$sd_max_hours

# ---- Minutes ----
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_minutes = max(minutesSinceStart)) %>%
  summarise(avg_max_minutes = mean(max_minutes),
            sd_max_minutes = sd(max_minutes),
            max_max_minutes = max(max_minutes),
            min_max_minutes = min(max_minutes))

print(paste("Average total length (minutes): ", result$avg_max_minutes))
print(paste("SD total length (minutes): ", result$sd_max_minutes))
print(paste("Min total length (minutes): ", result$min_max_minutes))
print(paste("Max total length (minutes): ", result$max_max_minutes))

cutoff_max_minutes <- result$avg_max_minutes + how_many_sds*result$sd_max_minutes

# ---- Seconds ----
# `result` keeps this seconds summary once the cell has finished.
result <- df %>%
  group_by(userIndex) %>%
  summarise(max_seconds = max(secondsSinceStart)) %>%
  summarise(avg_max_seconds = mean(max_seconds),
            sd_max_seconds = sd(max_seconds),
            max_max_seconds = max(max_seconds),
            min_max_seconds = min(max_seconds))

print(paste("Average total length (seconds): ", result$avg_max_seconds))
print(paste("SD total length (seconds): ", result$sd_max_seconds))
print(paste("Min total length (seconds): ", result$min_max_seconds))
print(paste("Max total length (seconds): ", result$max_max_seconds))

print(paste("Cutoff minutes: ", cutoff_max_minutes))
print(paste("Cutoff hours: ", cutoff_max_hours))
print(paste("Cutoff days: ", cutoff_max_days))
print(paste("Cutoff weeks: ", cutoff_max_weeks))




In [None]:
# Count interactions per continuedToApp outcome and express each count as a
# percentage of all rows in df_noClose.
df_total_effectiveness <- summarize(
  group_by(df_noClose, continuedToApp),
  total_interactions = n(),
  percentage = n() / nrow(df_noClose) * 100
)

df_total_effectiveness

In [None]:
# Convert the two categorical columns to factors with an explicit level
# order; values not listed in `levels` become NA.
df[["lifeStatus"]] <- factor(
  df[["lifeStatus"]],
  levels = c("Break", "Return", "Dropout", "Alive")
)
df[["continuedToApp"]] <- factor(
  df[["continuedToApp"]],
  levels = c("openedApp", "didNotOpenApp")
)

In [None]:
# Longest recorded usage span (weeks) over all users. Computed directly from
# `df` because `result` is reassigned by several cells and, after the
# descriptive-statistics cell, no longer carries a `max_weeks` column.
# The overall maximum equals the maximum of the per-user maxima.
max(df$weeksSinceStart)

In [None]:
# Per-user maximum of weeksSinceStart.
result <- summarise(group_by(df, userIndex), max_weeks = max(weeksSinceStart))

options(repr.plot.width = 18, repr.plot.height = 8)

# Histogram of the per-user maxima (one-week bins); the two dashed lines mark
# cutoff_max_weeks and mean_max_weeks.
max_time_plot <- ggplot(result, aes(x = max_weeks)) +
  geom_histogram(
    binwidth = 1,
    fill = two_color[1],
    color = "black",
    alpha = 0.7
  ) +
  geom_vline(xintercept = cutoff_max_weeks, color = "black", linetype = "dashed") +
  geom_vline(xintercept = mean_max_weeks, color = "black", linetype = "dashed") +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_x_continuous(limits = c(NA, 130), expand = c(0, 0), breaks = pretty_breaks()) +
  scale_y_continuous(limits = c(0, 105), expand = c(0, 0), breaks = pretty_breaks()) +
  labs(x = "Length of Recorded Usage (Weeks)", y = "Frequency")

max_time_plot

# ggsave('figures/max_time_weeks.pdf', width=20, height=7)

# Users in the study by week

In [None]:
options(repr.plot.width = 18, repr.plot.height = 8)

# Bar chart of user_count per week; the dashed line marks cutoff_max_weeks.
users_remaining_plot <- ggplot(
  df_entries_per_user_weeks,
  aes(x = weeksSinceStart, y = user_count)
) +
  geom_col(fill = two_color[1], color = "black", alpha = 0.5) +
  geom_vline(xintercept = cutoff_max_weeks, color = "black", linetype = "dashed") +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_x_continuous(limits = c(NA, 80), expand = c(0, 0), breaks = pretty_breaks()) +
  # scale_y_continuous(limits=c(0,1200), expand=c(0,0), breaks=pretty_breaks()) +
  labs(x = "Time Using one sec (Weeks)", y = "Number of Unique Users")

users_remaining_plot

# ggsave('figures/unique_users_weeks.pdf', width=20, height=7)

# Interactions per user per week

In [None]:
options(repr.plot.width = 18, repr.plot.height = 8)

# Bar chart of interactions_per_user per day, with the x axis capped at
# cutoff_max_days.
interactions_per_user_days_plot <- ggplot(
  df_entries_per_user_days,
  aes(x = daysSinceStart, y = interactions_per_user)
) +
  geom_col(fill = two_color[1], color = "black", alpha = 0.5) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_x_continuous(
    limits = c(NA, cutoff_max_days),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  # scale_y_continuous(limits=c(0,1200), expand=c(0,0), breaks=pretty_breaks()) +
  labs(x = "Time Using one sec (Days)", y = "Interactions per User")

interactions_per_user_days_plot

In [None]:
#' R-squared and ANOVA table for a fitted `nls` model.
#'
#' The model is compared with an intercept-only linear model of the same
#' response, fitted on the data the `nls` call used. R-squared is one minus
#' the ratio of the two deviances.
#'
#' @param nls.obj A fitted model inheriting from class "nls". Its `data`
#'   component is evaluated to recover the data, so the object passed as
#'   `data =` to `nls()` must still be reachable when this function runs.
#' @return A list with `anova` (data frame with a regression and a residuals
#'   row: Df, Sum Sq, Mean Sq, F value, Pr(>F)) and `R2` (numeric).
R2nls <- function(nls.obj) {
    # inherits() also accepts objects whose class vector has more than one
    # entry, where `class(x) != "nls"` yields a multi-element condition.
    if (!inherits(nls.obj, "nls")) {
        stop("The object must be of class `nls`.")
    }
    da <- eval(nls.obj$data)
    resp.name <- all.vars(summary(nls.obj)$formula)[1]
    # Intercept-only reference model for the same response.
    form <- stats::as.formula(paste0(resp.name, "~1"))
    m0 <- stats::lm(form, da)
    an <- stats::anova(nls.obj, m0)
    sqn <- stats::deviance(nls.obj)
    sqe <- stats::deviance(m0)
    r2 <- 1 - (sqn/sqe)
    # Columns are addressed by their full names instead of relying on `$`
    # partial matching ("Sum" -> "Sum Sq", "Res.Sum" -> "Res.Sum Sq").
    aov <- data.frame(fv = c("regression", "residuals"),
                      gl = c(-an[["Df"]][2], an[["Res.Df"]][1]),
                      sq = c(-an[["Sum Sq"]][2], an[["Res.Sum Sq"]][1]))
    aov$qm <- aov$sq/aov$gl
    aov$F <- c(aov$qm[1]/aov$qm[2], NA)
    # Upper tail requested directly; 1 - pf() rounds to 0 for very small p.
    aov$"Pr(>F)" <- c(stats::pf(aov$F[1],
                                df1 = aov$gl[1],
                                df2 = aov$gl[2],
                                lower.tail = FALSE),
                      NA)
    names(aov) <- c(" ", "Df", "Sum Sq", "Mean Sq",
                    "F value", "Pr(>F)")
    return(list(anova = aov, R2 = r2))
}

In [None]:
# Daily open attempts per user, restricted to days up to cutoff_max_days.
df_exp_fit <- df_entries_per_user_days[df_entries_per_user_days$daysSinceStart <= cutoff_max_days,]

# Exponential decay plus a linear trend and a constant offset.
combined_model <- function(x, A, B, C, D) {
  A * exp(-B * x) + C * x + D
}

# Non-linear least-squares fit of the combined model.
fit_exp <- nls(interactions_per_user ~ combined_model(daysSinceStart, A, B, C, D),
  data = df_exp_fit,
  start = list(A = 10, B = .7, C = 0, D = 10),
  control = nls.control(maxiter = 1000)
)

summary(fit_exp)
r2_exp <- R2nls(fit_exp)
r2_exp

# Purely linear reference model, also fitted with nls() so that both fits are
# objects of the same kind.
fit_lin <- nls(interactions_per_user ~ A*daysSinceStart + B,
  data = df_exp_fit,
  start = list(A = -1, B = 16),
  control = nls.control(maxiter = 1000)
)

summary(fit_lin)
R2nls(fit_lin)

# Observed values, +/- one standard error, and the fitted values of both
# models, in one frame for plotting.
plot_data <- data.frame(
  x = df_exp_fit$daysSinceStart,
  y = df_exp_fit$interactions_per_user,
  ymin = df_exp_fit$interactions_per_user - df_exp_fit$std_error_interactions,
  ymax = df_exp_fit$interactions_per_user + df_exp_fit$std_error_interactions,
  Fitted_exp = predict(fit_exp, newdata = data.frame(daysSinceStart = df_exp_fit$daysSinceStart)),
  Fitted_lin = predict(fit_lin, newdata = data.frame(daysSinceStart = df_exp_fit$daysSinceStart))
)

# The R^2 annotation is derived from the fit (three decimals, no leading
# zero) instead of being typed in by hand, so it cannot drift from the model.
r2_text <- sub("^0", "", formatC(r2_exp$R2, format = "f", digits = 3))
r2_label <- paste0("R^2 * \"=", r2_text, "\"")

gg <- ggplot(plot_data, aes(x = x, y = y, ymin = ymin, ymax = ymax)) +
  geom_errorbar(width = 0.2, color = two_color[2], alpha = 0.5) +
  geom_point(color = two_color[2], alpha = 1) + # Data points
  geom_line(aes(y = Fitted_exp), color = two_color[1], size = 1) + # Fitted curve
  annotate("text", x = 10, y = max(plot_data$y), label = r2_label, parse = TRUE, vjust = 1, hjust = 0, size = 7) +
  labs(x = "Time Using one sec (Days)", y = "App Open Attempts per User") +
  scale_x_continuous(limits = c(0, cutoff_max_days), expand = c(0, 0), breaks = pretty_breaks()) +
  scale_y_continuous(limits = c(0, 30), expand = c(0, 0), breaks = pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size = 25), plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt"))

gg

# ggsave('figures/attempts_fit.pdf', width=10, height=7)

In [None]:
# Compare the linear and the exponential fit; `m` holds the ANOVA table and
# is displayed below.
m = anova(fit_lin, fit_exp)
m

In [None]:
# Ratio of the row-2 'Sum Sq' entry of the ANOVA table to the sum of the
# 'Res.Sum Sq' column (residual sums of squares of both models added).
# NOTE(review): the denominator adds both models' residual SS — confirm this
# is the intended effect-size definition.
m[2,'Sum Sq']/sum(m['Res.Sum Sq'])

In [None]:
# Daily 'didNotOpenApp' rows of the per-result table, restricted to days up
# to cutoff_max_days.
df_exp_fit <- df_entries_per_user_days_result[(df_entries_per_user_days_result$daysSinceStart <= cutoff_max_days) & (df_entries_per_user_days_result$continuedToApp == 'didNotOpenApp'),]

# Exponential decay plus a linear trend and a constant offset (there is no
# sinusoidal term).
combined_model <- function(x, A, B, C, D) {
  A * exp(-B * x) + C * x + D
}

# Non-linear least-squares fit of the combined model.
fit_exp <- nls(percentage ~ combined_model(daysSinceStart, A, B, C, D),
  data = df_exp_fit,
  start = list(A = 10, B = .7, C = 0, D = 10),
  control = nls.control(maxiter = 1000)
)

summary(fit_exp)
r2_exp <- R2nls(fit_exp)
r2_exp

# Purely linear reference model, also fitted with nls() so that both fits are
# objects of the same kind.
fit_lin <- nls(percentage ~ A*daysSinceStart + B,
  data = df_exp_fit,
  start = list(A = -1, B = 16),
  control = nls.control(maxiter = 1000)
)

summary(fit_lin)
R2nls(fit_lin)

# Observed values, +/- one standard error, and the fitted values of both
# models, in one frame for plotting.
plot_data <- data.frame(
  x = df_exp_fit$daysSinceStart,
  y = df_exp_fit$percentage,
  ymin = df_exp_fit$percentage - df_exp_fit$std_error_percentage,
  ymax = df_exp_fit$percentage + df_exp_fit$std_error_percentage,
  Fitted_exp = predict(fit_exp, newdata = data.frame(daysSinceStart = df_exp_fit$daysSinceStart)),
  Fitted_lin = predict(fit_lin, newdata = data.frame(daysSinceStart = df_exp_fit$daysSinceStart))
)

# The R^2 annotation is derived from the fit (three decimals, no leading
# zero) instead of being typed in by hand, so it cannot drift from the model.
r2_text <- sub("^0", "", formatC(r2_exp$R2, format = "f", digits = 3))
r2_label <- paste0("R^2 * \"=", r2_text, "\"")

gg <- ggplot(plot_data, aes(x = x, y = y, ymin = ymin, ymax = ymax)) +
  geom_errorbar(width = 0.2, color = two_color[1], alpha = 0.5) +
  geom_point(color = two_color[1], alpha = 1) + # Data points
  geom_line(aes(y = Fitted_exp), color = two_color[2], size = 1) + # Fitted curve
  annotate("text", x = 10, y = max(plot_data$y), label = r2_label, parse = TRUE, vjust = 1, hjust = 0, size = 7) +
  labs(x = "Time Using one sec (Days)", y = "Dismissed Attempts (%)") +
  scale_x_continuous(limits = c(0, cutoff_max_days), expand = c(0, 0), breaks = pretty_breaks()) +
  scale_y_continuous(limits = c(0, 60), expand = c(0, 0), breaks = pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size = 25), plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt"))

gg

# ggsave('figures/percentage_fit.pdf', width=10, height=7)


In [None]:
# Compare the linear and the exponential fit of the percentage series; `m`
# holds the ANOVA table and is displayed below.
m = anova(fit_lin, fit_exp)
m

In [None]:
# Ratio of the row-2 'Sum Sq' entry of the ANOVA table to the sum of the
# 'Res.Sum Sq' column (residual sums of squares of both models added).
# NOTE(review): the denominator adds both models' residual SS — confirm this
# is the intended effect-size definition.
m[2,'Sum Sq']/sum(m['Res.Sum Sq'])

In [None]:
options(repr.plot.width = 18, repr.plot.height = 8)

# Bar chart of interactions_per_user per week, with the x axis capped at
# cutoff_max_weeks.
interactions_per_user_weeks_plot <- ggplot(
  df_entries_per_user_weeks,
  aes(x = weeksSinceStart, y = interactions_per_user)
) +
  geom_col(fill = two_color[1], color = "black", alpha = 0.5) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_x_continuous(
    limits = c(NA, cutoff_max_weeks),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  # scale_y_continuous(limits=c(0,1200), expand=c(0,0), breaks=pretty_breaks()) +
  labs(x = "Time Using one sec (Weeks)", y = "Interactions per User")

interactions_per_user_weeks_plot

# Percentage of interactions where users continued to app or not

In [None]:
# For every week, count the rows per continuedToApp value and convert the
# counts into within-week percentages. The result stays grouped by week.
df_summary_weeks <- df_noClose %>%
  count(weeksSinceStart, continuedToApp) %>%
  group_by(weeksSinceStart) %>%
  mutate(percentage = n / sum(n) * 100)

# Fix the stacking and legend order.
df_summary_weeks$continuedToApp <- factor(
  df_summary_weeks$continuedToApp,
  levels = c("openedApp", "didNotOpenApp")
)

options(repr.plot.width = 18, repr.plot.height = 8)

# Stacked bars: share of each user action per week.
plt <- ggplot(df_summary_weeks, aes(x = weeksSinceStart)) +
  geom_col(aes(y = percentage, fill = continuedToApp)) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    legend.position = "bottom",
    legend.box = "horizontal",
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_fill_discrete(labels = c("Continued to App", "Did not Continue")) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(
    limits = c(NA, cutoff_max_weeks),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = pretty_breaks()) +
  labs(x = "Time Using one sec (Weeks)", y = "Percentage", fill = "User Action")

plt

# ggsave('figures/user_action_percentage_weeks.pdf', width=20, height=7)

In [None]:
# For every day, count the rows per continuedToApp value and convert the
# counts into within-day percentages. The result stays grouped by day.
df_summary_days <- df_noClose %>%
  count(daysSinceStart, continuedToApp) %>%
  group_by(daysSinceStart) %>%
  mutate(percentage = n / sum(n) * 100)

# Fix the stacking and legend order.
df_summary_days$continuedToApp <- factor(
  df_summary_days$continuedToApp,
  levels = c("openedApp", "didNotOpenApp")
)

options(repr.plot.width = 18, repr.plot.height = 8)

# Stacked bars: share of each user action per day.
plt <- ggplot(df_summary_days, aes(x = daysSinceStart)) +
  geom_col(aes(y = percentage, fill = continuedToApp)) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    legend.position = "bottom",
    legend.box = "horizontal",
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_fill_discrete(labels = c("Continued to App", "Did not Continue")) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(
    limits = c(NA, cutoff_max_days),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = pretty_breaks()) +
  labs(x = "Time Using one sec (Days)", y = "Percentage", fill = "User Action")

plt

# ggsave('figures/user_action_percentage_days.pdf', width=20, height=7)

In [None]:
# For every hour, count the rows per continuedToApp value and convert the
# counts into within-hour percentages. The result stays grouped by hour.
df_summary_hours <- df_noClose %>%
  count(hoursSinceStart, continuedToApp) %>%
  group_by(hoursSinceStart) %>%
  mutate(percentage = n / sum(n) * 100)

# Fix the stacking and legend order.
df_summary_hours$continuedToApp <- factor(
  df_summary_hours$continuedToApp,
  levels = c("openedApp", "didNotOpenApp")
)

options(repr.plot.width = 18, repr.plot.height = 8)

# Stacked bars: share of each user action per hour.
plt <- ggplot(df_summary_hours, aes(x = hoursSinceStart)) +
  geom_col(aes(y = percentage, fill = continuedToApp)) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    legend.position = "bottom",
    legend.box = "horizontal",
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_fill_discrete(labels = c("Continued to App", "Did not Continue")) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(
    limits = c(NA, cutoff_max_hours),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = pretty_breaks()) +
  labs(x = "Time Using one sec (Hours)", y = "Percentage", fill = "User Action")

plt

# ggsave('figures/user_action_percentage_hours.pdf', width=20, height=7)

# one sec Effectiveness and total interactions per user per week

In [None]:

# Dual-axis line plot (weekly): open attempts per user on the primary axis,
# share of dismissed attempts on the secondary axis. The percentage series is
# multiplied by 3 to share the primary scale and sec_axis() divides by 3, so
# the secondary axis reads in percent.
plot <- ggplot(df_entries_per_user_weeks, aes(x = weeksSinceStart)) +
  geom_line(data=df_summary_weeks[df_summary_weeks$continuedToApp == 'didNotOpenApp',], aes(y = percentage*3, color = "percentage"), size = 1) +
  geom_line(data=df_entries_per_user_weeks, aes(y = interactions_per_user, color = "interactions_per_user"), size = 1) +
  scale_color_manual(values = c(two_color[2], two_color[1]), labels=c('Open Attempts per User', 'Dismissed Attempts (%)')) +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Weeks)", y = "App Open Attempts per User") +
  theme_classic() +
  scale_x_continuous(limits=c(0,cutoff_max_weeks), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,150), expand=c(0,0), breaks=pretty_breaks(), 
    sec.axis = sec_axis(~./3, name = "Dismissed Attempts (%)")) +
  theme(text = element_text(size=25),  legend.position = "bottom", legend.title = element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

plot

# ggsave('figures/attempts_and_effectiveness_weeks.pdf', width=20, height=7)

In [None]:

# Dual-axis line plot (daily): open attempts per user on the primary axis,
# share of dismissed attempts on the secondary axis.
# The percentage series is divided by pct_scale to share the primary scale;
# the secondary axis must multiply by the same factor. Previously the data
# was divided by 3 while the axis multiplied by 2, so the secondary axis
# under-reported every percentage by a third.
pct_scale <- 3

plot <- ggplot(df_entries_per_user_days, aes(x = daysSinceStart)) +
  geom_line(data=df_summary_days[df_summary_days$continuedToApp == 'didNotOpenApp',], aes(y = percentage/pct_scale, color = "percentage"), size = 1) +
  geom_line(data=df_entries_per_user_days, aes(y = interactions_per_user, color = "interactions_per_user"), size = 1) +
  scale_color_manual(values = c(two_color[2], two_color[1]), labels=c('Open Attempts per User', 'Dismissed Attempts (%)')) +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Days)", y = "App Open Attempts per User") +
  theme_classic() +
  scale_x_continuous(limits=c(NA,cutoff_max_days), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,30), expand=c(0,0), breaks=pretty_breaks(), 
    sec.axis = sec_axis(~.*pct_scale, name = "Dismissed Attempts (%)")) +
  theme(text = element_text(size=25),  legend.position = "bottom", legend.title = element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

plot

# ggsave('figures/attempts_and_effectiveness_days.pdf', width=20, height=7)

In [None]:

# df_summary_per_user_hours$percentage = df_summary_hours[df_summary_hours$continuedToApp == 'didNotOpenApp',]$percentage

# df_effectiveness$continuedToApp <- factor(df_effectiveness$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Dual-axis line plot (hourly): open attempts per user on the primary axis,
# share of dismissed attempts on the secondary axis. The percentage series is
# divided by 20 to share the primary scale and sec_axis() multiplies by 20,
# so the secondary axis reads in percent.
plot <- ggplot(df_entries_per_user_hours, aes(x = hoursSinceStart)) +
  geom_line(data=df_summary_hours[df_summary_hours$continuedToApp == 'didNotOpenApp',], aes(y = percentage/20, color = "percentage"), size = 1) +
  geom_line(data=df_entries_per_user_hours, aes(y = interactions_per_user, color = "interactions_per_user"), size = 1) +
  scale_color_manual(values = c(two_color[2], two_color[1]), labels=c('Open Attempts per User', 'Dismissed Attempts (%)')) +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Hours)", y = "App Open Attempts per User") +
  theme_classic() +
  scale_x_continuous(limits=c(NA,cutoff_max_hours), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,5), expand=c(0,0), breaks=pretty_breaks(), 
    sec.axis = sec_axis(~.*20, name = "Dismissed Attempts (%)")) +
  theme(text = element_text(size=25),  legend.position = "bottom", legend.title = element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

plot

# File name corrected: the hourly figure must not overwrite the daily one.
# ggsave('figures/attempts_and_effectiveness_hours.pdf', width=20, height=7)

In [None]:
# Fix the stacking and legend order of the user action.
df_entries_per_user_weeks_result$continuedToApp <- factor(df_entries_per_user_weeks_result$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Stacked bars of interactions_per_user per week, split by user action;
# the x axis is capped at week 15.
plt <- ggplot(df_entries_per_user_weeks_result, aes(x=weeksSinceStart)) +
  geom_bar(stat = "identity",  aes(y = interactions_per_user, fill = continuedToApp)) +
  theme_classic() +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Weeks)", y = "Total App Open Attempts per User", fill="User Action") +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(limits=c(NA,15), expand=c(0,0), breaks=pretty_breaks()) +
  # scale_y_continuous(limits=c(0,150), expand=c(0,0), breaks=pretty_breaks()) +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal", plot.margin = margin(t=12, r=12, b=0, l=0, unit='pt'))


plt

# ggsave('figures/opens_effectiveness_per_user_weeks.pdf', width=20, height=7)

In [None]:
# Fix the stacking and legend order of the user action.
df_entries_per_user_days_result$continuedToApp <- factor(df_entries_per_user_days_result$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Stacked bars of interactions_per_user per day, split by user action;
# the x axis is capped at day 50.
plt <- ggplot(df_entries_per_user_days_result, aes(x=daysSinceStart)) +
  geom_bar(stat = "identity", aes(y = interactions_per_user, fill = continuedToApp)) +
  theme_classic() +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Days)", y = "Total App Open Attempts per User", fill="User Action") +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(limits=c(NA,50), expand=c(0,0), breaks=pretty_breaks()) +
  # scale_y_continuous(limits=c(0,30), expand=c(0,0), breaks=pretty_breaks()) +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal", plot.margin = margin(t=12, r=12, b=0, l=0, unit='pt'))


plt

# ggsave('figures/opens_effectiveness_per_user_days.pdf', width=20, height=7)

In [None]:
# Fix the stacking and legend order of the user action.
df_entries_per_user_hours_result$continuedToApp <- factor(df_entries_per_user_hours_result$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

options(repr.plot.width=18, repr.plot.height=8)

# Stacked bars of interactions_per_user per hour, split by user action;
# only hours 48 to 96 are shown.
plt <- ggplot(df_entries_per_user_hours_result, aes(x=hoursSinceStart)) +
  geom_bar(stat = "identity",  aes(y = interactions_per_user, fill = continuedToApp)) +
  theme_classic() +
  # Axis title spelling corrected ("Attemps" -> "Attempts").
  labs(x = "Time Using one sec (Hours)", y = "Total App Open Attempts per User", fill="User Action") +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  scale_color_manual(values = two_color) +
  scale_x_continuous(limits=c(48,96), expand=c(0,0), breaks=pretty_breaks()) +
  # scale_y_continuous(limits=c(0,2000), expand=c(0,0), breaks=pretty_breaks()) +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal", plot.margin = margin(t=12, r=12, b=0, l=0, unit='pt'))


plt

# ggsave('figures/opens_effectiveness_per_user_hours.pdf', width=20, height=7)

## Trends and Correlations for Attempts and Effectiveness

In [None]:
# Daily table restricted to days up to cutoff_max_days.
trend_days <- df_entries_per_user_days[df_entries_per_user_days$daysSinceStart <= cutoff_max_days,]
# Same day window on the per-result table, keeping only 'didNotOpenApp' rows.
trend_percentage_days <- df_entries_per_user_days_result[(df_entries_per_user_days_result$daysSinceStart <= cutoff_max_days) & (df_entries_per_user_days_result$continuedToApp == 'didNotOpenApp'),]


In [None]:
# Augmented Dickey-Fuller tests (tseries::adf.test) on both daily series.
print(adf.test(trend_days$interactions_per_user))
print(adf.test(trend_percentage_days$percentage))

In [None]:
# Mann-Kendall trend tests (Kendall::MannKendall) on both daily series.
print(MannKendall(trend_days$interactions_per_user))
print(MannKendall(trend_percentage_days$percentage))

In [None]:
# Pearson correlation tests between day index and each daily series
# (pearsonTest is presumably the fBasics implementation — confirm).
pearsonTest(trend_days$daysSinceStart, trend_days$interactions_per_user)
pearsonTest(trend_percentage_days$daysSinceStart, trend_percentage_days$percentage)

In [None]:
# Rows of df_noClose whose lifeStatus is "Break" and "Return" respectively.
# %in% never yields NA, so rows with a missing lifeStatus are dropped.
df_break <- df_noClose[df_noClose$lifeStatus %in% c("Break"), ]
# df_dropout <- df_noClose[df_noClose$lifeStatus %in% c("Dropout"), ]
df_return <- df_noClose[df_noClose$lifeStatus %in% c("Return"), ]

In [None]:
# Descriptive statistics for the rows flagged as breaks (df_break).
print(paste("Number of Breaks: ", length(df_break$userIndex)))

# Number of distinct users with at least one break (1x1 summary table).
unique_count <- df_break %>%
  summarise(unique_count = n_distinct(userIndex))
# Divide by the scalar inside the summary table rather than by the table
# itself, so the quotient is a plain number. Label corrected: it previously
# read "Number of Per User".
print(paste("Number of Breaks Per User (for users with at least one break): ", length(df_break$userIndex)/unique_count$unique_count))


# Breaks per user, used for the maximum below.
temp <- df_break %>%
  group_by(userIndex) %>%
  summarise(break_count = sum(lifeStatus == "Break", na.rm = TRUE))
print(paste("Max breaks: ", max(temp$break_count)))


# Break length taken from the forward differences, in hours and in days.
print(paste("Min break length (hours): ", min(df_break$hourDiffForward)))
print(paste("Min break length (days): ", min(df_break$dayDiffForward)))
print(paste("Mean break length (hours): ", mean(df_break$hourDiffForward)))
print(paste("Mean break length (days): ", mean(df_break$dayDiffForward)))
print(paste("Max break length (hours): ", max(df_break$hourDiffForward)))
print(paste("Max break length (days): ", max(df_break$dayDiffForward)))


# First and Last Week

In [None]:
# Order the rows by user and then by realWeeksDifference, so that first()
# and last() below pick each user's smallest and largest value.
df <- arrange(df, userIndex, realWeeksDifference)

# First and last realWeeksDifference per user.
grouped_data <- summarize(
  group_by(df, userIndex),
  firstWeek = first(realWeeksDifference),
  lastWeek = last(realWeeksDifference)
)

In [None]:
# Order the rows by user and then by realWeeksDifference.
df <- arrange(df, userIndex, realWeeksDifference)

# Timestamp of each user's first and last row in that order. The columns
# keep the names firstWeek / lastWeek although they hold timestamps.
grouped_data <- summarize(
  group_by(df, userIndex),
  firstWeek = first(timestamp),
  lastWeek = last(timestamp)
)

In [None]:
# Order users by their first entry; the first row then holds the earliest
# first entry of any user.
grouped_data <- grouped_data %>%
  arrange(firstWeek)
  

oldest_data_date <- first(grouped_data$firstWeek)

print(paste('Earliest data point', oldest_data_date))
# last(grouped_data$firstWeek)

# Re-order users by their last entry and take the two extremes.
grouped_data <- grouped_data %>%
  arrange(lastWeek)

# NOTE(review): both "join date" values are read from the lastWeek column
# (each user's last entry), not from firstWeek — confirm that the variable
# names and printed labels describe what is intended.
first_join_date <- first(grouped_data$lastWeek)
last_join_date <- last(grouped_data$lastWeek)

print(paste('Earliest participant join date', first_join_date))
print(paste('Last participant join date', last_join_date))

In [None]:
# Fixed x-axis window for the timeline.
lims <- as.POSIXct(
  strptime(c("2020-09-01 00:00", "2023-05-01 00:00"), format = "%Y-%m-%d %H:%M")
)

# One horizontal segment per user from first to last entry, with coloured
# points at both ends; dashed lines mark first_join_date and last_join_date.
plt <- ggplot(
  grouped_data,
  aes(x = firstWeek, xend = lastWeek, y = userIndex, yend = userIndex)
) +
  geom_segment(size = 1, color = "darkgrey") +
  geom_point(aes(x = firstWeek, color = "First Week"), size = 3) +
  geom_point(aes(x = lastWeek, color = "Last Week"), size = 3) +
  # geom_vline(xintercept=as.POSIXct("2022-11-05 00:00:00",tz=Sys.timezone()), color='red', linetype = 'dashed', size=1) +
  # geom_vline(xintercept=as.POSIXct("2023-08-27 00:00:00",tz=Sys.timezone()), color='black', linetype = 'dashed', size=1) +
  geom_vline(xintercept = first_join_date, color = "black", linetype = "dashed", size = 1) +
  geom_vline(xintercept = last_join_date, color = "black", linetype = "dashed", size = 1) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    legend.position = "bottom",
    legend.box = "horizontal",
    plot.margin = margin(t = 15, r = 12, b = 0, l = 0, unit = "pt")
  ) +
  scale_color_manual(labels = c("First Entry", "Last Entry"), values = two_color) +
  scale_x_datetime(
    breaks = date_breaks("3 month"),
    limits = lims,
    labels = date_format("%Y-%m")
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = pretty_breaks(),
    limits = c(0, max(grouped_data$userIndex) + 10)
  ) +
  labs(x = "Date", y = "User Index", color = "")

plt

# ggsave('figures/timeline_first_last.pdf', width=20, height=7)

# Breaks

In [None]:
#' Locate gaps larger than 1 between consecutive values.
#'
#' @param weeks Vector of values in the order they were observed.
#' @return Data frame with columns `breakStart` (value before each gap) and
#'   `breakEnd` (value after it); zero rows when no gap exceeds 1.
get_breaks <- function(weeks) {
  is_gap <- diff(weeks) > 1
  data.frame(
    breakStart = head(weeks, -1)[is_gap],
    breakEnd = tail(weeks, -1)[is_gap]
  )
}

# Group by 'userIndex' and apply the get_breaks function
# NOTE(review): get_breaks() receives `timestamp` here. If that column is a
# date-time, diff() returns a difftime whose unit is chosen automatically, so
# the `> 1` threshold inside get_breaks() may not mean the same span for
# every user — confirm.
breaks_df <- df_noClose %>%
  group_by(userIndex) %>%
  do(get_breaks(.$timestamp)) %>%
  ungroup()

# Remove rows where breakStart and breakEnd are missing (i.e., single data points for each userIndex)
breaks_df <- breaks_df[complete.cases(breaks_df), ]

# Print the resulting dataframe
print(breaks_df)


# From weeksSinceStudyStart

In [None]:
# Sort df by user and day (kept from the original cell).
df <- df %>%
  arrange(userIndex, daysSinceStart)

# First and last day per user. The table actually summarised (df_noClose) is
# sorted inside the pipeline: previously only `df` was sorted, so first() and
# last() depended on whatever row order df_noClose happened to have.
grouped_data <- df_noClose %>%
  arrange(userIndex, daysSinceStart) %>%
  group_by(userIndex) %>%
  summarize(firstDay = first(daysSinceStart), lastDay = last(daysSinceStart))


# Order users by their last day.
sorted_df <- grouped_data[order(grouped_data$lastDay), ]

# Row position after sorting, used as the y coordinate.
sorted_df$currentIndex <- seq_len(nrow(sorted_df))

# One horizontal segment per user from first to last day, with a point at
# the last day.
ggplot(sorted_df, aes(x = firstDay, xend = lastDay, y = currentIndex, yend = currentIndex)) +
  geom_segment(size = 1) +
  geom_point(aes(x = lastDay), size = 3, color=two_color[1]) +
  # scale_color_manual(values = two_color[1]) + 
  ylab("userIndex") +
  xlab("Time (Days)") +
  ggtitle("Participant Timeline") +
  theme_classic()

In [None]:
# Locate gaps in one participant's ordered sequence of day offsets.
#
# Args:
#   days:    Vector of day values for a single participant, in ascending
#            order.
#   min_gap: Two consecutive values count as a break when their difference
#            is strictly greater than this. Defaults to 2.7, the cutoff that
#            used to be hard-coded.
#
# Returns:
#   A data.frame with one row per gap: breakStart is the last value before
#   the gap, breakEnd the first value after it. Zero rows when no gap exists
#   (including inputs of length 0 or 1).
get_breaks <- function(days, min_gap = 2.7) {
  gaps <- diff(days)
  # which() ignores NA comparisons, so missing values no longer generate
  # all-NA rows that callers have to strip afterwards.
  gap_idx <- which(gaps > min_gap)
  data.frame(breakStart = days[gap_idx], breakEnd = days[gap_idx + 1L])
}

# Run get_breaks() on each participant's daysSinceStart column and stack the
# per-participant results; the userIndex key is kept as the first column.
breaks_df <- df_noClose %>%
  group_by(userIndex) %>%
  group_modify(~ get_breaks(.x$daysSinceStart)) %>%
  ungroup()

# Keep only rows in which both breakStart and breakEnd are present
breaks_df <- breaks_df[complete.cases(breaks_df), ]

# Show the resulting table
print(breaks_df)

In [None]:
# Duration of each break: end value minus start value
breaks_df[["breakLength"]] <- with(breaks_df, breakEnd - breakStart)
head(breaks_df)

# Break Lengths

In [None]:
# head(df_break)
# Create the histogram using ggplot2

# Histogram of dayDiffForward with one bin per day, x axis clipped to 0-21
histogram <- df_break %>%
  ggplot(aes(x = dayDiffForward)) +
  geom_histogram(
    binwidth = 1,
    fill = two_color[1],
    color = "black",
    alpha = 0.7
  ) +
  labs(x = "Break Length (Days)", y = "Frequency") +
  scale_x_continuous(
    limits = c(0, 21),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  scale_y_continuous(
    limits = c(0, 500),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 12, b = 0, l = 0, unit = "pt")
  )


histogram

# ggsave('figures/break_length_histogram.pdf', plot=histogram, width=20, height=7)

In [None]:
# Summary statistics of df_break$dayDiffForward, one labelled line each
with(df_break, {
  print(paste('Mean break length: ', mean(dayDiffForward)))
  print(paste('StD break length: ', sd(dayDiffForward)))
  print(paste('Median break length: ', median(dayDiffForward)))
  print(paste('Min break length: ', min(dayDiffForward)))
  print(paste('Max break length: ', max(dayDiffForward)))
})


In [None]:
# Keep the rows of df_noClose whose userDiffPrev flag is not TRUE
# (presumably rows whose previous entry belongs to the same user - confirm)
df_diff_distribution <- df_noClose[df_noClose[["userDiffPrev"]] != TRUE, ]

In [None]:
# Descriptive statistics of the gap columns (hourDiffReverse, and the median
# of minuteDiffReverse), printed one labelled line each. Only the Q3 line
# removes NAs; the other statistics propagate them.
with(df_diff_distribution, {
  print(paste('Min', min(hourDiffReverse)))
  print(paste('Max', max(hourDiffReverse)))
  print(paste('Mean', mean(hourDiffReverse)))
  print(paste('StD', sd(hourDiffReverse)))
  print(paste('Median', median(hourDiffReverse)))
  print(paste('Median minutes', median(minuteDiffReverse)))
  print(paste('IQR', IQR(hourDiffReverse)))
  print(paste('Q3', quantile(hourDiffReverse, probs = 0.75, na.rm = TRUE)))
  print(paste('Mean + stds', mean(hourDiffReverse) + how_many_sds * sd(hourDiffReverse)))
})

# Cutoff used by later cells: mean gap plus how_many_sds standard deviations
mean_timeDiff_hours <- mean(df_diff_distribution$hourDiffReverse)
std_timeDiff_hours <- sd(df_diff_distribution$hourDiffReverse)
break_cutoff_timeDiff_hours <- mean_timeDiff_hours + how_many_sds * std_timeDiff_hours

# IQR=IQR(dv,na.rm = TRUE)
#   Quant_25=quantile(dv,probs=0.25,na.rm = TRUE)
#   Quant_75=quantile(dv,probs=0.75,na.rm = TRUE)
#   upper=Quant_75+Tukey_crit*IQR
#   lower=Quant_25-Tukey_crit*IQR
#   outlier_Tukey=ifelse(dv>upper,1,ifelse(dv<lower,1,0))
#   # print(outlier_Tukey)
#   as.numeric(paste(outlier_Tukey))


In [None]:
# head(df_break)
# Create the histogram using ggplot2
# Histogram of minuteDiffReverse in 10-minute bins, x axis capped at 3000.
# Dashed guides: 24 h, 48 h, mean and median. Solid guide: mean + 2 SD.
histogram <- df_diff_distribution %>%
  ggplot(aes(x = minuteDiffReverse)) +
  geom_histogram(
    binwidth = 10,
    fill = two_color[1],
    color = "black",
    alpha = 0.7
  ) +
  geom_vline(xintercept = 24 * 60, color = "black", linetype = "dashed") +
  geom_vline(xintercept = 48 * 60, color = "black", linetype = "dashed") +
  geom_vline(
    xintercept = mean(df_diff_distribution$minuteDiffReverse),
    color = "black",
    linetype = "dashed"
  ) +
  geom_vline(
    xintercept = median(df_diff_distribution$minuteDiffReverse),
    color = "black",
    linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(df_diff_distribution$minuteDiffReverse) + 2 * sd(df_diff_distribution$minuteDiffReverse),
    color = "black",
    linetype = "solid"
  ) +
  labs(x = "Time Gap Between Uses (minutes)", y = "Frequency") +
  scale_x_continuous(
    limits = c(NA, 3000),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  scale_y_continuous(
    limits = c(0, 630000),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 20, r = 25, b = 0, l = 0, unit = "pt")
  )


histogram

# ggsave('figures/usage_gap_histogram.pdf', plot=histogram, width=20, height=7)

In [None]:


# Zoomed view of the gap histogram: x axis restricted to 100-1000 minutes.
# Dashed guides mark 24 h, the mean and the median.
histogram <- df_diff_distribution %>%
  ggplot(aes(x = minuteDiffReverse)) +
  geom_histogram(
    binwidth = 10,
    fill = two_color[1],
    color = "black",
    alpha = 0.7
  ) +
  geom_vline(xintercept = 24 * 60, color = "black", linetype = "dashed") +
  geom_vline(
    xintercept = mean(df_diff_distribution$minuteDiffReverse),
    color = "black",
    linetype = "dashed"
  ) +
  geom_vline(
    xintercept = median(df_diff_distribution$minuteDiffReverse),
    color = "black",
    linetype = "dashed"
  ) +
  labs(x = "Time Gap Between Uses (minutes)", y = "Frequency") +
  scale_x_continuous(
    limits = c(100, 1000),
    expand = c(0, 0),
    breaks = c(100, 500, 1000)
  ) +
  scale_y_continuous(
    limits = c(0, 17000),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 25, b = 0, l = 0, unit = "pt")
  )


histogram

# ggsave('figures/usage_gap_histogram_zoom.pdf', plot=histogram, width=10, height=3.5)

In [None]:


# Zoomed view of the gap histogram for gaps of at most 50 minutes.
# Dashed guides are requested at 24 h, the mean and the median; any guide
# that falls beyond the 50-minute x limit is not drawn.
histogram <- ggplot(data = df_diff_distribution, aes(x = minuteDiffReverse)) +
  geom_histogram(binwidth = 10, fill = two_color[1], color = "black", alpha = 0.7) +
  geom_vline(xintercept=24*60, color='black', linetype = 'dashed') +
  geom_vline(xintercept=mean(df_diff_distribution$minuteDiffReverse), color='black', linetype = 'dashed') +
  geom_vline(xintercept=median(df_diff_distribution$minuteDiffReverse), color='black', linetype = 'dashed') +
  labs(x = "Time Gap Between Uses (minutes)", y = "Frequency") +
  # The breaks were c(100,500,1000), copied from the 100-1000 zoom; all of
  # them lie outside limits=c(NA,50), which left the x axis without ticks.
  scale_x_continuous(limits=c(NA,50), expand=c(0,0), breaks = pretty_breaks()) +
#   scale_y_continuous(limits=c(0,17000), expand=c(0,0), breaks = pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=25, b=0, l=0, unit='pt'))


histogram

# ggsave('figures/usage_gap_histogram_zoom.pdf', plot=histogram, width=10, height=3.5)

# First Use, Leadup to Breaks and Returns

## Everyone's first use

In [None]:
# Restrict both per-hour tables to the window that ends at the break cutoff
# (hoursSinceStart <= break_cutoff_timeDiff_hours)
df_first_use <- df_entries_per_user_hours[
  df_entries_per_user_hours[["hoursSinceStart"]] <= break_cutoff_timeDiff_hours,
]
df_first_use_result <- df_entries_per_user_hours_result[
  df_entries_per_user_hours_result[["hoursSinceStart"]] <= break_cutoff_timeDiff_hours,
]

In [None]:
# counted_firsts <- df %>%
#   group_by(hoursSinceStart, continuedToApp) %>%
#   summarise(avg_entries_per_user = n() /n_distinct(userIndex))


# counted_firsts <- drop_na(counted_firsts)
# Fix the level order so 'openedApp' comes first in the fill scale and legend
df_first_use_result$continuedToApp <- factor(df_first_use_result$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Stacked bars of interactions_per_user per hour since first use, filled by
# whether the user continued to the app.
first_use_plot <- ggplot(df_first_use_result, aes(x = hoursSinceStart, y = interactions_per_user, fill = continuedToApp)) +
  geom_bar(stat = "identity") +
  labs(x = "Time Since First Use (Hours)", y = "App Open Attempts per User", fill = "User Action") +
  scale_x_continuous(limits=c(NA, break_cutoff_timeDiff_hours), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,10), expand=c(0,0), breaks=pretty_breaks()) +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # theme_classic() is a complete theme and replaces every theme setting added
  # before it. The theme(legend.position = 'none') call that used to precede
  # it therefore never took effect and has been removed as dead code. To hide
  # the legend, add legend.position = 'none' to the theme() call below.
  theme_classic() +
  theme(text = element_text(size=30), plot.margin = margin(t=10, r=10, b=0, l=5, unit='pt'))

first_use_plot

In [None]:
# grouped_firsts <- counted_firsts %>%
#   group_by(hoursSinceStart, continuedToApp) %>%
#   summarize(
#     average_user_count = mean(total_user_count),
#     summed_normalized_count = sum(normalized_count)
#   ) %>%
#   mutate(
#     percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
#     opens_per_user = sum(summed_normalized_count)
#   )
# df_first_use$continuedToApp <- factor(df_first_use$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))
# grouped_firsts_effectiveness <- grouped_firsts[grouped_firsts$continuedToApp == 'didNotOpenApp',]

# grouped_firsts_effectiveness <- grouped_firsts_effectiveness[grouped_firsts_effectiveness$hoursSinceStart >= 0, ]

# Ratio between the secondary (percentage) axis and the primary axis
secondary_scale <- 10

# Dual-axis line plot: series "A" is interactions_per_user from df_first_use,
# series "B" is the percentage of the 'didNotOpenApp' rows of
# df_first_use_result, divided by secondary_scale so it shares the primary axis.
firsts_line <- df_first_use %>%
  ggplot(aes(x = hoursSinceStart)) +
  geom_line(aes(y = interactions_per_user, color = "A"), size = 1) +
  geom_line(
    data = df_first_use_result[df_first_use_result[["continuedToApp"]] == 'didNotOpenApp', ],
    aes(y = percentage / secondary_scale, color = "B"),
    size = 1
  ) +
  scale_color_manual(
    values = c(two_color[2], two_color[1]),
    labels = c("App Open Attempts per User", "Dismissed Attempts (%)")
  ) +
  labs(
    x = "Time Since First Use (Hours)",
    y = "App Open Attempts per User",
    color = ""
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 30),
    legend.position = "bottom",
    plot.margin = margin(t = 10, r = 0, b = 0, l = 0, unit = "pt")
  ) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = c(0, 0),
    breaks = pretty_breaks(),
    sec.axis = sec_axis(
      ~ . * secondary_scale,
      name = "Dismissed Attempts (%)",
      labels = percent_format(scale = 1)
    )
  ) +
  scale_x_continuous(
    limits = c(NA, break_cutoff_timeDiff_hours),
    expand = c(0, 0),
    breaks = pretty_breaks()
  )

firsts_line



# ggsave('figures/attempts_and_effectiveness.pdf', width=20, height=7)

In [None]:
# Fit four candidate curves to interactions_per_user as a function of
# hoursSinceStart in df_first_use: combined (decay + cosine + line),
# decay + line, cosine only, and straight line. All are fitted with nls().

# Combined model: exponential decay A*exp(-B*x), plus a cosine of amplitude C
# whose frequency is D times one cycle per 24 units of x with phase E, plus a
# linear trend F*x + G.
combined_model <- function(x, A, B, C, D, E, F, G) {
  A * exp(-B * x) + C * cos(2*pi/24*D * x + E) + F * x + G
}

# Fit the combined model; `start` holds the initial parameter guesses
fit_combo <- nls(interactions_per_user ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
        data=df_first_use,
        start = list(A = 5, B = 0.9, C = 0.75, D = 1, E = 0, F=0, G=1),
        control = nls.control(maxiter = 10000)
        )

# Print the summary of the fit
# summary(fit_combo)
# R2nls(fit_combo)

# Exponential decay plus linear trend (no periodic term)
exp_model <- function(x, A, B, C, D) {
  A * exp(-B * x) + C * x + D
}

# Fit the exponential model
fit_exp <- nls(interactions_per_user ~ exp_model(hoursSinceStart, A, B, C, D),
        data=df_first_use,
        start = list(A = 5, B = .7, C = 0, D=5),
        control = nls.control(maxiter = 1000)
        )

# Cosine with a fixed period of 24 units of x: amplitude A, phase B, offset C
sinusoidal <- function(x, A, B, C) {
  A * cos(2*pi/24 * x + B) + C
}

# Fit the sinusoidal model
fit_sin <- nls(interactions_per_user ~ sinusoidal(hoursSinceStart, A, B, C),
        data=df_first_use,
        start = list(A = 3, B = 0, C = 1),
        control = nls.control(maxiter = 5000)
        )

# Print the summary of the fit
# summary(fit_sin)

# Straight line, fitted through nls() like the other candidates
# (presumably so that all four fits can be passed to anova()/BIC() together)
fit_lin <- nls(interactions_per_user ~ A*hoursSinceStart + B,
        data=df_first_use,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )


# Print the summary of the fit
# summary(fit_lin)



# Observed points with error bars of +/- std_error_interactions
plot_data <- data.frame(x = df_first_use$hoursSinceStart,
                        y = df_first_use$interactions_per_user,
                        ymin=df_first_use$interactions_per_user-df_first_use$std_error_interactions,
                        ymax=df_first_use$interactions_per_user+df_first_use$std_error_interactions)

# Dense x grid (step 0.25) over the observed range for drawing smooth curves.
# ymin/ymax are placeholder columns: the plot below declares ymin and ymax in
# its global aes(), so any layer that supplies its own data must carry them.
line_x <- seq(min(df_first_use$hoursSinceStart),max(df_first_use$hoursSinceStart),.25)
line_data <- data.frame(x = line_x,
                        ymin=line_x,
                        ymax=line_x,
                        Fitted_combo = predict(fit_combo, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = line_x))
 )





# Observed points with error bars, overlaid with the combined-model curve.
# NOTE(review): the R^2 value in the annotation is a hard-coded literal and is
# not computed from fit_combo in this cell - confirm it matches the fit.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[2], alpha = 0.5)+
  geom_point(color = two_color[2], alpha=1) + # Data points
  # geom_line(aes(y = Fitted_lin), color = two_color[1], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_exp), color = two_color[1], size=1) + # Fitted curve
  geom_line(data=line_data, aes(x=x, y = Fitted_combo), color = two_color[1], size=1) + # Fitted curve
  annotate("text", x=3, y=6, label=expression(R^2 * "=.970"), parse=TRUE, vjust=1, hjust=0, size=7) +
  labs(x = "Time Since First Use (Hours)", y = "App Open Attempts per User") +
  scale_x_continuous(limits=c(0,break_cutoff_timeDiff_hours), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,7), expand=c(0,0), breaks=pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot
gg

# ggsave('figures/first_fit.pdf', width=7, height=7)


In [None]:
# Sequential comparison of the nested fits; the wrapping parentheses display
# each table right after it is stored.
(j <- anova(fit_lin, fit_sin, fit_combo))
(k <- anova(fit_exp, fit_combo))

# BIC of all four candidates side by side
BIC(fit_lin, fit_sin, fit_exp, fit_combo)


In [None]:
# Sum of squares of the third row of `j` divided by the total of the
# residual sums of squares over all rows of `j`
j[3, "Sum Sq"] / sum(j[["Res.Sum Sq"]])

In [None]:
# Fit the same four candidate curves to `percentage` for the rows where the
# user did not continue to the app.
df_fit <- df_first_use_result[df_first_use_result$continuedToApp == 'didNotOpenApp',]

# Combined model: exponential decay A*exp(-B*x), plus a cosine of amplitude C
# whose frequency is D times one cycle per 24 units of x with phase E, plus a
# linear trend F*x + G.
combined_model <- function(x, A, B, C, D, E, F, G) {
  A * exp(-B * x) + C * cos(2*pi/24*D * x + E) + F * x + G
}

# Fit the combined model; `start` holds the initial parameter guesses
fit_combo <- nls(percentage ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
        data=df_fit,
        start = list(A = 5, B = 0.9, C = 0.75, D = 1, E = 0, F=0, G=1),
        control = nls.control(maxiter = 10000)
        )

# Print the summary of the fit
# summary(fit_combo)
# R2nls() is not defined in this part of the file - defined or loaded elsewhere
R2nls(fit_combo)

# Exponential decay plus linear trend (no periodic term)
exp_model <- function(x, A, B, C, D) {
  A * exp(-B * x) + C * x + D
}

# Fit the exponential model
fit_exp <- nls(percentage ~ exp_model(hoursSinceStart, A, B, C, D),
        data=df_fit,
        start = list(A = 5, B = .7, C = 0, D=5),
        control = nls.control(maxiter = 1000)
        )

# Cosine with a fixed period of 24 units of x: amplitude A, phase B, offset C
sinusoidal <- function(x, A, B, C) {
  A * cos(2*pi/24 * x + B) + C
}

# Coefficient table of the exponential fit (the curve drawn in the plot cell)
summary(fit_exp)

# Fit the sinusoidal model
fit_sin <- nls(percentage ~ sinusoidal(hoursSinceStart, A, B, C),
        data=df_fit,
        start = list(A = 3, B = 0, C = 1),
        control = nls.control(maxiter = 5000)
        )

# Print the summary of the fit
# summary(fit_sin)

# Straight line, fitted through nls() like the other candidates
fit_lin <- nls(percentage ~ A*hoursSinceStart + B,
        data=df_fit,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )


# Print the summary of the fit
# summary(fit_lin)



# Observed percentages with error bars of +/- std_error_percentage
plot_data <- data.frame(x = df_fit$hoursSinceStart,
                        y = df_fit$percentage,
                        ymin=df_fit$percentage-df_fit$std_error_percentage,
                        ymax=df_fit$percentage+df_fit$std_error_percentage)
# Dense x grid (step 0.5) over the observed range for drawing smooth curves.
# ymin/ymax are placeholder columns: the plot below declares ymin and ymax in
# its global aes(), so any layer that supplies its own data must carry them.
line_x <- seq(min(df_fit$hoursSinceStart),max(df_fit$hoursSinceStart),.5)
line_data <- data.frame(x = line_x,
                        ymin=line_x,
                        ymax=line_x,
                        Fitted_combo = predict(fit_combo, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = line_x))
)





# Observed points with error bars, overlaid with the exponential-model curve.
# NOTE(review): the R^2 value in the annotation is a hard-coded literal -
# confirm it matches the fit that is drawn.
# NOTE(review): the y limits run to 100 but the breaks stop at 40, and the x
# axis starts at 1 rather than 0 - confirm both are intended.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[1], alpha = 0.5)+
  geom_point(color = two_color[1], alpha=1) + # Data points
  # geom_line(aes(y = Fitted_lin), color = two_color[1], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  geom_line(data=line_data, aes(x=x, y = Fitted_exp), color = two_color[2], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_combo), color = two_color[1], size=1) + # Fitted curve
  annotate("text", x=3, y=100, label=expression(R^2 * "=.807"), parse=TRUE, vjust=1, hjust=0, size=7) +
  labs(x = "Time Since First Use (Hours)", y = "Dismissed Attempts (%)") +
  scale_x_continuous(limits=c(1,break_cutoff_timeDiff_hours), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,100), expand=c(0,0), breaks=c(0,10,20,30,40)) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot
gg

# ggsave('figures/first_percentage_fit.pdf', width=7, height=7)


In [None]:
# Sequential comparison of the four fits; the wrapping parentheses display
# the table right after it is stored.
(j <- anova(fit_lin, fit_sin, fit_exp, fit_combo))

# BIC of all four candidates side by side
BIC(fit_lin, fit_sin, fit_exp, fit_combo)


In [None]:
# Sum of squares of the third row of `j` divided by the total of the
# residual sums of squares over all rows of `j`
eta_sq <- j[3, "Sum Sq"] / sum(j[["Res.Sum Sq"]])
eta_sq

## Breaks

In [None]:
# Load the pre-break data and make the column names syntactically valid
df_breaks <- read_csv("./data_breaks.csv")
names(df_breaks) <- make.names(names(df_breaks))
# head(df_breaks, 5)

In [None]:
# Keep only rows at or after -break_cutoff_timeDiff_hours
df_breaks <- df_breaks[df_breaks$hoursSinceStart >= -break_cutoff_timeDiff_hours,]
# Number of distinct users remaining; used to normalise the counts below
number_users_breaks <- length(unique(df_breaks$userIndex))

# Rows per (time_to_break, continuedToApp) cell divided by the user count
counted_breaks <- df_breaks %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize(interactions_per_user = n()/number_users_breaks)

counted_breaks <- drop_na(counted_breaks)
# Fix the level order so 'openedApp' comes first in the fill scale and legend
counted_breaks$continuedToApp <- factor(counted_breaks$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Stacked bars of interactions_per_user per time_to_break value, filled by
# whether the user continued to the app.
pre_break_plot <- ggplot(counted_breaks, aes(x = time_to_break, y = interactions_per_user, fill = continuedToApp)) +
  geom_bar(stat = "identity") +
  labs(x = "Time Before Break (Hours)", y = "App Open Attempts per User", fill = "User Action") +
  scale_x_continuous(limits=c(- break_cutoff_timeDiff_hours ,NA), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,10), expand=c(0,0), breaks=pretty_breaks()) +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # theme_classic() is a complete theme and replaces every theme setting added
  # before it. The theme(legend.position = 'none') call that used to precede
  # it therefore never took effect and has been removed as dead code. To hide
  # the legend, add legend.position = 'none' to the theme() call below.
  theme_classic() +
  theme(text = element_text(size=30), plot.margin = margin(t=10, r=10, b=0, l=5, unit='pt'))

pre_break_plot

In [None]:
# Per (time_to_break, continuedToApp): summed normalised count. summarize()
# leaves the result grouped by time_to_break, so the sums inside mutate() are
# taken within each time_to_break value: `percentage` is each action's share
# and `opens_per_user` the total over both actions.
grouped_breaks <- counted_breaks %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize(summed_normalized_count = sum(interactions_per_user)) %>%
  mutate(
    percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
    opens_per_user = sum(summed_normalized_count)
  )

# Dismissed attempts only, then only time_to_break values up to 0
grouped_breaks_effectiveness <- grouped_breaks[grouped_breaks[["continuedToApp"]] == 'didNotOpenApp', ]
grouped_breaks_effectiveness <- grouped_breaks_effectiveness[grouped_breaks_effectiveness[["time_to_break"]] <= 0, ]

# Ratio between the secondary (percentage) axis and the primary axis
secondary_scale <- 10

# Dual-axis line plot: series "A" is opens_per_user, series "B" is the
# dismissed percentage divided by secondary_scale so it shares the primary axis.
break_line <- grouped_breaks_effectiveness %>%
  ggplot(aes(x = time_to_break)) +
  geom_line(aes(y = opens_per_user, color = "A"), size = 1) +
  geom_line(aes(y = percentage / secondary_scale, color = "B"), size = 1) +
  scale_color_manual(
    values = c(two_color[2], two_color[1]),
    labels = c("App Open Attempts per User", "Dismissed Attempts (%)")
  ) +
  labs(
    x = "Time Before Break (Hours)",
    y = "App Open Attempts per User",
    color = ""
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 30),
    legend.position = "bottom",
    plot.margin = margin(t = 10, r = 0, b = 0, l = 0, unit = "pt")
  ) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = c(0, 0),
    breaks = pretty_breaks(),
    sec.axis = sec_axis(
      ~ . * secondary_scale,
      name = "Dismissed Attempts (%)",
      labels = percent_format(scale = 1)
    )
  ) +
  scale_x_continuous(
    limits = c(-break_cutoff_timeDiff_hours, NA),
    expand = c(0, 0),
    breaks = pretty_breaks()
  )

break_line

# ggsave('figures/attempts_and_effectiveness.pdf', width=20, height=7)

In [None]:
# Per (time_to_break, continuedToApp): summed normalised count, the share of
# each action within its time_to_break value, and the total over both actions.
grouped_breaks <- counted_breaks %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize(summed_normalized_count = sum(interactions_per_user)) %>%
  mutate(
    percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
    opens_per_user = sum(summed_normalized_count)
  )

# grouped_breaks_effectiveness <- grouped_breaks[grouped_breaks$continuedToApp == 'didNotOpenApp',]

# Re-applies the time_to_break <= 0 filter to the existing object
grouped_breaks_effectiveness <- grouped_breaks_effectiveness[grouped_breaks_effectiveness[["time_to_break"]] <= 0, ]

# Number of rows of df_break per user
df_breaks_per_user <- df_break %>%
  group_by(userIndex) %>%
  summarize(breaks_per_user = n())

# Standard error of per-user, per-break interaction counts:
#   1. count interactions per user, time_to_break and action
#   2. attach each user's number of breaks
#   3. divide the count by that number
#   4. per time_to_break and action, take sd / sqrt(n) over users
#   5. attach the matching grouped_breaks columns
counted_breaks_errors <- df_breaks %>%
  group_by(userIndex, time_to_break, continuedToApp) %>%
  summarize(interactions = n()) %>%
  left_join(df_breaks_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / breaks_per_user) %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize(std_error_interactions = sd(interactions_per_user) / sqrt(n())) %>%
  left_join(grouped_breaks, by = c("time_to_break", "continuedToApp"))

In [None]:
# Total interactions per user for each time_to_break value (both actions)
grouped_breaks <- counted_breaks %>%
  group_by(time_to_break) %>%
  summarize(interactions_per_user = sum(interactions_per_user))

# grouped_breaks_effectiveness <- grouped_breaks[grouped_breaks$continuedToApp == 'didNotOpenApp',]

# Re-applies the time_to_break <= 0 filter to the existing object
grouped_breaks_effectiveness <- grouped_breaks_effectiveness[grouped_breaks_effectiveness[["time_to_break"]] <= 0, ]

# Number of rows of df_break per user
df_breaks_per_user <- df_break %>%
  group_by(userIndex) %>%
  summarize(breaks_per_user = n())

# Standard error of per-user, per-break interaction counts:
#   1. count interactions per user and time_to_break
#   2. attach each user's number of breaks
#   3. divide the count by that number
#   4. per time_to_break, take sd / sqrt(n) over users
#   5. attach the matching grouped_breaks totals
counted_breaks_errors <- df_breaks %>%
  group_by(userIndex, time_to_break) %>%
  summarize(interactions = n()) %>%
  left_join(df_breaks_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / breaks_per_user) %>%
  group_by(time_to_break) %>%
  summarize(std_error_interactions = sd(interactions_per_user) / sqrt(n())) %>%
  left_join(grouped_breaks, by = c("time_to_break"))

In [None]:
# Reuse the column name expected by the model formulas below
counted_breaks_errors$hoursSinceStart <- counted_breaks_errors$time_to_break
# Combined model for the pre-break window. Unlike the first-use version the
# exponent is +B*x (no minus sign). Terms: A*exp(B*x), a cosine of amplitude C
# whose frequency is D times one cycle per 24 units of x with phase E, and a
# linear trend F*x + G.
combined_model <- function(x, A, B, C, D, E, F, G) {
  A * exp(B * x) + C * cos(2*pi/24*D * x + E) + F * x + G
}

# Fit the combined model; `start` holds the initial parameter guesses
fit_combo <- nls(interactions_per_user ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
        data=counted_breaks_errors,
        start = list(A = 6, B = .06, C = 1, D = 1, E = 0, F=0, G=1),
        control = nls.control(maxiter = 10000)
        )

# Coefficient table, then R2nls() on the fit
# (R2nls() is not defined in this part of the file - defined or loaded elsewhere)
summary(fit_combo)
R2nls(fit_combo)


# # Define the combined exponential-sinusoidal function to fit
# exp_model <- function(x, A, B, C, D, E) {
#   A * exp(B * x + C) + D * x + E
# }

# # Fit the model to the data
# fit_exp <- nls(interactions_per_user ~ exp_model(hoursSinceStart, A, B, C, D, E),
#         data=counted_breaks_errors,
#         start = list(A = 6, B = .06, C=1, D = 0, E=1),
#         control = nls.control(maxiter = 10000)
#         )

# # Print the summary of the fit
# summary(fit_exp)

# Cosine with a fixed period of 24 units of x: amplitude A, phase B, offset C
sinusoidal <- function(x, A, B, C) {
  A * cos(2*pi/24 * x + B) + C
}

# Fit the sinusoidal model to the pre-break attempts
fit_sin <- nls(interactions_per_user ~ sinusoidal(hoursSinceStart, A, B, C),
        data=counted_breaks_errors,
        start = list(A = 3, B = 0, C = 1),
        control = nls.control(maxiter = 5000)
        )

# Print the summary of the fit
# summary(fit_sin)
# R2nls(fit_sin)

# Straight line, fitted through nls() like the other candidates
fit_lin <- nls(interactions_per_user ~ A*hoursSinceStart + B,
        data=counted_breaks_errors,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )


# Print the summary of the fit
# summary(fit_lin)
# R2nls(fit_lin)


# # # Create a data frame for plotting
# plot_data <- data.frame(x = df_second_use$hoursSinceStart,
#                         y = df_second_use$interactions_per_user,
#                         ymin=df_second_use$interactions_per_user-df_second_use$std_error_interactions,
#                         ymax=df_second_use$interactions_per_user+df_second_use$std_error_interactions,
#                         Fitted_combo = predict(fit_exp, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart))
#  )
# Observed points with error bars of +/- std_error_interactions
plot_data <- data.frame(x = counted_breaks_errors$hoursSinceStart,
                        y = counted_breaks_errors$interactions_per_user,
                        ymin=counted_breaks_errors$interactions_per_user-counted_breaks_errors$std_error_interactions,
                        ymax=counted_breaks_errors$interactions_per_user+counted_breaks_errors$std_error_interactions)

# Dense x grid (step 0.25) over the observed range for drawing smooth curves.
# ymin/ymax are placeholder columns: the plot below declares ymin and ymax in
# its global aes(), so any layer that supplies its own data must carry them.
line_x <- seq(min(counted_breaks_errors$hoursSinceStart),max(counted_breaks_errors$hoursSinceStart),.25)
line_data <- data.frame(x = line_x,
                        ymin=line_x,
                        ymax=line_x,
                        Fitted_combo = predict(fit_combo, newdata = data.frame(hoursSinceStart = line_x)),
                        # Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = line_x)),
                        Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = line_x))
 )





# Observed points with error bars, overlaid with the combined-model curve.
# NOTE(review): the R^2 value in the annotation is a hard-coded literal even
# though R2nls(fit_combo) is available - confirm it matches the fit.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[2], alpha = 0.5)+
  geom_point(color = two_color[2], alpha=1) + # Data points
  # geom_line(aes(y = Fitted_lin), color = two_color[1], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_exp), color = two_color[1], size=1) + # Fitted curve
  geom_line(data=line_data, aes(x=x, y = Fitted_combo), color = two_color[1], size=1) + # Fitted curve
  labs(x = "Time Before Break (Hours)", y = "App Open Attempts per User") +
  annotate("text", x=-45, y=6, label=expression(R^2 * "=.978"), parse=TRUE, vjust=1, hjust=0, size=7) +
  scale_x_continuous(limits=c(-break_cutoff_timeDiff_hours,1), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,7), expand=c(0,0), breaks=pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot
gg

# ggsave('figures/break_fit.pdf', width=7, height=7)


In [None]:
# Sequential comparison of the three fits; the wrapping parentheses display
# the table right after it is stored.
(j <- anova(fit_lin, fit_sin, fit_combo))

# BIC of the three candidates side by side
BIC(fit_lin, fit_sin, fit_combo)

In [None]:
# Sum of squares of the third row of `j` divided by the total of the
# residual sums of squares over all rows of `j`
eta_sq <- j[3, "Sum Sq"] / sum(j[["Res.Sum Sq"]])
eta_sq

In [None]:
# Build the per-(time_to_break, continuedToApp) table used for the percentage
# fit, including an error estimate for `percentage`.

# summarize() leaves the result grouped by time_to_break, so the sums inside
# mutate() are taken within each time_to_break value: `percentage` is each
# action's share and `opens_per_user` the total over both actions.
grouped_breaks <- counted_breaks %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize(
    summed_normalized_count = sum(interactions_per_user)
  ) %>%
  mutate(
    percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
    opens_per_user = sum(summed_normalized_count)
  )

# grouped_breaks_effectiveness <- grouped_breaks[grouped_breaks$continuedToApp == 'didNotOpenApp',]

# Re-applies the time_to_break <= 0 filter to the existing object; it is not
# rebuilt from the grouped_breaks computed above.
grouped_breaks_effectiveness <- grouped_breaks_effectiveness[grouped_breaks_effectiveness$time_to_break <= 0, ]

# count how many breaks each user had
# NOTE(review): this reads df_break while the interactions below come from
# df_breaks - confirm both data frames are the intended sources.
df_breaks_per_user <- df_break %>%
  group_by(userIndex) %>%
  summarize(breaks_per_user = n())

# count total interactions per user
counted_breaks_errors <- df_breaks %>%
  group_by(userIndex, time_to_break, continuedToApp) %>%
  summarize(interactions = n())

# join the total interactions and number of breaks
counted_breaks_errors <- left_join(counted_breaks_errors, df_breaks_per_user, by='userIndex')
# create a new column where the total interactions is normalized by the number of breaks, so that it represents their use per break
counted_breaks_errors$interactions_per_user <- counted_breaks_errors$interactions / counted_breaks_errors$breaks_per_user

# Standard error over users: sd / sqrt(number of users in the cell)
counted_breaks_errors <- counted_breaks_errors %>%
  group_by(time_to_break, continuedToApp) %>%
  summarize( std_error_interactions = sd(interactions_per_user)/sqrt(length(interactions_per_user))
            )
counted_breaks_errors <- left_join(counted_breaks_errors, grouped_breaks, by=c('time_to_break','continuedToApp'))
# Squared relative error of the per-action count.
# NOTE(review): the denominator is opens_per_user (total over both actions),
# not this row's summed_normalized_count - confirm which is intended.
counted_breaks_errors$squared_error_ratio = (counted_breaks_errors$std_error_interactions / counted_breaks_errors$opens_per_user)^2
# NOTE(review): opens_per_user already holds the per-time_to_break total on
# every row, so summing it again scales it by the number of rows in the group.
# error_sum adds the standard errors linearly rather than in quadrature.
# Confirm both against the intended error-propagation formula.
counted_breaks_errors <- counted_breaks_errors %>% 
  group_by(time_to_break) %>%
  mutate(error_sum = sum(std_error_interactions),
        interaction_sum = sum(opens_per_user)) %>%
  ungroup()

# Error of the percentage: percentage * sqrt(relative error of the part ^2 +
# relative error of the total ^2)
counted_breaks_errors$std_error_percentage <- counted_breaks_errors$percentage * sqrt(counted_breaks_errors$squared_error_ratio + (counted_breaks_errors$error_sum ^2) / (counted_breaks_errors$interaction_sum ^2))


In [None]:
# Dismissed attempts only; hoursSinceStart mirrors time_to_break so the model
# formulas below can use the same column name as the first-use fits.
df_fit <- counted_breaks_errors[counted_breaks_errors$continuedToApp == 'didNotOpenApp',]
df_fit$hoursSinceStart <- df_fit$time_to_break

# combined_model <- function(x, A, B, C, D, E, F, G) {
#   A * exp(B * x) + C * cos(2*pi/24*D * x + E) + F * x + G
# }

# # Fit the model to the data
# fit_combo <- nls(percentage ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
#         data=df_fit,
#         start = list(A = 38, B = .02, C = 1, D = 1, E = 0, F=0, G=1),
#         control = nls.control(maxiter = 10000)
#         )

# # Print the summary of the fit
# summary(fit_combo)
# # R2nls(fit_combo)


# # # Define the combined exponential-sinusoidal function to fit
# exp_model <- function(x, A, B, C, D, E) {
#   A * exp(B * x + C) + D * x + E
# }

# # Fit the model to the data
# fit_exp <- nls(percentage ~ exp_model(hoursSinceStart, A, B, C, D, E),
#         data=df_fit,
#         start = list(A = 38, B = .002, C=1, D = 0, E=1),
#         control = nls.control(maxiter = 10000)
#         )

# # Print the summary of the fit
# summary(fit_exp)

# Cosine with a fixed period of 24 units of x: amplitude A, phase B, offset C
sinusoidal <- function(x, A, B, C) {
  A * cos(2*pi/24 * x + B) + C
}

# Fit the sinusoidal model to the dismissed percentage before a break
fit_sin <- nls(percentage ~ sinusoidal(hoursSinceStart, A, B, C),
        data=df_fit,
        start = list(A = 5, B = 0, C = 40),
        control = nls.control(maxiter = 5000)
        )

# Print the summary of the fit
# summary(fit_sin)
# R2nls(fit_sin)

# Straight line, fitted through nls() like the sinusoidal candidate
fit_lin <- nls(percentage ~ A*hoursSinceStart + B,
        data=df_fit,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )


# Print the summary of the fit
# summary(fit_lin)
# R2nls(fit_lin)



# Plotting frame: observed percentage with +/- one standard error, plus the
# sinusoidal and linear predictions evaluated at the observed time points.
plot_data <- with(df_fit, data.frame(
  x = hoursSinceStart,
  y = percentage,
  ymin = percentage - std_error_percentage,
  ymax = percentage + std_error_percentage,
  Fitted_sin = predict(
    fit_sin,
    newdata = data.frame(hoursSinceStart = hoursSinceStart)
  ),
  Fitted_lin = predict(
    fit_lin,
    newdata = data.frame(hoursSinceStart = hoursSinceStart)
  )
))
 




# Scatter of the observed percentages with error bars, overlaid with the
# dashed linear fit. Only the linear fit is drawn; the other fitted curves are
# left commented out.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[1], alpha = 0.5)+
  geom_point(color = two_color[1], alpha=1) + # Data points
  # geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_line(aes(y = Fitted_lin), color = two_color[2], size=1, linetype="dashed") + # Fitted curve
  # geom_line(data=line_data, aes(x=x,y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  # geom_line(data=line_data, aes(x=x, y = Fitted_exp), color = two_color[2], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_combo), color = two_color[1], size=1) + # Fitted curve
  # NOTE(review): the R^2 value and the x = -45 anchor are hard-coded; they are
  # not recomputed from the fit, so update them by hand if the data change.
  annotate("text", x=-45, y=100, label=expression(R^2 * "=.00647"), parse=TRUE, vjust=1, hjust=0, size=7) +
  labs(x = "Time Before Break (Hours)", y = "Dismissed Attempts (%)") +
  # x runs from -break_cutoff_timeDiff_hours up to 1; y is fixed to 0-100.
  scale_x_continuous(limits=c(-break_cutoff_timeDiff_hours,1), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,100), expand=c(0,0), breaks=pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot in the notebook
gg  

# ggsave('figures/break_percentage_fit.pdf', width=7, height=7)


In [None]:
# Compare the sinusoidal and linear fits: ANOVA table first, then BIC.
anova(fit_sin, fit_lin)
BIC(fit_lin, fit_sin)

In [None]:
# Ordinary least-squares regression of percentage on time.
lm_model <- lm(
  percentage ~ hoursSinceStart,
  data = df_fit
)
summary(lm_model)

In [None]:
# Linear mixed model of percentage on time with a random intercept per
# `summed_normalized_count` value. Despite the variable name, the model is
# fitted with lmer(), not glmer().
# NOTE(review): the grouping factor is a numeric summary column; if every row of
# df_fit has its own value there is only one observation per group — confirm
# this grouping is intended.
glmer_model <- lmer(percentage ~ hoursSinceStart + (1|summed_normalized_count), data=df_fit)
summary(glmer_model)

In [None]:
# Print the textual report that report() generates for the mixed model.
print(report(glmer_model))

In [None]:
# Rows per user in df_break (presumably one row per break — confirm).
df_breaks_per_user <- df_break %>%
  group_by(userIndex) %>%
  summarise(breaks_per_user = n())

# For every user / time_to_break / continuedToApp cell: count the interactions,
# divide by that user's break count, then average over users and attach the
# standard error of the mean.
counted_breaks_errors <- df_breaks %>%
  group_by(userIndex, time_to_break, continuedToApp) %>%
  summarise(interactions = n()) %>%
  left_join(df_breaks_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / breaks_per_user) %>%
  group_by(time_to_break, continuedToApp) %>%
  summarise(
    mean_interactions = mean(interactions_per_user),
    std_error_interactions = sd(interactions_per_user) / sqrt(n())
  )

In [None]:
# Load the post-return interaction log and make the column names
# syntactically valid R names.
df_returns <- read_csv("./data_returns.csv")
names(df_returns) <- make.names(names(df_returns))
# head(df_returns, 5)

In [None]:
# Keep only rows at or below the cut-off in hoursSinceStart.
df_returns <- df_returns[
  df_returns$hoursSinceStart <= break_cutoff_timeDiff_hours,
]
number_users_returns <- length(unique(df_returns$userIndex))

# Attempts per user for each hour-after-return / outcome combination; the
# denominator is the number of distinct users left after the filter above.
counted_returns <- df_returns %>%
  group_by(time_after_return, continuedToApp) %>%
  summarise(interactions_per_user = n() / number_users_returns)

# Fix the level order so 'openedApp' comes first in the plots.
counted_returns$continuedToApp <- factor(
  counted_returns$continuedToApp,
  levels = c("openedApp", "didNotOpenApp")
)

# Bar chart of app-open attempts per user by hour after return, with the fill
# split by whether the user continued to the app.
post_return_plot <- ggplot(counted_returns, aes(x = time_after_return, y = interactions_per_user, fill = continuedToApp)) +
  geom_bar(stat = "identity") +
  labs(x = "Hours After Return from Break", y = "App Open Attempts per User", fill = "User Action") +
  scale_x_continuous(limits=c(NA,break_cutoff_timeDiff_hours), expand=c(0,0), breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,10), expand=c(0,0), breaks=pretty_breaks()) +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # NOTE(review): no colour aesthetic is mapped in this plot, so this colour
  # scale appears to have no visible effect (the fill scale above is what
  # applies) — confirm whether scale_fill_manual was intended.
  scale_color_manual(values = two_color) + 
  theme_classic() +
  theme(text = element_text(size=30), plot.margin = margin(t=10, r=10, b=0, l=0, unit='pt'))

# post_return_plot <- post_return_plot + theme(legend.position = 'none')
# Display the plot in the notebook
post_return_plot

In [None]:
# Per hour after return: each outcome's share of attempts (in percent) and the
# total attempts per user. The mutate() runs within each time_after_return
# group that summarise() leaves behind.
grouped_returns <- counted_returns %>%
  group_by(time_after_return, continuedToApp) %>%
  summarise(summed_normalized_count = sum(interactions_per_user)) %>%
  mutate(
    percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
    opens_per_user = sum(summed_normalized_count)
  )

# Keep the 'didNotOpenApp' rows only, then drop negative times.
grouped_returns_effectiveness <- grouped_returns[
  grouped_returns$continuedToApp == "didNotOpenApp",
]
grouped_returns_effectiveness <- grouped_returns_effectiveness[
  grouped_returns_effectiveness$time_after_return >= 0,
]

# Dual-axis line plot: attempts per user on the primary axis and the dismissed
# percentage on the secondary axis. The percentage is divided by
# `secondary_scale` (defined elsewhere) for drawing and multiplied back in
# sec_axis() so the right-hand axis reads in percent.
return_line <- ggplot(grouped_returns_effectiveness, aes(x = time_after_return)) +
  geom_line(aes(y = opens_per_user, color = "A"), size = 1) +
  geom_line(aes(y = percentage/secondary_scale, color = "B"), size = 1) +
  # "A" and "B" are placeholder keys; the labels below give the legend text.
  scale_color_manual(values = c(two_color[2], two_color[1]), labels=c("App Open Attempts per User","Dismissed Attempts (%)")) +
  labs(x = "Hours After Return from Break", y = "App Open Attempts per User", color = "") +
  theme_classic() +
  theme(text = element_text(size=30),  legend.position = "none", plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt')) +
  scale_y_continuous(limits=c(0,10), expand=c(0,0), breaks=pretty_breaks(),
    sec.axis = sec_axis(~.*secondary_scale, name = "Dismissed Attempts (%)", labels = percent_format(scale = 1))) +
  scale_x_continuous(limits=c(NA,break_cutoff_timeDiff_hours), expand=c(0,0), breaks=pretty_breaks())

# Display the plot in the notebook
return_line

# ggsave('figures/attempts_and_effectiveness.pdf', width=20, height=7)

In [None]:
# Total attempts per user for each hour after return (outcomes pooled).
grouped_returns <- counted_returns %>%
  group_by(time_after_return) %>%
  summarise(interactions_per_user = sum(interactions_per_user))


# Rows per user in df_return (presumably one row per return — confirm).
df_returns_per_user <- df_return %>%
  group_by(userIndex) %>%
  summarise(returns_per_user = n())

# Per-user interaction counts per hour, divided by that user's return count;
# the across-user standard error is then joined onto the pooled totals.
counted_returns_errors <- df_returns %>%
  group_by(userIndex, time_after_return) %>%
  summarise(interactions = n()) %>%
  left_join(df_returns_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / returns_per_user) %>%
  group_by(time_after_return) %>%
  summarise(
    std_error_interactions = sd(interactions_per_user) / sqrt(n())
  ) %>%
  left_join(grouped_returns, by = "time_after_return")

In [None]:
# Copy the time axis into `hoursSinceStart`, the predictor name the model
# formulas below use.
counted_returns_errors[["hoursSinceStart"]] <-
  counted_returns_errors[["time_after_return"]]
# Combined model: exponential decay + cosine + linear trend + constant.
#   A, B: decay amplitude and rate
#   C, D, E: cosine amplitude, frequency multiplier on the 24-unit cycle, phase
#   F, G: slope and intercept of the linear part
combined_model <- function(x, A, B, C, D, E, F, G) {
  decay_term <- A * exp(-B * x)
  cycle_term <- C * cos(2*pi/24*D * x + E)
  trend_term <- F * x
  decay_term + cycle_term + trend_term + G
}

# Fit the combined decay + cosine + trend model to attempts per user.
fit_combo <- nls(
  interactions_per_user ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
  data = counted_returns_errors,
  start = list(A = 1, B = 1, C = 1, D = 1, E = 0, F=0, G=1),
  control = nls.control(maxiter = 10000)
)

# summary(fit_combo)
# Display the R^2 reported by R2nls() (helper defined outside this section).
R2nls(fit_combo)


# Define the combined exponential-sinusoidal function to fit
# exp_model <- function(x, A, B, C, D, E) {
#   A * exp(-B * x - C) + D * x + E
# }

# # Fit the model to the data
# fit_exp <- nls(interactions_per_user ~ exp_model(hoursSinceStart, A, B, C, D, E),
#         data=counted_returns_errors,
#         start = list(A = 1.1, B = 20, C=48, D = 0, E=1.1),
#         control = nls.control(maxiter = 10000)
#         )

# # Print the summary of the fit
# summary(fit_exp)

# Fixed-period cosine: one full cycle every 24 units of x.
#   A scales the wave, B shifts its phase (radians), C shifts it vertically.
sinusoidal <- function(x, A, B, C) {
  phase <- 2*pi/24 * x + B
  wave <- cos(phase)
  A * wave + C
}

# Fit the fixed-period cosine to attempts per user after return.
fit_sin <- nls(interactions_per_user ~ sinusoidal(hoursSinceStart, A, B, C),
        data=counted_returns_errors,
        start = list(A = 3, B = 0, C = 1),
        control = nls.control(maxiter = 5000)
        )

# Optional diagnostics for the sinusoidal fit
# summary(fit_sin)
# R2nls(fit_sin)

# Straight-line baseline (slope A, intercept B), fitted with nls().
fit_lin <- nls(interactions_per_user ~ A*hoursSinceStart + B,
        data=counted_returns_errors,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )

# NOTE(review): this call is identical to the fit_lin call above, so `fit_exp`
# holds a linear fit here, not an exponential one. The exponential model is
# only present as commented-out code earlier in this cell, and fit_exp is not
# used by the active code in the rest of this cell — confirm whether this
# assignment should be removed or replaced by a real exponential fit.
fit_exp <- nls(interactions_per_user ~ A*hoursSinceStart + B,
        data=counted_returns_errors,
        start = list(A = -1, B = 16),
        control = nls.control(maxiter = 1000)
        )


# Optional diagnostics for the linear fit
# summary(fit_lin)
# R2nls(fit_lin)


# # # Create a data frame for plotting
# plot_data <- data.frame(x = df_second_use$hoursSinceStart,
#                         y = df_second_use$interactions_per_user,
#                         ymin=df_second_use$interactions_per_user-df_second_use$std_error_interactions,
#                         ymax=df_second_use$interactions_per_user+df_second_use$std_error_interactions,
#                         Fitted_combo = predict(fit_exp, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart)),
#                         Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = df_second_use$hoursSinceStart))
#  )
# Observed attempts per user with +/- one standard error.
plot_data <- with(counted_returns_errors, data.frame(
  x = hoursSinceStart,
  y = interactions_per_user,
  ymin = interactions_per_user - std_error_interactions,
  ymax = interactions_per_user + std_error_interactions
))

# Quarter-hour grid across the observed range for drawing smooth fitted curves.
line_x <- seq(
  min(counted_returns_errors$hoursSinceStart),
  max(counted_returns_errors$hoursSinceStart),
  .25
)
# ymin/ymax are filled with the grid values as placeholders; the plot's global
# aes() maps them, so the columns have to exist in this frame as well.
line_data <- data.frame(
  x = line_x,
  ymin = line_x,
  ymax = line_x,
  Fitted_combo = predict(fit_combo, newdata = data.frame(hoursSinceStart = line_x)),
  Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = line_x)),
  Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = line_x))
)

# Scatter of attempts per user with error bars, overlaid with the combined
# model curve evaluated on the finer `line_data` grid. The other fitted curves
# are left commented out.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[2], alpha = 0.5)+
  geom_point(color = two_color[2], alpha=1) + # Data points
  # geom_line(aes(y = Fitted_lin), color = two_color[1], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  # geom_line(aes(y = Fitted_exp), color = two_color[1], size=1) + # Fitted curve
  geom_line(data=line_data, aes(x=x, y = Fitted_combo), color = two_color[1], size=1) + # Fitted curve
  labs(x = "Time After Return From Break (Hours)", y = "App Open Attempts per User") +
  # NOTE(review): the R^2 value is hard-coded, not taken from R2nls(fit_combo);
  # update it by hand if the data or the fit change.
  annotate("text", x=3, y=6, label=expression(R^2 * "=.912"), parse=TRUE, vjust=1, hjust=0, size=7) +
  scale_x_continuous(limits=c(0,break_cutoff_timeDiff_hours), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,7), expand=c(0,0), breaks=pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot in the notebook
gg

# ggsave('figures/return_fit.pdf', width=7, height=7)


In [None]:
# Sequential ANOVA across the linear, sinusoidal and combined fits; the table
# is kept in `j` for the effect-size calculation in the next cell.
j <- anova(fit_lin, fit_sin, fit_combo)
j
AIC(fit_lin, fit_sin, fit_combo)

In [None]:
# Effect-size style ratio: the 'Sum Sq' entry of the third ANOVA row divided
# by the sum of the residual sums of squares of all models in the table.
# NOTE(review): the denominator adds residual SS across models rather than
# using a total SS — confirm this is the intended eta-squared definition.
eta_sq <- j[3, "Sum Sq"] / sum(j["Res.Sum Sq"])
eta_sq

In [None]:
# Per hour after return: each outcome's share of attempts (percent) and the
# total attempts per user.
grouped_returns <- counted_returns %>%
  group_by(time_after_return, continuedToApp) %>%
  summarise(summed_normalized_count = sum(interactions_per_user)) %>%
  mutate(
    percentage = summed_normalized_count / sum(summed_normalized_count) * 100,
    opens_per_user = sum(summed_normalized_count)
  )

# Rows per user in df_return (presumably one row per return — confirm).
df_returns_per_user <- df_return %>%
  group_by(userIndex) %>%
  summarise(returns_per_user = n())

# Standard error of the per-user, per-return interaction rate for every
# hour / outcome cell, joined onto the percentages above. The last steps
# combine the relative error of the cell with the relative error of the
# hourly sums to get a standard error for the percentage.
counted_returns_errors <- df_returns %>%
  group_by(userIndex, time_after_return, continuedToApp) %>%
  summarise(interactions = n()) %>%
  left_join(df_returns_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / returns_per_user) %>%
  group_by(time_after_return, continuedToApp) %>%
  summarise(
    std_error_interactions = sd(interactions_per_user) / sqrt(n())
  ) %>%
  left_join(grouped_returns, by = c("time_after_return", "continuedToApp")) %>%
  group_by(time_after_return) %>%
  mutate(
    squared_error_ratio = (std_error_interactions / opens_per_user)^2,
    error_sum = sum(std_error_interactions),
    interaction_sum = sum(opens_per_user)
  ) %>%
  ungroup() %>%
  mutate(
    std_error_percentage = percentage *
      sqrt(squared_error_ratio + (error_sum^2) / (interaction_sum^2))
  )


In [None]:
# Model input: the 'didNotOpenApp' rows only, with the time axis copied into
# `hoursSinceStart` (the predictor name used by the formulas below).
df_fit <- counted_returns_errors[
  counted_returns_errors$continuedToApp == "didNotOpenApp",
]
df_fit[["hoursSinceStart"]] <- df_fit[["time_after_return"]]

# Combined model: exponential decay + cosine + linear trend + constant.
#   A * exp(-B x)               decay
#   C * cos(2*pi/24 * D x + E)  cycle (D rescales the 24-unit period, E = phase)
#   F x + G                     linear part
combined_model <- function(x, A, B, C, D, E, F, G) {
  decay_part <- A * exp(-B * x)
  cycle_part <- C * cos(2*pi/24*D * x + E)
  decay_part + cycle_part + F * x + G
}

# Fit the combined decay + cosine + trend model to the percentage.
fit_combo <- nls(
  percentage ~ combined_model(hoursSinceStart, A, B, C, D, E, F, G),
  data = df_fit,
  start = list(A = 40, B = 0.9, C = 0.75, D = 1, E = 0, F=0, G=1),
  control = nls.control(maxiter = 10000)
)

# Optional diagnostics:
# summary(fit_combo)
# R2nls(fit_combo)

# Exponential decay plus a linear trend (no sinusoidal term):
#   A * exp(-B x) + C x + D
exp_model <- function(x, A, B, C, D) {
  decay_part <- A * exp(-B * x)
  linear_part <- C * x
  decay_part + linear_part + D
}

# Fit the decay + linear model to the percentage.
fit_exp <- nls(
  percentage ~ exp_model(hoursSinceStart, A, B, C, D),
  data = df_fit,
  start = list(A = 17, B = 0.1, C = 0, D=48),
  control = nls.control(maxiter = 1000)
)

# summary(fit_exp)
# Display the R^2 reported by R2nls() (helper defined outside this section).
R2nls(fit_exp)

# Cosine with a fixed period of 24 units of x.
#   A: amplitude, B: phase offset (radians), C: baseline level
sinusoidal <- function(x, A, B, C) {
  angle <- 2*pi/24 * x + B
  A * cos(angle) + C
}

# Fixed-period cosine fit to the percentage.
fit_sin <- nls(
  percentage ~ sinusoidal(hoursSinceStart, A, B, C),
  data = df_fit,
  start = list(A = 3, B = 0, C = 1),
  control = nls.control(maxiter = 5000)
)

# Optional diagnostics:
# summary(fit_sin)
# R2nls(fit_sin)

# Straight-line baseline (slope A, intercept B), fitted with nls().
fit_lin <- nls(
  percentage ~ A*hoursSinceStart + B,
  data = df_fit,
  start = list(A = -1, B = 16),
  control = nls.control(maxiter = 1000)
)
# R2nls(fit_lin)


# Observed percentage with +/- one standard error.
plot_data <- with(df_fit, data.frame(
  x = hoursSinceStart,
  y = percentage,
  ymin = percentage - std_error_percentage,
  ymax = percentage + std_error_percentage
))
# Half-hour grid across the observed range for drawing the fitted curves.
line_x <- seq(min(df_fit$hoursSinceStart), max(df_fit$hoursSinceStart), .5)
# ymin/ymax hold the grid values as placeholders; the plot's global aes() maps
# them, so the columns have to exist in this frame too.
line_data <- data.frame(
  x = line_x,
  ymin = line_x,
  ymax = line_x,
  Fitted_combo = predict(fit_combo, newdata = data.frame(hoursSinceStart = line_x)),
  Fitted_exp = predict(fit_exp, newdata = data.frame(hoursSinceStart = line_x)),
  Fitted_sin = predict(fit_sin, newdata = data.frame(hoursSinceStart = line_x)),
  Fitted_lin = predict(fit_lin, newdata = data.frame(hoursSinceStart = line_x))
)
 




# Scatter of the observed percentage with error bars, overlaid with the
# decay + linear curve (Fitted_exp) from `line_data`. The other fitted curves
# are left commented out.
gg <- ggplot(plot_data, aes(x = x, y = y, ymin=ymin, ymax=ymax)) +
  geom_errorbar(width=0.2, color=two_color[1], alpha = 0.5)+
  geom_point(color = two_color[1], alpha=1) + # Data points
  # geom_line(data=line_data, aes(x=x,y = Fitted_lin), color = two_color[1], size=1) + # Fitted curve
  # geom_line(data=line_data, aes(x=x,y = Fitted_sin), color = two_color[2], size=1) + # Fitted curve
  geom_line(data=line_data, aes(x=x, y = Fitted_exp), color = two_color[2], size=1) + # Fitted curve
  # geom_line(data=line_data, aes(x=x, y = Fitted_combo), color = two_color[2], size=1) + # Fitted curve
  # NOTE(review): the R^2 value is hard-coded, not taken from R2nls(fit_exp);
  # update it by hand if the data or the fit change.
  annotate("text", x=3, y=100, label=expression(R^2 * "=.370"), parse=TRUE, vjust=1, hjust=0, size=7) +
  labs(x = "Time After Return From Break (Hours)", y = "Dismissed Attempts (%)") +
  scale_x_continuous(limits=c(0,break_cutoff_timeDiff_hours), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(limits=c(0,100), expand=c(0,0), breaks=pretty_breaks()) +
  theme_classic() +
  theme(text = element_text(size=25), plot.margin = margin(t=10, r=12, b=0, l=0, unit='pt'))

# Display the plot in the notebook
gg  

# ggsave('figures/return_percentage_fit.pdf', width=7, height=7)


In [None]:
# Sequential ANOVA across the four percentage fits, kept in `j` for the
# effect-size calculation in the next cell, followed by their BIC values.
j <- anova(fit_lin, fit_sin, fit_exp, fit_combo)
j
BIC(fit_lin, fit_sin, fit_exp, fit_combo)

In [None]:
# Effect-size style ratio: the 'Sum Sq' entry of the third ANOVA row divided
# by the sum of the residual sums of squares of all models in the table.
# NOTE(review): with four models in `j`, row 3 is the fit_exp step — confirm
# that row and this denominator are the ones intended.
eta_sq <- j[3, "Sum Sq"] / sum(j["Res.Sum Sq"])
eta_sq

In [None]:
# Three-panel figure: first-use, pre-break and post-return bar plots side by
# side, with one shared legend underneath.
# grid_plot <- grid.arrange(pre_break_plot, post_return_plot, pre_dropout_plot, ncol = 3)
# final_plot <- arrangeGrob(grid_plot, bottom = legend_table)
# grid.draw(final_plot)
# Legends are removed from every panel, and the y-axis title is kept only on
# the left-most one.
grid_plot <- grid.arrange(first_use_plot + theme(legend.position = "none"),
                            pre_break_plot + theme(legend.position = "none", axis.title.y = element_blank()),
                            post_return_plot + theme(legend.position = "none", axis.title.y = element_blank()), ncol = 3)

# Temporary copy of one panel with a horizontal bottom legend, used only as
# the source of the shared legend.
temp <- pre_break_plot + theme(legend.position = "bottom", legend.direction = "horizontal",text = element_text(size=25))
# Extract the legend from that copy
legend_grid <- get_legend(temp)

# Stack the panel row (90% of the height) above the legend (10%)
# final_plot <- arranged_plots + theme(legend.position = "none")
final_plot <- arrangeGrob(grid_plot, legend_grid, nrow=2, heights = c(0.9, 0.1))

# NOTE(review): arrangeGrob() builds the layout without drawing it; this line
# may only print the object rather than render the figure — confirm, and use
# grid.draw(final_plot) if the drawn figure is wanted here.
final_plot

# ggsave('figures/grid_break_return_dropout.pdf', plot=final_plot, width=25, height=8, dpi=300)

In [None]:

# Three-panel figure of the dual-axis line plots (first use, break, return)
# with one shared legend underneath.
# grid_plot <- grid.arrange(pre_break_plot, post_return_plot, pre_dropout_plot, ncol = 3)
# final_plot <- arrangeGrob(grid_plot, bottom = legend_table)
# grid.draw(final_plot)
# The first panel drops its right-hand axis title; the other two drop their
# y-axis titles via axis.title.y.
grid_plot <- grid.arrange(firsts_line + theme(legend.position = "none", axis.title.y.right = element_blank()),
                            break_line + theme(legend.position = "none", axis.title.y = element_blank()),
                            return_line + theme(legend.position = "none", axis.title.y = element_blank()), ncol = 3)

# Temporary copy of one panel with a horizontal bottom legend, used only as
# the source of the shared legend.
temp <- break_line + theme(legend.position = "bottom", legend.direction = "horizontal", text = element_text(size=25))
# Extract the legend from that copy
legend_grid <- get_legend(temp)

# Stack the panel row (90% of the height) above the legend (10%)
# final_plot <- arranged_plots + theme(legend.position = "none")
final_plot <- arrangeGrob(grid_plot, legend_grid, nrow=2, heights = c(0.9, 0.1))

# NOTE(review): arrangeGrob() builds the layout without drawing it; this line
# may only print the object rather than render the figure — confirm.
final_plot

# ggsave('figures/grid_line_break_return_dropout.pdf', plot=final_plot, width=25, height=8, dpi=300)

In [None]:
# --- First use: raw interaction counts per user and outcome -----------------
df_start <- df_noClose[
  df_noClose$hoursSinceStart <= break_cutoff_timeDiff_hours,
]
counted_firsts_box <- df_start %>%
  group_by(userIndex, continuedToApp) %>%
  summarise(interactions_per_user = n())

# --- Pre-break: counts per user and outcome, divided by rows per user in
# df_break (presumably the user's number of breaks — confirm) ----------------
df_breaks_per_user <- df_break %>%
  group_by(userIndex) %>%
  summarise(breaks_per_user = n())

counted_breaks_box <- df_breaks %>%
  group_by(userIndex, continuedToApp) %>%
  summarise(interactions = n()) %>%
  left_join(df_breaks_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / breaks_per_user)


# --- Post-return: same normalisation, using rows per user in df_return ------
df_returns_per_user <- df_return %>%
  group_by(userIndex) %>%
  summarise(returns_per_user = n())

counted_returns_box <- df_returns %>%
  group_by(userIndex, continuedToApp) %>%
  summarise(interactions = n()) %>%
  left_join(df_returns_per_user, by = "userIndex") %>%
  mutate(interactions_per_user = interactions / returns_per_user)

# Tag every table with a numeric and a text phase label.
counted_firsts_box$identifier <- as.factor(1)   # First
counted_breaks_box$identifier <- as.factor(2)   # Break
counted_returns_box$identifier <- as.factor(3)  # Return

counted_firsts_box$identifier_text <- as.factor("First")
counted_breaks_box$identifier_text <- as.factor("Break")
counted_returns_box$identifier_text <- as.factor("Return")

# Stack the three phases, keeping only the columns they share.
counted_combined <- local({
  shared_columns <- c(
    "userIndex", "continuedToApp", "interactions_per_user",
    "identifier", "identifier_text"
  )
  rbind(
    counted_firsts_box[shared_columns],
    counted_breaks_box[shared_columns],
    counted_returns_box[shared_columns]
  )
})


In [None]:


# Fix the outcome level order so 'openedApp' comes first in the legend.
counted_combined$continuedToApp <- factor(counted_combined$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Box plot of attempts per user for each phase, split by user action.
combined_box <- ggplot(data=counted_combined, aes(x=identifier_text, y = interactions_per_user, fill = continuedToApp)) +
  geom_boxplot() +
  labs(y = "App Open Attempts per User", fill = "User Action") +
  # scale_x_discrete(labels=c("After First Use","Before Break", "After Return"), ) +
  # scale_y_continuous(limits=c(0,40), expand=c(0,0), breaks=pretty_breaks()) +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # NOTE(review): only `fill` is mapped here, so this colour scale appears to
  # have no visible effect — confirm whether scale_fill_manual was intended.
  scale_color_manual(values = two_color) + 
  theme_classic() +
  theme(text = element_text(size=25), legend.position = 'bottom', legend.direction='horizontal',axis.title.x=element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

# Display the plot in the notebook
combined_box

# ggsave('figures/user_action_box.pdf', width=20, height=7)

In [None]:


# Fix the outcome level order (same conversion as in the previous cell).
counted_combined$continuedToApp <- factor(counted_combined$continuedToApp, levels=c('openedApp', 'didNotOpenApp'))

# Box plot of attempts per user for each phase with outcomes pooled; the three
# boxes take the first three entries of `plot_colors`.
combined_box <- ggplot(data=counted_combined, aes(x=identifier_text, y = interactions_per_user)) +
  geom_boxplot(fill=plot_colors[1:3]) +
  labs(y = "App Open Attempts per User") +
  # scale_x_discrete(labels=c("After First Use","Before Break", "After Return"), ) +
  # The y axis is limited to 0-40.
  scale_y_continuous(limits=c(0,40), expand=c(0,0), breaks=pretty_breaks()) +
#   scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # scale_color_manual(values = two_color) + 
  theme_classic() +
  theme(text = element_text(size=25), legend.position = 'bottom', legend.direction='horizontal',axis.title.x=element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

# Display the plot in the notebook
combined_box

# ggsave('figures/user_action_box.pdf', width=20, height=7)

In [None]:
# Linear mixed model (lmer, despite the variable name): attempts per user by
# phase, with a random intercept for each user.
glmer_model <- lmer(
  interactions_per_user ~ identifier_text + (1 | userIndex),
  data = counted_combined
)
summary(glmer_model)

In [None]:
# Print the textual report that report() generates for the phase model.
print(report(glmer_model))

In [None]:
# Copy of the combined table in which Tukey outliers (flag 1 from
# tukey_detect(), default 1.5 * IQR fences) are replaced by the overall mean.
counted_combined_box <- counted_combined
outliers_combined <- tukey_detect(dv = counted_combined_box$interactions_per_user)
# Index the column through which(): tukey_detect() returns NA for NA inputs,
# and which() drops those flags, so NA rows are left untouched instead of
# breaking the row-subset assignment. na.rm = TRUE keeps the replacement value
# defined when the column contains NA.
# NOTE(review): as.integer() truncates the mean although the column holds
# per-break ratios; the truncation is kept to preserve existing results —
# confirm it is intended.
counted_combined_box$interactions_per_user[which(outliers_combined == 1)] <-
  as.integer(mean(counted_combined_box$interactions_per_user, na.rm = TRUE))

In [None]:


# Same phase-by-action box plot as before, drawn from the table in which
# outliers were replaced (counted_combined_box).
combined_box <- ggplot(data=counted_combined_box, aes(x=identifier_text, y = interactions_per_user, fill = continuedToApp)) +
  geom_boxplot() +
  labs(y = "App Open Attempts per User", fill = "User Action") +
  # Display labels are matched to the factor levels by position.
  scale_x_discrete(labels=c("First Use","Pre-Break", "Post-Return"), ) +
  # scale_y_continuous(limits=c(0,1), expand=c(0,0), breaks=pretty_breaks()) +
  scale_fill_discrete(labels=c('Continued to App', 'Did not Continue')) +
  # NOTE(review): only `fill` is mapped here, so this colour scale appears to
  # have no visible effect — confirm whether scale_fill_manual was intended.
  scale_color_manual(values = two_color) + 
  theme_classic() +
  theme(text = element_text(size=25), legend.position = 'bottom', legend.direction='horizontal',axis.title.x=element_blank(), plot.margin = margin(t=10, r=0, b=0, l=0, unit='pt'))

# Display the plot in the notebook
combined_box

# ggsave('figures/user_action_box.pdf', width=20, height=7)

In [None]:
# Linear mixed model (lmer) on the outlier-replaced table: phase, user action
# and their interaction, with a random intercept for each user.
glmer_model <- lmer(
  interactions_per_user ~ identifier_text*continuedToApp + (1 | userIndex),
  data = counted_combined_box
)
summary(glmer_model)

In [None]:
# Print the textual report that report() generates for the interaction model.
print(report(glmer_model))

# Statistical Testing for Pre-Post Break

In [None]:
# Label each table with its phase and stack them.
counted_breaks[["identifier"]] <- "break"
counted_returns[["identifier"]] <- "return"
temp <- rbind(counted_breaks, counted_returns)

# Two-way ANOVA (phase x user action) on attempts per user, followed by
# Tukey HSD pairwise comparisons.
m <- aov(interactions_per_user ~ identifier * continuedToApp, data = temp)
summary(m) # sig
TukeyHSD(m) # sig Break-return and dropout-return

In [None]:
# Aligned-rank-transform ANOVA on the same design; art() is given the phase
# label as a factor.
temp[["identifier"]] <- as.factor(temp[["identifier"]])
m <- art(interactions_per_user ~ identifier*continuedToApp, data = temp)
anova(m) # sig
# Contrasts for the interaction term
art.con(m, ~identifier*continuedToApp) # sig Break-return and dropout-return

## Augmented Dickey-Fuller Test for Time Series Stationarity

In [None]:
# Give both series a condition label and a common `time` column, then drop
# rows whose opens_per_user is missing.
grouped_breaks_effectiveness[["condition"]] <- "Break"
grouped_breaks_effectiveness[["time"]] <-
  grouped_breaks_effectiveness[["time_to_break"]]
grouped_breaks_effectiveness <- grouped_breaks_effectiveness[
  !is.na(grouped_breaks_effectiveness$opens_per_user),
]

grouped_returns_effectiveness[["condition"]] <- "Return"
grouped_returns_effectiveness[["time"]] <-
  grouped_returns_effectiveness[["time_after_return"]]
grouped_returns_effectiveness <- grouped_returns_effectiveness[
  !is.na(grouped_returns_effectiveness$opens_per_user),
]

In [None]:
# Augmented Dickey-Fuller tests on attempts per user for the first-use,
# pre-break and post-return series.
print(adf.test(df_first_use$interactions_per_user))
print(adf.test(grouped_breaks_effectiveness$opens_per_user))
print(adf.test(grouped_returns_effectiveness$opens_per_user))

In [None]:
# Augmented Dickey-Fuller tests on the percentage series; the first-use table
# is restricted to its 'didNotOpenApp' rows.
print(adf.test(df_first_use_result[df_first_use_result$continuedToApp == 'didNotOpenApp',]$percentage))
print(adf.test(grouped_breaks_effectiveness$percentage))
print(adf.test(grouped_returns_effectiveness$percentage))

## Mann-Kendall Trend Test

In [None]:
# Mann-Kendall trend tests on attempts per user for the first-use, pre-break
# and post-return series.
print(MannKendall(df_first_use$interactions_per_user))
print(MannKendall(grouped_breaks_effectiveness$opens_per_user))
print(MannKendall(grouped_returns_effectiveness$opens_per_user))

In [None]:
# Mann-Kendall trend tests on the percentage series; the first-use table is
# restricted to its 'didNotOpenApp' rows.
print(MannKendall(df_first_use_result[df_first_use_result$continuedToApp == 'didNotOpenApp',]$percentage))
print(MannKendall(grouped_breaks_effectiveness$percentage))
print(MannKendall(grouped_returns_effectiveness$percentage))

## Pearson Correlation

In [None]:
# Pearson correlation between time and attempts per user for the first-use,
# pre-break and post-return series.
pearsonTest(df_first_use$hoursSinceStart, df_first_use$interactions_per_user)
pearsonTest(grouped_breaks_effectiveness$time, grouped_breaks_effectiveness$opens_per_user)
pearsonTest(grouped_returns_effectiveness$time, grouped_returns_effectiveness$opens_per_user)

# Days of the Week

## General usage

In [None]:
# Number of rows in df_noClose for each day of the week.
weekday_histogram <- df_noClose %>%
  group_by(day_of_week) %>%
  summarise(count = n())

# Monday-first ordering, applied as the factor levels.
days_ordered <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
weekday_histogram$day_of_week <- factor(
  weekday_histogram$day_of_week,
  levels = days_ordered
)


In [None]:
# Rows whose lifeStatus is "Break" or "Return", with the status as a factor.
df_combo <- df_noClose[df_noClose$lifeStatus %in% c("Break", "Return"), ]
df_combo$lifeStatus <- factor(df_combo$lifeStatus)


# Row counts per day of the week and status.
weekday_histogram_combo <- df_combo %>%
  group_by(day_of_week, lifeStatus) %>%
  summarise(count = n())

weekday_histogram_combo$day_of_week <- factor(
  weekday_histogram_combo$day_of_week,
  levels = days_ordered
)

In [None]:
# Rows with lifeStatus "Alive", relabelled as "Normal Use" for the legend.
df_alive <- df_noClose[df_noClose$lifeStatus %in% "Alive", ]
df_alive$lifeStatus <- "Normal Use"
df_alive$lifeStatus <- factor(df_alive$lifeStatus)


# Rows per day of the week divided by the number of distinct users in
# df_alive. This overwrites the break/return table from the previous cell.
weekday_histogram_combo <- df_alive %>%
  group_by(day_of_week, lifeStatus) %>%
  summarise(count_per_user = n() / length(unique(df_alive$userIndex)))

weekday_histogram_combo$day_of_week <- factor(
  weekday_histogram_combo$day_of_week,
  levels = days_ordered
)

# Set the notebook plot size via the repr.plot options
options(repr.plot.width=18, repr.plot.height=8)

# Bar chart of the per-user count for each day of the week
ggplot(weekday_histogram_combo, aes(x = day_of_week)) +
  geom_bar(stat = "identity", position="dodge", aes(y = count_per_user, fill=lifeStatus)) +
  labs(x = "Day of the Week", y = "Count per User", fill="Status") +
  # NOTE(review): only `fill` is mapped here, so this colour scale appears to
  # have no visible effect — confirm whether scale_fill_manual was intended.
  scale_color_manual(values = two_color) + 
  scale_y_continuous(expand=c(0,0))+
  theme_classic() +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal")


# ggsave('figures/day_of_week_alive.pdf', width=12, height=7)

In [None]:
# Rows with lifeStatus "Alive", relabelled as "Normal Use".
# The subset used to be assigned to `df_day_break` (a copy-paste slip) while
# the code below read `df_alive` left over from the previous cell. Assigning
# `df_alive` here makes the cell self-contained and stops it from overwriting
# `df_day_break` with "Alive" rows.
df_alive <- df_noClose[df_noClose$lifeStatus %in% c("Alive"), ]

df_alive$lifeStatus <- "Normal Use"
df_alive$lifeStatus <- factor(df_alive$lifeStatus)

# Rows per user and day of the week.
weekday_histogram_combo <- df_alive %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarize(count_per_user = n())

# Largest weeksSinceStart value per user, taken from the full data set `df`.
df_max_weeks <- df %>%
  group_by(userIndex) %>%
  summarise(max_weeks = max(weeksSinceStart)) 

# Normalise each user's per-day count by that user's max_weeks. A max_weeks of
# 0 yields Inf here; non-finite values are excluded from the mean in the
# outlier step that follows.
df_weekday <- left_join(weekday_histogram_combo, df_max_weeks, by='userIndex')
df_weekday$normalized_count <- df_weekday$count_per_user / df_weekday$max_weeks



df_weekday$day_of_week <- factor(df_weekday$day_of_week, levels = days_ordered)

# Copy of the weekday table in which Tukey outliers (flag 1 from
# tukey_detect(), default 1.5 * IQR fences) are replaced by the mean of the
# finite normalized counts.
df_weekday_outlier <- df_weekday
outliers_days <- tukey_detect(dv = df_weekday_outlier$normalized_count)
# is.finite() replaces the former comparison against the string 'Inf': it
# excludes Inf, NaN and NA from the mean, where the string comparison only
# excluded Inf and let NA/NaN poison the replacement value.
# which() drops NA outlier flags, so rows with a missing count are left
# untouched instead of breaking the assignment.
df_weekday_outlier$normalized_count[which(outliers_days == 1)] <-
  mean(df_weekday_outlier$normalized_count[
    is.finite(df_weekday_outlier$normalized_count)
  ])


# Set the notebook plot size via the repr.plot options
options(repr.plot.width=18, repr.plot.height=8)

# Box plot of the normalized per-user counts by day of the week; the black
# cross (shape 4) added by stat_summary marks each day's mean.
ggplot(df_weekday_outlier, aes(x = day_of_week, y = normalized_count, fill=lifeStatus)) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", shape = 4, size = 3, color = "black") +
  labs(x = "Day of the Week", y = "Count per User", fill="Status") +
  # NOTE(review): only `fill` is mapped here, so this colour scale appears to
  # have no visible effect — confirm whether scale_fill_manual was intended.
  scale_color_manual(values = two_color) + 
  # The y axis is limited to 0-50.
  scale_y_continuous(limits=c(0,50))+
  theme_classic() +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal")


# ggsave('figures/day_of_week_box.pdf', width=20, height=7)

In [None]:
# --- Break rows: mean and standard error of per-user counts per weekday -----
df_day_break <- df_noClose[df_noClose$lifeStatus %in% "Break", ]
df_day_break$lifeStatus <- factor(df_day_break$lifeStatus)

df_day_break_histogram_combo <- df_day_break %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarise(count_per_user = n()) %>%
  group_by(day_of_week, lifeStatus) %>%
  summarise(
    mean_interactions = mean(count_per_user),
    std_error_interactions = sd(count_per_user) / sqrt(n())
  )

df_day_break_histogram_combo$day_of_week <- factor(
  df_day_break_histogram_combo$day_of_week,
  levels = days_ordered
)




# --- Return rows: same summary ----------------------------------------------
df_day_return <- df_noClose[df_noClose$lifeStatus %in% "Return", ]
df_day_return$lifeStatus <- factor(df_day_return$lifeStatus)

df_day_return_histogram_combo <- df_day_return %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarise(count_per_user = n()) %>%
  group_by(day_of_week, lifeStatus) %>%
  summarise(
    mean_interactions = mean(count_per_user),
    std_error_interactions = sd(count_per_user) / sqrt(n())
  )

df_day_return_histogram_combo$day_of_week <- factor(
  df_day_return_histogram_combo$day_of_week,
  levels = days_ordered
)

# Stack both summaries for the grouped bar chart.
df_day_break_return_histogram_combo <- rbind(
  df_day_break_histogram_combo,
  df_day_return_histogram_combo
)

# Set the notebook plot size via the repr.plot options
options(repr.plot.width=18, repr.plot.height=8)

# Dodged bar chart of the mean per-user count for each day of the week and
# status, with +/- one standard error bars.
ggplot(df_day_break_return_histogram_combo, aes(x = day_of_week,y = mean_interactions, fill=lifeStatus, ymin=mean_interactions-std_error_interactions, ymax=mean_interactions+std_error_interactions)) +
  geom_bar( stat = "identity", position="dodge", color='black') +
  # position_dodge(.9) lines the error bars up with the dodged bars.
  geom_errorbar(width=0.2, position=position_dodge(.9), color='black')+
  # Axis label typo fixed ("Normapized" -> "Normalized").
  labs(x = "Day of the Week", y = "Normalized Count per User", fill="Status") +
  # NOTE(review): `color` is set as a constant, not mapped, so this colour scale
  # appears to have no visible effect — confirm whether scale_fill_manual was
  # intended.
  scale_color_manual(values = two_color) + 
  scale_y_continuous(expand=c(0,0))+
  theme_classic() +
  theme(text = element_text(size=25), legend.position = "bottom", legend.box="horizontal")


# ggsave('figures/day_of_week.pdf', width=20, height=7)

In [None]:
# Per-user row counts for every day of the week, built separately for the
# "Break" and "Return" statuses (these tables feed the mixed models below) and
# then stacked into one table.

# Display / factor order of the weekdays (the week starts on Wednesday here).
# days_ordered <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
days_ordered <- c(
  "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", "Monday", "Tuesday"
)

# --- Break --------------------------------------------------------------------
df_day_break <- df_noClose[df_noClose$lifeStatus %in% c("Break"), ]
df_day_break$lifeStatus <- factor(df_day_break$lifeStatus)

df_day_break_stats_combo <- df_day_break %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarize(count_per_user = n()) %>%
  # summarize() only removes the last grouping variable; ungroup so the table
  # handed to rbind()/lmer() does not carry a userIndex x day_of_week grouping.
  ungroup()

df_day_break_stats_combo$day_of_week <- factor(
  df_day_break_stats_combo$day_of_week,
  levels = days_ordered
)

# --- Return -------------------------------------------------------------------
df_day_return <- df_noClose[df_noClose$lifeStatus %in% c("Return"), ]
df_day_return$lifeStatus <- factor(df_day_return$lifeStatus)

df_day_return_stats_combo <- df_day_return %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarize(count_per_user = n()) %>%
  ungroup()

df_day_return_stats_combo$day_of_week <- factor(
  df_day_return_stats_combo$day_of_week,
  levels = days_ordered
)

# Stack both tables; lifeStatus tells the two sets of rows apart.
df_day_break_return_stats_combo <- rbind(
  df_day_break_stats_combo,
  df_day_return_stats_combo
)

In [None]:
# Preview the first six rows of the stacked Break/Return per-user counts.
head(df_day_break_return_stats_combo, n = 6L)

In [None]:
# Linear mixed model, fitted with lmer(), of the per-user daily counts in
# df_day_break_stats_combo: day_of_week as fixed effect and a random intercept
# for every userIndex.
# NOTE(review): the variable is called glmer_model but the fit comes from
# lmer(), i.e. a Gaussian model on count data - confirm that this is intended.
glmer_model <- lmer(
  formula = count_per_user ~ day_of_week + (1 | userIndex),
  data = df_day_break_stats_combo
)
summary(glmer_model)

In [None]:
# Print the report() write-up of whichever model glmer_model currently holds.
model_report <- report(glmer_model)
print(model_report)

In [None]:
# Same model specification as for the Break data, refitted with lmer() on
# df_day_return_stats_combo: day_of_week as fixed effect and a random intercept
# for every userIndex. This overwrites the previous glmer_model.
# NOTE(review): despite the name, this is an lmer() fit (Gaussian) on count
# data - confirm that this is intended.
glmer_model <- lmer(
  formula = count_per_user ~ day_of_week + (1 | userIndex),
  data = df_day_return_stats_combo
)
summary(glmer_model)

In [None]:
# Print the report() write-up of whichever model glmer_model currently holds.
model_report <- report(glmer_model)
print(model_report)

In [None]:
# Box plot of the number of df_noClose rows per user and day of the week,
# compared between the "Break" and "Return" statuses. The stacked table built
# here is also the input of the ART model in the following cell.
# NOTE(review): `days_ordered` is not defined in this cell; it has to exist in
# the session already when this cell runs.

# --- Break --------------------------------------------------------------------
df_day_break <- df_noClose[df_noClose$lifeStatus %in% c("Break"), ]
df_day_break$lifeStatus <- factor(df_day_break$lifeStatus)

df_day_break_histogram_combo <- df_day_break %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarize(count_per_user = n()) %>%
  # summarize() only removes the last grouping variable; ungroup so the table
  # does not stay grouped by userIndex x day_of_week.
  ungroup()

df_day_break_histogram_combo$day_of_week <- factor(
  df_day_break_histogram_combo$day_of_week,
  levels = days_ordered
)

# --- Return -------------------------------------------------------------------
df_day_return <- df_noClose[df_noClose$lifeStatus %in% c("Return"), ]
df_day_return$lifeStatus <- factor(df_day_return$lifeStatus)

df_day_return_histogram_combo <- df_day_return %>%
  group_by(userIndex, day_of_week, lifeStatus) %>%
  summarize(count_per_user = n()) %>%
  ungroup()

df_day_return_histogram_combo$day_of_week <- factor(
  df_day_return_histogram_combo$day_of_week,
  levels = days_ordered
)

# Stack both tables; lifeStatus tells the two sets of rows apart.
df_day_break_return_histogram_combo <- rbind(
  df_day_break_histogram_combo,
  df_day_return_histogram_combo
)

options(repr.plot.width = 18, repr.plot.height = 8)

# Create the box plot
ggplot(df_day_break_return_histogram_combo, aes(x = day_of_week)) +
  geom_boxplot(aes(y = count_per_user, fill = lifeStatus)) +
  labs(x = "Day of the Week", y = "Count per User", fill = "Status") +
  # The boxes are coloured through `fill`, so the palette has to be attached to
  # the fill scale; a colour scale is ignored because no colour is mapped.
  scale_fill_manual(values = two_color) +
  scale_y_continuous(expand = c(0, 0)) +
  # Zoom to 0-5 instead of setting scale limits: scale limits turn counts above
  # 5 into NA *before* the quartiles are computed, so the boxes would describe a
  # truncated sample (and the warning is suppressed by options(warn = -1)).
  coord_cartesian(ylim = c(0, 5)) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    legend.position = "bottom",
    legend.box = "horizontal"
  )


# ggsave('figures/day_of_week_box.pdf', width=12, height=7)

In [None]:
# Aligned rank transform model (ARTool's art()) of the per-user daily counts
# with day_of_week, lifeStatus and their interaction, followed by its ANOVA
# table.
# NOTE(review): userIndex does not appear in the formula although the table has
# several rows per user - confirm that ignoring the repeated measures is
# intended.
m <- art(
  count_per_user ~ day_of_week * lifeStatus,
  data = df_day_break_return_histogram_combo
)
anova(m) # sig
# art.con(m, ~day_of_week*lifeStatus) # sig Break-return and dropout-return

# Apps

In [None]:
# Bar chart of the share of app-open attempts per target app category, with the
# rounded percentage printed above every bar.
options(repr.plot.width = 10, repr.plot.height = 7)

# Break the long category name over two lines so the axis label stays readable.
is_news_category <- df_app_categories$app_mapped == "News & Entertainment"
df_app_categories[is_news_category, "app_mapped"] <- "News &\nEntertainment"

# Use the current row order as the level order so the bars keep that order.
df_app_categories$app_mapped <- factor(
  df_app_categories$app_mapped,
  levels = df_app_categories$app_mapped
)

# df_day_return_histogram_combo$day_of_week <- factor(df_day_return_histogram_combo$day_of_week, levels = days_ordered)

# Build the bar chart
app_category_plot <- ggplot(
  df_app_categories,
  aes(x = app_mapped, y = percentage)
) +
  geom_bar(
    stat = "identity",
    fill = two_color[[1]],
    color = "black",
    alpha = 0.5
  ) +
  geom_text(
    aes(label = round(percentage, digits = 1)),
    vjust = -0.5,
    size = 7.5
  ) +
  labs(x = "Target App Category", y = "% of all App Open Attempts") +
#   scale_x_discrete(limits=c(NA,80), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(
    limits = c(0, 100),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 0, b = 0, l = 5, unit = "pt"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

app_category_plot

# ggsave('figures/app_categories.pdf', width=10, height=7)

In [None]:
# Bar chart of the share of app-open attempts for the first ten rows of
# df_app_usage, with the rounded percentage printed above every bar.
options(repr.plot.width = 10, repr.plot.height = 7)

# First ten rows (the former index 0:10 picked the same rows, because R ignores
# a 0 subscript).
df_app_usage_temp <- df_app_usage[seq_len(10), ]

# Display names, assigned by position and also used as the level order so the
# bars keep the row order.
# NOTE(review): this overwrites the `app` column positionally - it assumes the
# first ten rows of df_app_usage are exactly these apps in exactly this order.
top_app_labels <- c(
  "Instagram", "Twitter", "YouTube", "TikTok", "Facebook",
  "Reddit", "Snapchat", "Apollo (Reddit)", "Telegram", "LinkedIn"
)
df_app_usage_temp$app <- factor(top_app_labels, levels = top_app_labels)



# Build the bar chart
app_plot <- ggplot(df_app_usage_temp, aes(x = app, y = percentage)) +
  geom_bar(
    stat = "identity",
    fill = two_color[[1]],
    color = "black",
    alpha = 0.5
  ) +
  geom_text(
    aes(label = round(percentage, digits = 1)),
    vjust = -0.5,
    size = 7.5
  ) +
  labs(x = "Target App", y = "% of all App Open Attempts") +
#   scale_x_discrete(limits=c(NA,80), expand=c(0,0),  breaks=pretty_breaks()) +
  scale_y_continuous(
    limits = c(0, 100),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 0, b = 0, l = 5, unit = "pt"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

app_plot

# ggsave('figures/apps_top10.pdf', width=10, height=7)

# Interventions

In [None]:
# Number of distinct users (userIndex) seen with each interventionType in
# df_noClose, sorted from the most to the least used intervention.
counts_by_intervention <- df_noClose %>%
  group_by(interventionType) %>%
  summarize(user_count = n_distinct(userIndex)) %>%
  ungroup() %>%
  arrange(desc(user_count))

# Discard the row that collects the records without an intervention type.
has_intervention <- !is.na(counts_by_intervention$interventionType)
intervention_user_counts <- counts_by_intervention[has_intervention, ]
intervention_user_counts

In [None]:
# Bar chart of the number of unique users per intervention type, with the count
# printed above every bar.

# Display names, assigned by position and also used as the level order so the
# bars keep the row order of intervention_user_counts.
# NOTE(review): this overwrites interventionType positionally - it assumes the
# table has exactly six rows sorted in exactly this order.
intervention_labels <- c(
  "Breathing Exercise",
  "Minimal Breathing\nExercise",
  "Follow the Dot",
  "Mirror",
  "Black Screen",
  "Rotate Phone"
)
intervention_user_counts$interventionType <- factor(
  intervention_labels,
  levels = intervention_labels
)

options(repr.plot.width = 20, repr.plot.height = 7)

# Build the bar chart
interventions_plot <- ggplot(
  intervention_user_counts,
  aes(x = interventionType, y = user_count)
) +
  geom_bar(
    stat = "identity",
    fill = two_color[[1]],
    color = "black",
    alpha = 0.5
  ) +
  geom_text(aes(label = user_count), vjust = -0.5, size = 7.5) +
  labs(x = "Intervention Type", y = "Unique User Count") +
  # scale_x_discrete(limits=c(NA,80), expand=c(0,0),  breaks=pretty_breaks()) +
  # NOTE(review): the upper limit 1010 is hard-coded; a bar taller than that
  # would be dropped from the plot - confirm it still fits the data.
  scale_y_continuous(
    limits = c(0, 1010),
    expand = c(0, 0),
    breaks = pretty_breaks()
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 25),
    plot.margin = margin(t = 10, r = 0, b = 0, l = 0, unit = "pt"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

interventions_plot

# ggsave('figures/interventions.pdf', width=20, height=7)