# Effect of AI on Customer Churn


## Load the required libraries

In [1]:
install.packages("tidymodels");
install.packages("aod")
install.packages("glmnet");

“installation of package ‘tidymodels’ had non-zero exit status”
Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

also installing the dependency ‘RcppEigen’


“installation of package ‘RcppEigen’ had non-zero exit status”
“installation of package ‘glmnet’ had non-zero exit status”
Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [2]:
# Installing libraries
#library(tidymodels)
library(tidyverse)
library(ggcorrplot)
library(readr)
library(aod)
library(glmnet)
library(caret)

# Setting a seed so our analysis is random but reproducible
set.seed(2005)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.3     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


ERROR: Error in library(glmnet): there is no package called ‘glmnet’


## Read the data 

In [None]:
#read data
training_data <- read.csv("train.csv")
testing_data <- read.csv("test.csv")

#convert Churn from interger to factor
training_data$Customer_Churn <- as.factor(training_data$Customer_Churn)

#preview the dataset
glimpse(training_data)


## Data summary


In [None]:
# Display the observations of customer churn

total_observation_count <- nrow(training_data)

observation_count_summary <- training_data |>
    group_by(Customer_Churn) |>
    summarize(number_of_observations = n(),
              percentage = (n() / total_observation_count) * 100)

observation_count_summary

In [None]:
# Display the average value of all predictor variables

average_value_summary <- training_data |>
    group_by(Customer_Churn) |>
 rename_with(~ paste(.x, "average", sep = "_"), -Customer_Churn) |>
    summarize(across(everything(), ~ mean(.x, na.rm = TRUE)))

average_value_summary

## Visualization of training data

In [None]:
# Visualize the distribution of our relevant feature variables
options(repr.plot.width = 14, repr.plot.height = 8)

age_plot <- ggplot(training_data, aes(Age, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

ai_interaction_plot <- ggplot(training_data, aes(AI_Interaction_Level, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

satisfaction_plot <- ggplot(training_data, aes(Satisfaction_with_AI_Services, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

persionalization_plot <- ggplot(training_data, aes(AI_Personalization_Effectiveness, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

response_time_plot <- ggplot(training_data, aes(AI_Response_Time, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

usage_frequency_plot <- ggplot(training_data, aes(Overall_Usage_Frequency, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

customer_service_int_plot <- ggplot(training_data, aes(Customer_Service_Interactions, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

usage_patterns_plot <- ggplot(training_data, aes(Change_in_Usage_Patterns, fill = Customer_Churn)) +
    geom_bar() +
    coord_flip()

grid.arrange(age_plot, ai_interaction_plot, satisfaction_plot, ncol=3)
grid.arrange(persionalization_plot, customer_service_int_plot,usage_patterns_plot, ncol=3)

## Correlation between variables 

In [None]:
train_non_factor <- training_data
train_non_factor$Customer_Churn <- as.numeric(training_data$Customer_Churn)

# Correlation for different combinations of variables
correlation<- train_non_factor |> cor(use="complete.obs") # find the correlation
ggcorrplot(correlation, 
          hc.order = TRUE, 
          lab = TRUE, 
          type = "lower");

# Comparing the Variables
compare_customers <- function(){
    ggplot(churn, aes(x = Customer_Churn, color = as.factor(AI_Interaction_Level))) +
    geom_histogram(fill = "white", bins = 10)
}

table(train$Customer_Churn)

## Performing logistic regression

In [None]:
# Define the logistic regression model with penalty and mixture hyperparameters
log_reg <- logistic_reg(mixture = tune(), penalty = tune(), engine = "glmnet")

# Define the grid search for the hyperparameters
grid <- grid_regular(mixture(), penalty(), levels = c(mixture = 4, penalty = 3))

# Define the workflow for the model
log_reg_wf <- workflow() %>%
  add_model(log_reg) %>%
  add_formula(y ~ .)

# Define the resampling method for the grid search
folds <- vfold_cv(training_data, v = 5)

# Tune the hyperparameters using the grid search
log_reg_tuned <- tune_grid(
  log_reg_wf,
  resamples = folds,
  grid = grid,
  control = control_grid(save_pred = TRUE))

select_best(log_reg_tuned, metric = "roc_auc")