# Effect of AI on Customer Churn


## Load the required libraries

In [None]:
# Install only the packages that are not yet available, so re-running this
# cell does not reinstall everything. tidyverse is included because it is
# attached with library() in the next cell.
required_pkgs <- c("tidymodels", "tidyverse", "aod", "glmnet")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

also installing the dependencies ‘dials’, ‘hardhat’, ‘infer’, ‘modeldata’, ‘parsnip’, ‘recipes’, ‘rsample’, ‘tidyr’, ‘tune’, ‘workflows’, ‘workflowsets’, ‘yardstick’


“installation of package ‘dials’ had non-zero exit status”


In [None]:
# Loading libraries (attach the packages used throughout the notebook)
library(tidymodels)
library(tidyverse)
library(glmnet)

# Fix the random seed so that steps relying on random numbers give the same
# result on every run
set.seed(2005)

## Read the data 

In [None]:
# Read the training split and store the outcome, Customer_Churn, as a factor
# so it is treated as a class label
training_data <- read.csv("train.csv") |>
  mutate(Customer_Churn = as.factor(Customer_Churn))

# Same treatment for the testing split
testing_data <- read.csv("test.csv") |>
  mutate(Customer_Churn = as.factor(Customer_Churn))

# Preview the training columns, their types and first values
glimpse(training_data)

## Making predictions based on the testing data

In [None]:
# Baseline logistic regression fitted through glmnet with penalty = 0 and
# mixture = 0, using every other column of the training data as a predictor
model <- logistic_reg(penalty = 0, mixture = 0) |>
  set_mode("classification") |>
  set_engine("glmnet") |>
  fit(Customer_Churn ~ ., data = training_data)

# Coefficient table of the fitted model
tidy(model)

# Predicted class probabilities for the testing data
pred_proba <- predict(model, new_data = testing_data, type = "prob")

# Predicted class labels for the testing data
pred_class <- predict(model, new_data = testing_data, type = "class")

## Performing logistic regression - Hyperparameter Tuning

In [None]:
# Logistic regression specification (glmnet engine) whose penalty and mixture
# are left as placeholders to be tuned
log_reg <- logistic_reg(penalty = tune(), mixture = tune()) |>
  set_engine("glmnet")

# Regular grid of candidates: 4 mixture values crossed with 3 penalty values
grid <- grid_regular(
  mixture(),
  penalty(),
  levels = c(mixture = 4, penalty = 3)
)

# Workflow bundling the formula (all columns as predictors) with the model
log_reg_wf <- workflow() |>
  add_formula(Customer_Churn ~ .) |>
  add_model(log_reg)

# 5-fold cross-validation splits of the training data
folds <- vfold_cv(training_data, v = 5)

# Evaluate every grid candidate on every fold, keeping the out-of-fold
# predictions
log_reg_tuned <- tune_grid(
  log_reg_wf,
  resamples = folds,
  grid = grid,
  control = control_grid(save_pred = TRUE)
)

# Candidate with the best ROC AUC
select_best(log_reg_tuned, metric = "roc_auc")

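Ranking the grid candidates by their ROC AUC over the 5-fold cross-validation, we can see that the model works best with penalty = 1e-10 and mixture = 0.6666667. These values are used to fit the final model below.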

## Performing logistic regression - Applying model

In [None]:
# Fit the final model with the selected hyperparameters on three predictors
log_reg_final <- logistic_reg(penalty = 1e-10, mixture = 0.6666667) %>%
  set_engine("glmnet") %>%
  set_mode("classification") %>%
  fit(
    Customer_Churn ~ Satisfaction_with_AI_Services +
      Customer_Service_Interactions +
      Age,
    data = training_data
  )

# Evaluate the model performance on the testing set.
# Both the class labels and the class probabilities are taken from the final
# model, so the columns of `results` all describe the same fit.
pred_class <- predict(log_reg_final,
                      new_data = testing_data,
                      type = "class")

pred_proba <- predict(log_reg_final,
                      new_data = testing_data,
                      type = "prob")

# Testing data with the probability columns and .pred_class appended
results <- testing_data |>
  bind_cols(pred_proba, pred_class)

# Two-column table for export: row identifier and predicted class.
# NOTE(review): assumes the testing data has a column named ID - confirm
# against test.csv.
result_form <- results |>
  select(ID, Prediction = .pred_class) |>
  as_tibble()

# Create confusion matrix
conf_mat(results, truth = Customer_Churn,
         estimate = .pred_class)

# Calculate the accuracy rate
accuracy(results, truth = Customer_Churn, estimate = .pred_class)


## Conclusion 

In [None]:
# Save the predictions to a CSV file for further analysis.
# write.csv() always uses a comma separator; passing `sep` is ignored with a
# warning, so it is not supplied.
write.csv(result_form, "prediction.csv", row.names = FALSE)