In [None]:
## Importing packages

# This R environment comes with all of CRAN and many other helpful packages preinstalled.
# You can see which packages are installed by checking out the kaggle/rstats docker image: 
# https://github.com/kaggle/docker-rstats

library(tidyverse) # metapackage with lots of helpful functions

## Running code

# In a notebook, you can run a single code cell by clicking in the cell and then hitting 
# the blue arrow to the left, or by clicking in the cell and pressing Shift+Enter. In a script, 
# you can run code by highlighting the code you want to run and then clicking the blue arrow
# at the bottom of this window.

## Reading in files

# Datasets attached to this kernel are exposed under the "../input/" directory.
# The call below shows which files are available there.

dir(path = "../input")

## Saving data

# If you save any files or images, these will be put in the "output" directory. You 
# can see the output directory by committing and running your kernel (using the 
# Commit & Run button) and then checking out the compiled version of your kernel.

In [None]:
library(caret)
library(ggplot2)
library(dplyr)
# Load the raw churn table from the kernel's input directory.
data <- read_csv(file = "../input/Telco-Customer-Churn.csv")

In [None]:
# Quick structural overview of the raw table: size, column types, and
# per-column summary statistics.
data %>% dim()
data %>% glimpse()
data %>% summary()

In [None]:
# NA value treatment
# Count the missing entries in every column (one integer per column).
vapply(data, function(column) sum(is.na(column)), integer(1))

# Drop every row that contains at least one missing value.
data <- na.omit(data)

# Sanity check: number of missing cells left after the drop.
sum(is.na(data))

In [None]:
# Discarding insignificant variable: drop the first column so it is not used
# as a predictor (presumably an identifier column -- confirm against the CSV).
data <- data[, -1]
dim(data)

# Fitting logistic regression.
# The model is fit on `data` itself: the dummy-encoded table `new` is only
# created in a later cell, so it does not exist yet at this point.
# Churn is made an explicit factor; a binomial glm then models the
# probability of the second factor level.
data$Churn <- factor(data$Churn)
churn_levels <- levels(data$Churn)
glm.fit <- glm(Churn ~ ., data = data, family = "binomial")

# In-sample fitted probabilities of the second Churn level.
prob <- predict(glm.fit, data, type = "response")

# Translate probabilities back into Churn labels (0.5 cut-off) so they are
# directly comparable with the observed labels; comparing 0/1 codes against
# the labels would count every row as an error.
pred <- factor(
  ifelse(prob > 0.5, churn_levels[2], churn_levels[1]),
  levels = churn_levels
)

# Rows: observed Churn, columns: predicted Churn.
table(data$Churn, pred)
misClassError <- mean(pred != data$Churn)
print(paste('Accuracy=', 1 - misClassError))

In [None]:
# A different approach: encode the predictors as dummy (indicator) variables.
# fullRank = TRUE is passed so that the encoding is not linearly dependent.
dmy <- dummyVars(formula = Churn ~ ., data = data, fullRank = TRUE)

# Apply the encoding to the full table and keep the result as a data frame.
new <- data.frame(predict(dmy, newdata = data))
dim(new)

# Re-attach the response column to the encoded table.
new[["Churn"]] <- data[["Churn"]]

In [None]:
# Hold-out evaluation of a logistic regression fitted through caret.
# Fixed seed so the 80/20 partition is reproducible.
set.seed(1000)
index <- createDataPartition(new$Churn, p = 0.8, list = FALSE)
train <- new[index, ]
test <- new[-index, ]

model <- train(Churn ~ ., data = train, method = "glm", family = "binomial")

# Class predictions (predict.train's default type); confusionMatrix() needs
# predicted classes, not the probability table returned by type = "prob".
pred <- predict(model, test)
summary(model)

# The reference must be a factor sharing the levels of the predictions;
# read_csv() leaves Churn as character, so it is converted here.
confusionMatrix(pred, factor(test[["Churn"]], levels = levels(pred)))

In [None]:
# k-fold cross-validation: 10 folds repeated 3 times. twoClassSummary reports
# ROC/Sens/Spec, which requires class probabilities (classProbs = TRUE).
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = FALSE
)

# metric = "ROC" matches the summary function; without it caret falls back
# to ROC anyway but emits a warning that "Accuracy" is not available.
model <- train(
  Churn ~ .,
  data = train,
  method = "glm",
  family = "binomial",
  metric = "ROC",
  trControl = control
)

# `pred` keeps the per-class probability table (one column per class level);
# it is what the ROC cell further down consumes.
pred <- predict(model, test, type = "prob")
summary(model)

# confusionMatrix() needs predicted classes and a factor reference with the
# same levels, so class predictions are computed separately.
pred_class <- predict(model, test)
confusionMatrix(pred_class, factor(test[["Churn"]], levels = levels(pred_class)))

In [None]:
# variable importance
varImp(model)

# ROC-AUC curve
library(ROCR)

# ROCR expects ONE score vector per run, not the whole probability table.
# Use the probability column of the second class level, which is the level
# ROCR treats as the positive class under its default label ordering.
positive <- levels(factor(test$Churn))[2]
stopifnot(positive %in% colnames(pred))
ROCRPred <- prediction(pred[[positive]], test$Churn)

# True-positive rate against false-positive rate, i.e. the ROC curve.
ROCRPref <- performance(ROCRPred, measure = 'tpr', x.measure = 'fpr')
plot(ROCRPref)

# Area under the ROC curve; the value is stored in the y.values slot.
auc <- performance(ROCRPred, measure = "auc")
auc@y.values[[1]]
# Finally we would be able to build a model with significant features

In [None]:
# Regularized generalized linear model (glmnet), tuned on cross-validated ROC.
# Predictors are centered and scaled before fitting.
glmnet_mod <- train(
  Churn ~ .,
  data = train,
  metric = "ROC",
  method = "glmnet",
  trControl = control,
  preProcess = c("center", "scale")
)
plot(glmnet_mod)

# Mixing parameter selected by the tuning procedure.
glmnet_mod$bestTune$alpha

glmnet_pred <- predict(glmnet_mod, test)

# The response column is "Churn" (capital C); the lower-case name yields NULL.
# The reference is converted to a factor with the levels of the predictions,
# as confusionMatrix() requires.
cm <- confusionMatrix(
  glmnet_pred,
  factor(test[["Churn"]], levels = levels(glmnet_pred))
)

# Accuracy together with its confidence-interval bounds.
accuracy <- cm$overall[c("Accuracy", "AccuracyLower", "AccuracyUpper")]

In [None]:
# Random forest (ranger backend), tuned on cross-validated ROC.
rf_mod <- train(
  Churn ~ .,
  data = train,
  metric = "ROC",
  method = "ranger",
  trControl = control
)
plot(rf_mod)

# ranger has no `alpha` tuning parameter (that lookup is always NULL);
# show the complete set of selected tuning values instead.
rf_mod$bestTune

rf_pred <- predict(rf_mod, test)

# The response column is "Churn" (capital C); the lower-case name yields NULL.
# The reference is converted to a factor with the levels of the predictions,
# as confusionMatrix() requires.
cm <- confusionMatrix(
  rf_pred,
  factor(test[["Churn"]], levels = levels(rf_pred))
)

# Accuracy together with its confidence-interval bounds.
accuracy <- cm$overall[c("Accuracy", "AccuracyLower", "AccuracyUpper")]

In [None]:
# K-nearest neighbour, with centered and scaled predictors and 50 candidate
# tuning values. metric = "ROC" matches the twoClassSummary used by `control`
# and avoids caret's "Accuracy not in result set" warning.
knn_mod <- train(
  Churn ~ .,
  data = train,
  method = "knn",
  metric = "ROC",
  trControl = control,
  preProcess = c("center", "scale"),
  tuneLength = 50
)
knn_pred <- predict(knn_mod, test)

# confusionMatrix() requires a factor reference with the same levels as the
# predictions; Churn comes from read_csv() as character, so convert it here.
cm <- confusionMatrix(
  knn_pred,
  factor(test[["Churn"]], levels = levels(knn_pred))
)

# Accuracy together with its confidence-interval bounds.
accuracy <- cm$overall[c("Accuracy", "AccuracyLower", "AccuracyUpper")]

In [None]:
# Support Vector Classifier (linear kernel), tuned over an explicit grid of
# cost values. tuneLength is omitted because an explicit tuneGrid takes
# precedence over it. metric = "ROC" matches the twoClassSummary in `control`.
grid <- expand.grid(C = c(0.01, 0.05, 0.1, 0.25, 0.5))
svm_mod <- train(
  Churn ~ .,
  data = train,
  method = "svmLinear",
  metric = "ROC",
  trControl = control,
  preProcess = c('center', 'scale'),
  tuneGrid = grid
)
print(svm_mod)
plot(svm_mod)
svm_pred <- predict(svm_mod, test)

# confusionMatrix() requires a factor reference with the same levels as the
# predictions; Churn comes from read_csv() as character, so convert it here.
cm <- confusionMatrix(
  svm_pred,
  factor(test[["Churn"]], levels = levels(svm_pred))
)

# Accuracy together with its confidence-interval bounds.
accuracy <- cm$overall[c("Accuracy", "AccuracyLower", "AccuracyUpper")]

In [None]:
# Model comparison: gather the fitted caret models under display names,
# collect their resampling results, and plot the ROC metric side by side.
m_list <- list(
  Logistic = model,
  Glmnet = glmnet_mod,
  RandomForest = rf_mod,
  Knn = knn_mod,
  SVM = svm_mod
)
resamples <- caret::resamples(m_list)
dotplot(resamples, metric = "ROC")
#models<-c("Logistic","Glmnet","RandomForest","Knn","SVM")