In [None]:
# Load the raw bank-churn data; the literal string "Unknown" is read as NA.
churn <- read.csv("BankChurners.csv", na.strings = "Unknown")
head(churn)
names(churn)

# Relative frequency of each target class.
table(churn$Attrition_Flag) / nrow(churn)

# Drop the ID column (position 1) together with columns 22 and 23.
churn <- churn[-c(1, 22, 23)]

str(churn)

# Give columns 10-12 new names.
names(churn)[10:12] <- c("Customer_Product", "Inactivity_Months", "Contacts")
```


```{r}
# Convert the target and every categorical predictor to factor; right after
# each conversion print its levels and its frequency table.
# (Income_Category is the annual income band.)
factor_columns <- c(
  "Attrition_Flag", "Gender", "Education_Level",
  "Marital_Status", "Income_Category", "Card_Category"
)
for (column_name in factor_columns) {
  churn[[column_name]] <- as.factor(churn[[column_name]])
  print(levels(churn[[column_name]]))
  print(table(churn[[column_name]]))
}

# Keep only the numeric columns in a separate data frame.
numeric <- churn[, sapply(churn, is.numeric)]
str(numeric)

# Per-column summary from funModeling; stored first, then printed.
library(funModeling)
status <- df_status(churn, print_results = FALSE)
status
```

In [None]:
################## PREPROCESSING PREPROCESSING PREPROCESSING ###################
# Share of missing values in each column.
vapply(churn, function(column) sum(is.na(column)), integer(1)) / nrow(churn)
```

Il primo step della fase di preprocessing, ancora prima di scegliere se procedere
con un metodo di cross-validation o con uno split del dataset in dati di training e di validation,
è lo step riguardante l'imputazione: procederemo con il criterio "pmm" (predictive mean matching),
uno dei metodi di imputazione offerti da mice.
```{r}
library(mice)
names(churn)
# Impute every column except the first (the target) with the "pmm" method:
# 8 imputed datasets, 15 iterations, fixed seed.
pmmData <- mice(churn[-1], m = 8, maxit = 15, method = "pmm", seed = 500)

# Keep the first of the imputed datasets.
completedData <- complete(pmmData, 1)

# Put the target back in front of the imputed predictors.
Attrition_Flag <- churn$Attrition_Flag
churn <- cbind(Attrition_Flag, completedData)
names(churn)

# Check the imputation: number of NAs left in each column (expected: all 0).
sapply(churn, function(x) sum(is.na(x)))
# Scratch copy of the dataset for experiments.
churn_sheet <- churn
```

## 1.b)COLLINEARITY

```{r}
library(caret)
# Names of the numeric variables caret flags for a pairwise correlation
# above the 0.90 cutoff.
corFeatures <- findCorrelation(
  cor(numeric),
  cutoff = 0.90,
  names = TRUE
)
corFeatures
```
La variabile *"Avg_Open_To_Buy"* presenta una correlazione lineare superiore al 90%; difatti,
nella parte introduttiva dedicata alla comprensione del significato delle variabili abbiamo notato
l'esistenza di una combinazione lineare che comprende la variabile in questione:
"Avg_Open_To_Buy" = "Credit_Limit" - "Total_Revolving_Bal".

Procediamo...
```{r}
library(corrplot)
# Colour map (upper triangle only) of the correlations among numeric columns.
corrplot(cor(numeric), method = "color", type = "upper")
```

```{r}
names(churn)
# Remove column 15 from the modelling dataset.
churn <- churn[-15]
# Predictors only: the same data without the target (column 1).
churn_features <- churn[-1]
# Refresh the scratch copy.
churn_sheet <- churn
```

## 1.c) ZERO VARIANCE
```{r}
# Zero / near-zero variance metrics for every predictor.
no_variance <- nearZeroVar(x = churn_features, saveMetrics = TRUE)
no_variance

In [None]:
######################## STEP1 STEP1 STEP1 STEP1 STEP1 #########################
#                            MODEL TUNING

```{r}
# Recode the target: "Attrited Customer" becomes "l1", anything else "l0".
is_attrited <- churn$Attrition_Flag == "Attrited Customer"
churn$Attrition_Flag <- ifelse(is_attrited, "l1", "l0")
table(churn$Attrition_Flag)
churn_sheet <- churn
```


##              CREAZIONE DELLE PARTIZIONI DI TRAIN & VALIDATION
```{r}
library(caret)

# --- Hold-out split: 95% kept for modelling, 5% set aside for scoring ----
set.seed(1)
partition_score <- createDataPartition(y = churn$Attrition_Flag, times = 1, p = .95)
modelling_rows <- partition_score$Resample1
# The scoring set must be extracted BEFORE `churn` is overwritten: the indices
# in `modelling_rows` refer to row positions of the full dataset, so applying
# them (negated) to the already-subsetted data would select the wrong rows.
score_churn <- churn[-modelling_rows, ]
churn <- churn[modelling_rows, ]
table(churn$Attrition_Flag)
table(score_churn$Attrition_Flag)

# --- Train / test split (70% / 30%) of the modelling data ----
set.seed(1)
partition <- createDataPartition(y = churn$Attrition_Flag, times = 1, p = .7)
train_churn <- churn[partition$Resample1, ]
test_churn <- churn[-partition$Resample1, ]

# For each of the three sets: inspect the structure, turn the target into a
# factor and print the class proportions.
str(train_churn)
train_churn$Attrition_Flag <- as.factor(train_churn$Attrition_Flag)
table(train_churn$Attrition_Flag) / nrow(train_churn)

str(test_churn)
test_churn$Attrition_Flag <- as.factor(test_churn$Attrition_Flag)
table(test_churn$Attrition_Flag) / nrow(test_churn)

str(score_churn)
score_churn$Attrition_Flag <- as.factor(score_churn$Attrition_Flag)
table(score_churn$Attrition_Flag) / nrow(score_churn)


```


## 2.a) RANDOM FOREST

```{r}
set.seed(1)

# 10-fold CV with grid search; twoClassSummary needs class probabilities.
# TODO: ask what changes with method = "OOB" (see ?trainControl).
control <- trainControl(
  method = "cv",
  number = 10,
  search = "grid",
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)
# Candidate mtry values: 1 up to a third of the number of columns of `churn`.
max_mtry <- round(ncol(churn) / 3)
rf_grid <- expand.grid(.mtry = 1:max_mtry)
random_forest <- train(
  Attrition_Flag ~ .,
  data = train_churn,
  method = "rf",
  trControl = control,
  tuneGrid = rf_grid,
  metric = "Spec",
  ntree = 250
)

# Author's note: the object keeps the last fitted model, which also turned out
# to be the winning one.
random_forest
ggplot(random_forest)
confusionMatrix(random_forest)

```

## 2.b) TREE BAGGING 
```{r}
set.seed(1)

# 10-fold CV; twoClassSummary needs class probabilities.
control <- trainControl(
  method = "cv",
  number = 10,
  search = "grid",
  summaryFunction = twoClassSummary,
  classProbs = TRUE
)

# caret's "treebag" forwards extra arguments to ipred's bagging routine, whose
# ensemble-size argument is `nbagg` (default 25). `ntree` is a randomForest
# argument that is not recognised there, so it never changed the number of
# bagged trees; `nbagg = 250` is what was intended.
tree_bagging <- train(
  Attrition_Flag ~ .,
  data = train_churn,
  method = "treebag",
  metric = "Spec",
  nbagg = 250,
  trControl = control
)

tree_bagging
confusionMatrix(tree_bagging)
```


## 2.c) GRADIENT BOOSTING
```{r}
set.seed(1)
# 10-fold CV; twoClassSummary needs class probabilities.
control <- trainControl(
  method = "cv",
  number = 10,
  search = "grid",
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)
gbm_grid <- expand.grid(
  interaction.depth = c(1, 2, 3, 4, 5, 6, 7, 8, 9), # tree depth
  n.trees = 50,                                     # number of trees / iterations
  shrinkage = c(0.075, 0.1, 0.5, 0.7),              # learning rate
  n.minobsinnode = 20                               # minimum observations per node
)
gradient_boost <- train(
  Attrition_Flag ~ .,
  data = train_churn,
  method = "gbm",
  trControl = control,
  tuneGrid = gbm_grid,
  metric = "Spec",
  verbose = FALSE
)

gradient_boost
# Author's note from a previous run: best Spec with shrinkage = 0.5 and
# interaction.depth = 8.
gradient_boost$bestTune
ggplot(gradient_boost)
confusionMatrix(gradient_boost)
```

## 3) NEURAL NETWORK
```{r}
# Drop column 19 ("Avg_Utilization_Ratio") before the variable-selection tree.
train_churn_net <- train_churn[-19]
library(caret)
set.seed(1)
# Model selection with a classification tree tuned on specificity.
control <- trainControl(
  method = "cv", number = 10, search = "grid",
  classProbs = TRUE, summaryFunction = twoClassSummary
)

tree_modsel_net <- train(
  Attrition_Flag ~ .,
  data = train_churn_net,
  method = "rpart",
  metric = "Spec",
  tuneLength = 10,
  trControl = control
)

# Resampling results for every candidate cp.
tree_modsel_net

# Variable importance
varImp(object = tree_modsel_net)
plot(varImp(object = tree_modsel_net), main = "train tuned - Variable Importance")

# Importance table of the final tree.
v_importance <- as.data.frame(tree_modsel_net$finalModel$variable.importance)
v_importance
dim(v_importance)

# With the formula interface the importance rows are model-matrix terms: a
# factor appears once per dummy (column name followed by the level). Map each
# term back to its source column by longest-prefix match and de-duplicate,
# instead of patching hard-coded positions (11, 14, 15), which is only valid
# for one specific tree fit.
# NOTE(review): confirm the resulting list matches the selection the manual
# patch was meant to produce.
vi_terms <- gsub("`", "", row.names(v_importance), fixed = TRUE)
vi_terms
candidate_columns <- setdiff(names(train_churn_net), "Attrition_Flag")
term_to_column <- function(term) {
  hits <- candidate_columns[startsWith(term, candidate_columns)]
  if (length(hits) == 0) {
    stop("No source column found for importance term: ", term, call. = FALSE)
  }
  hits[which.max(nchar(hits))]
}
vi_name <- unique(vapply(vi_terms, term_to_column, character(1), USE.NAMES = FALSE))
vi_name


# Train and test sets restricted to the covariates selected by the tree.
train_churn_net <- train_churn[vi_name]
names(train_churn_net)
test_churn_net <- test_churn[vi_name]
names(test_churn_net)
# Add the target back as first column.
train_churn_net <- cbind(train_churn[1], train_churn_net)
test_churn_net <- cbind(test_churn[1], test_churn_net)

head(train_churn_net)
head(test_churn_net)

# Neural-net tuning over hidden-layer size and weight decay.
set.seed(1)
control <- trainControl(
  method = "cv", number = 10, search = "grid",
  classProbs = TRUE, summaryFunction = twoClassSummary
)
nnet_grid <- expand.grid(size = c(1:7), decay = c(0.05, 0.1, 0.3, 0.5, 0.75))
# `tuneLength` was dropped: it is redundant once an explicit tuneGrid is given.
nnet_tree <- train(
  train_churn_net[-1], train_churn_net$Attrition_Flag,
  method = "nnet",
  preProcess = "scale",
  trControl = control,
  metric = "Spec",
  tuneGrid = nnet_grid,
  trace = TRUE,
  maxit = 750
)




nnet_tree$results
ggplot(nnet_tree)
```

## 4) KNN

```{r}
library(Boruta)

set.seed(1)
# Work on copies without column 19 ("Avg_Utilization_Ratio").
train_churn_knn_boruta <- train_churn[-19]
test_churn_knn_boruta <- test_churn[-19]
boruta_train <- Boruta(
  Attrition_Flag ~ .,
  data = train_churn_knn_boruta,
  doTrace = 1
)

plot(boruta_train, xlab = "features", xaxt = "n", ylab = "MDI")
# Reading the plot (author's notes):
# - the three blue boxplots are the minimal, average and maximum Z score of
#   MDI of an attribute;
# - red: unimportant feature;
# - yellow: tentative / borderline feature;
# - green: important feature.

print(boruta_train)

# Boruta statistics for every predictor.
boruta_metrics <- attStats(boruta_train)
head(boruta_metrics)
table(boruta_metrics$decision)


# Keep only the confirmed attributes; tentative and unimportant ones are dropped.
confirmed_rows <- which(boruta_metrics$decision == "Confirmed")
knn_selected <- boruta_metrics[confirmed_rows, ]
head(knn_selected)
# getSelectedAttributes(final.boruta, withTentative = F)
# After transposing, the column names are the selected attribute names.
sel <- t(knn_selected)
selected_columns <- colnames(sel)

# Restrict train and test to the selected variables, target first.
train_churn_knn_boruta <- cbind(
  train_churn[1],
  train_churn_knn_boruta[, selected_columns]
)
dim(train_churn_knn_boruta)

test_churn_knn_boruta <- cbind(
  test_churn[1],
  test_churn_knn_boruta[, selected_columns]
)
dim(test_churn_knn_boruta)



set.seed(1)
library(caret)

control <- trainControl(
  method = "cv",
  number = 10,
  search = "grid",
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# KNN on centred and scaled predictors, 10 candidate values of k.
knn <- train(
  Attrition_Flag ~ .,
  data = train_churn_knn_boruta,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  metric = "Spec",
  trControl = control
)
print(knn)
```


```{r}
set.seed(1)
# Same KNN set-up, resampled with the bootstrap instead of 10-fold CV.
boot_control <- trainControl(
  method = "boot",
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)
knn_boot <- train(
  Attrition_Flag ~ .,
  data = train_churn_knn_boruta,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = boot_control,
  metric = "Spec"
)

print(knn_boot)

```


## NAIVE BAYES

```{r}
# Columns kept for the naive Bayes model: the target plus the listed predictors.
predictors <- c(
  "Attrition_Flag", "Customer_Age", "Gender", "Dependent_count",
  "Education_Level", "Marital_Status", "Income_Category", "Card_Category",
  "Months_on_book", "Customer_Product", "Inactivity_Months",
  "Contacts", "Credit_Limit", "Total_Revolving_Bal", "Total_Amt_Chng_Q4_Q1",
  "Total_Trans_Amt", "Total_Trans_Ct", "Total_Ct_Chng_Q4_Q1"
)
train_churn_naive <- train_churn[predictors]
test_churn_naive <- test_churn[predictors]

# Numeric columns of the training subset with the target appended.
numeric_naive <- sapply(train_churn_naive, function(x) is.numeric(x))
numeric_naive <- train_churn_naive[, numeric_naive]
Attrition_Flag <- train_churn_naive$Attrition_Flag
numeric_naive <- cbind(numeric_naive, Attrition_Flag)



control <- trainControl(
  method = "cv", number = 10, classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Seed set right before train(), as for every other model in this file, so
# the CV folds are reproducible and match the ones of the models this fit is
# compared with.
set.seed(1)
naive <- train(
  Attrition_Flag ~ .,
  data = train_churn_naive, method = "naive_bayes", metric = "Spec",
  trControl = control, tuneLength = 10
)

naive
```

## LASSO
```{r}
library(caret)
set.seed(1)
control <- trainControl(
  method = "cv", number = 10, classProbs = TRUE,
  summaryFunction = twoClassSummary
)
# alpha = 1 selects the lasso penalty. The original grid contained only large
# penalties (lambda >= 0.1), which can shrink every coefficient to zero and
# leave a constant classifier. The grid now also covers small penalties
# (1e-4 ... 0.1 on a log scale); the original values are kept.
lasso_lambdas <- c(10^seq(-4, -1, by = 0.5), 0.25, 0.5, 0.75)
lasso_grid <- expand.grid(.alpha = 1, .lambda = lasso_lambdas)
# `tuneLength` was dropped: it is redundant once an explicit tuneGrid is given.
lasso <- train(
  Attrition_Flag ~ .,
  data = train_churn, method = "glmnet",
  trControl = control,
  tuneGrid = lasso_grid, metric = "Spec"
)
lasso

In [None]:
######################## STEP2 STEP2 STEP2 STEP2 STEP2 #########################


#                         STEP 2 - CONFRONTO TRA MODELLI

```{r}
library(caret)
# Compare the resampling distributions of the tuned models.
results <- resamples(list(
  random_forest = random_forest,
  tree_bagging = tree_bagging,
  gradient_boost = gradient_boost,
  nnet_tree = nnet_tree,
  naive = naive,
  knn = knn
))
summary(results)
bwplot(results)

# Copy of the test set that receives one column per model holding the second
# column of the predicted class probabilities.
test_churn_step2 <- test_churn


test_churn_step2$knn <- predict(knn, test_churn, type = "prob")[, 2]
test_churn_step2$naive <- predict(naive, test_churn, type = "prob")[, 2]
test_churn_step2$nnet_tree <- predict(nnet_tree, test_churn, type = "prob")[, 2]
test_churn_step2$gradient_boost <- predict(gradient_boost, test_churn, type = "prob")[, 2]
test_churn_step2$tree_bagging <- predict(tree_bagging, test_churn, type = "prob")[, 2]
test_churn_step2$random_forest <- predict(random_forest, test_churn, type = "prob")[, 2]
test_churn_step2$lasso <- predict(lasso, test_churn, type = "prob")[, 2]

head(test_churn_step2)

library(pROC)
# One ROC object per model, target against predicted probability.
roc_knn <- roc(Attrition_Flag ~ knn, data = test_churn_step2)
roc_naive <- roc(Attrition_Flag ~ naive, data = test_churn_step2)
roc_nnet_tree <- roc(Attrition_Flag ~ nnet_tree, data = test_churn_step2)
roc_gradient_boost <- roc(Attrition_Flag ~ gradient_boost, data = test_churn_step2)
roc_tree_bagging <- roc(Attrition_Flag ~ tree_bagging, data = test_churn_step2)
roc_random_forest <- roc(Attrition_Flag ~ random_forest, data = test_churn_step2)
roc_lasso <- roc(Attrition_Flag ~ lasso, data = test_churn_step2)


# One-row table with the AUC of every model.
AUC <- cbind(
  roc_knn$auc, roc_naive$auc, roc_lasso$auc, roc_nnet_tree$auc,
  roc_gradient_boost$auc, roc_tree_bagging$auc, roc_random_forest$auc
)
colnames(AUC) <- c("knn", "naive", "lasso", "nnet", "gboost", "tbagg", "rf")
rownames(AUC) <- "AUC:"
AUC
```
Plottiamo le ROC curves dei vari classificatori per ogni soglia predetta
```{r}

# ROC curves of all classifiers on the test set.
# legacy.axes = TRUE makes pROC label the x axis as 1 - specificity, so that
# it agrees with the "FPR" axis title (the default axis shows specificity).
plot(roc_knn, col = "black", xlab = "FPR = {1-Specificity}", legacy.axes = TRUE)
plot(roc_naive, add = TRUE, col = "red")
plot(roc_nnet_tree, add = TRUE, col = "blue")
plot(roc_gradient_boost, add = TRUE, col = "brown")
plot(roc_tree_bagging, add = TRUE, col = "green")
plot(roc_random_forest, add = TRUE, col = "orange")
plot(roc_lasso, add = TRUE, col = "purple")

# Legend labels are built from the computed AUCs instead of hand-typed numbers,
# so they cannot go stale when a model is refitted. Entries and colours are
# listed in the same order.
roc_curves <- list(
  knn = roc_knn,
  naive = roc_naive,
  lasso = roc_lasso,
  nnet_tree = roc_nnet_tree,
  gradient_boost = roc_gradient_boost,
  tree_bagging = roc_tree_bagging,
  random_forest = roc_random_forest
)
roc_colours <- c("black", "red", "purple", "blue", "brown", "green", "orange")
auc_values <- vapply(roc_curves, function(curve) as.numeric(curve$auc), numeric(1))
legend(
  "bottomright",
  legend = sprintf("%s - AUC = %.4f", names(roc_curves), auc_values),
  col = roc_colours,
  cex = .8, box.col = "green", lty = 1, lwd = 4
)

```


```{r}
# Class probabilities of the gradient boosting model on the test set.
posterior_gradient_boost <- predict(gradient_boost, newdata = test_churn, type = "prob")
posterior_gradient_boost <- data.frame(posterior_gradient_boost)
head(posterior_gradient_boost)


# First column is used as P(l0), second as P(l1).
posterior_l0 <- posterior_gradient_boost[, 1]
posterior_l1 <- posterior_gradient_boost[, 2]

# Add both, rounded to 3 digits, to the test data.
test_churn$posterior_l0 <- round(posterior_l0, digits = 3)
test_churn$posterior_l1 <- round(posterior_l1, digits = 3)

head(test_churn)


# Minimal elements for lift curves: target and predicted probability.
# Selected by name rather than by position so the code does not depend on the
# number of columns of test_churn.
test_minimal_elements <- test_churn[, c("Attrition_Flag", "posterior_l1")]
head(test_minimal_elements)


# Deciles need 11 quantile cut points (10 bins). The previous 13 points gave
# 12 bins, so `11 - decile` produced the labels 0 and -1.
# unique() may merge bins when quantiles tie, leaving fewer than 10 groups.
decile_breaks <- unique(quantile(
  test_minimal_elements$posterior_l1,
  probs = seq(0, 1, length.out = 11), type = 5, na.rm = TRUE
))
test_minimal_elements$breaks <- cut(
  test_minimal_elements$posterior_l1,
  breaks = decile_breaks, include.lowest = TRUE
)



test_minimal_elements$decile <- as.numeric(test_minimal_elements$breaks)

# Reverse the numbering so that the highest posterior_l1 bin gets the lowest label.
test_minimal_elements$decile2 <- factor(11 - test_minimal_elements$decile)

head(test_minimal_elements)
table(test_minimal_elements$decile2)


# Mean posterior in each decile, highest first.
library(dplyr)
test_minimal_elements %>%
  group_by(decile2) %>%
  summarise(m1 = mean(posterior_l1)) %>%
  arrange(-m1)

```

```{r}
library(funModeling)
# Gain / lift of each model's score column against the target.
# gain_lift(data = test_minimal_elements, score = 'posterior_l1', target = 'Attrition_Flag')
gain_lift(data = test_churn_step2, score = "random_forest", target = "Attrition_Flag")
# Author's note: same results.
gain_lift(data = test_churn_step2, score = "tree_bagging", target = "Attrition_Flag")
gain_lift(data = test_churn_step2, score = "nnet_tree", target = "Attrition_Flag")
gain_lift(data = test_churn_step2, score = "gradient_boost", target = "Attrition_Flag")

# Class proportions in the test set.
prop.table(table(test_churn$Attrition_Flag))

```

```{r}
# Confusion matrix of the gradient boosting class predictions on the test set.
pred <- predict(gradient_boost, test_churn)
confusionMatrix(pred, test_churn$Attrition_Flag)

# 1) SURROGATE MODEL
# Second probability column of the gradient boosting model on the training data.
predProb_Attrited <- predict(gradient_boost, train_churn, type = "prob")[, 2]
head(predProb_Attrited)

library(gbm)
library(caret)

Importance_gradient_boost <- varImp(gradient_boost, numTrees = 50)
ggplot(Importance_gradient_boost)


# Copy of the training data in which the observed target is replaced by the
# model's predicted probability.
copy <- train_churn
copy$predProb_Attrited <- predProb_Attrited
copy$Attrition_Flag <- NULL

# Regression tree fitted on the predicted probability.
library(rpart)

set.seed(1)
tree_surrogate <- rpart(predProb_Attrited ~ ., data = copy)
tree_surrogate$cptable

prun_surrogate <- prune(tree_surrogate, cp = 0.01)


par(mfrow = c(1, 1))
library(rpart.plot)
rpart.plot(prun_surrogate, type = 4, tweak = 2)
mean(predProb_Attrited)

# 2) PARTIAL DEPENDENCE PLOTS (PDP)
library(DALEX)


# Second probability column of the gradient boosting model on the training data.
predProb_Attrited <- predict(gradient_boost, train_churn, type = "prob")[, 2]
head(predProb_Attrited)
plot(Importance_gradient_boost)

# Removed `version(gradient_boost)`: `version` is not a function, so the call
# stopped the chunk with an error.
# explain() takes the prediction routine as a FUNCTION via `predict_function`;
# the numeric vector of predictions passed before (under the unrecognised name
# `predict.function`) was not a usable predict function.
predict_attrited <- function(model, newdata) {
  predict(model, newdata, type = "prob")[, 2]
}
# The same explainer is reused for all variables.
explainer_rf <- explain(
  gradient_boost,
  data = train_churn[-1],
  predict_function = predict_attrited
)
# NOTE(review): single_variable() belongs to older DALEX releases — confirm the
# installed version still provides it.
# PDP - Total_Trans_Amt
profile_rf_ttAMT <- single_variable(explainer_rf,
  variable = "Total_Trans_Amt", type = "pdp",
  which.class = 2, prob = TRUE
)
plot(profile_rf_ttAMT)
# PDP - Total_Trans_Ct
profile_rf_ttCT <- single_variable(explainer_rf,
  variable = "Total_Trans_Ct", type = "pdp",
  which.class = 2, prob = TRUE
)
plot(profile_rf_ttCT)
# PDP - Total_Revolving_Bal
profile_rf_trBAL <- single_variable(explainer_rf,
  variable = "Total_Revolving_Bal", type = "pdp",
  which.class = 2, prob = TRUE
)
plot(profile_rf_trBAL)
# PDP - Credit_Limit
profile_rf_cLIMIT <- single_variable(explainer_rf,
  variable = "Credit_Limit", type = "pdp",
  which.class = 2, prob = TRUE
)
plot(profile_rf_cLIMIT)
```


In [None]:
######################## STEP3 STEP3 STEP3 STEP3 STEP3 #########################
```
#                             STEP 3 - Maximize Spec

```{r}
# Minimal elements: columns 1 and 20 of test_churn, used below as the target
# (Attrition_Flag) and the posterior of l0 (posterior_l0).
target_posterior <- test_churn[, c(1, 20)]
head(target_posterior)


# For every threshold count the correctly classified l0 / l1 cases and the
# misclassified l0 cases, plus the per-class share of correct predictions
# (prop_true_l0 = sensitivity, prop_true_l1 = specificity, l0 being the event).
library(dplyr)
thresholds <- seq(from = 0, to = 1, by = 0.01)
prop_table <- data.frame(
  threshold = thresholds,
  prop_true_l0 = NA, prop_true_l1 = NA,
  true_l0 = NA, true_l1 = NA, fn_l0 = NA
)

actual_class <- target_posterior$Attrition_Flag
actual_is_l0 <- actual_class == "l0"
actual_is_l1 <- actual_class == "l1"

for (row in seq_along(thresholds)) {
  # Predict l0 when its posterior is strictly above the threshold.
  predicted_class <- ifelse(
    target_posterior$posterior_l0 > thresholds[row], "l0", "l1"
  )
  is_correct <- predicted_class == actual_class

  hits_l0 <- sum(is_correct & actual_is_l0)
  prop_table[row, "prop_true_l0"] <- hits_l0 / sum(actual_is_l0)
  prop_table[row, "true_l0"] <- hits_l0
  prop_table[row, "fn_l0"] <- sum(!is_correct & actual_is_l0)

  hits_l1 <- sum(is_correct & actual_is_l1)
  prop_table[row, "prop_true_l1"] <- hits_l1 / sum(actual_is_l1)
  prop_table[row, "true_l1"] <- hits_l1
}

head(prop_table, n = 10)


# Decide which cell of the confusion matrix matters and pick the metric of
# interest accordingly. Layout (rows = true class, columns = predicted class):
##########
#        pred
# true   l0    l1
# l0     TP    FN
# l1     FP    TN
##########

# Derive the remaining measures.

# Number of observations in the test (validation) set.
prop_table$n <- nrow(test_churn)

# False positives for l0, by difference: n - TN - TP - FN.
prop_table$fp_l0 <- prop_table$n - prop_table$true_l1 - prop_table$true_l0 - prop_table$fn_l0

# Precision
prop_table$prec_l0 <- prop_table$true_l0 / (prop_table$true_l0 + prop_table$fp_l0)

# Accuracy. The counts come from the test set, so the denominator must be the
# test-set size (it was nrow(train_churn), which understated accuracy).
prop_table$acc <- (prop_table$true_l1 + prop_table$true_l0) / prop_table$n

# F1 = 2 * (prec * sens) / (prec + sens)
prop_table$F1 <- 2 * (prop_table$prop_true_l0 * prop_table$prec_l0) / (prop_table$prop_true_l0 + prop_table$prec_l0)

# Check for NA metrics at the boundaries of the threshold range.
tail(prop_table)

# Precision and F1 are typically NA at the boundary (no predicted l0):
# set precision to 1 and F1 to 0 there.
library(Hmisc)
prop_table$prec_l0 <- impute(prop_table$prec_l0, 1)
prop_table$F1 <- impute(prop_table$F1, 0)
tail(prop_table)

colnames(prop_table)

# Drop the count columns (4 to 8); only the metrics are plotted.
prop_table2 <- prop_table[, -c(4:8)]
head(prop_table2)

# Plot the measures against the threshold.
# The data is first stacked vertically: one block of rows per measure.
library(dplyr)
library(tidyr)

gathered <- gather(prop_table2, x, y, prop_true_l0:F1)

head(gathered)

# All measures over the full threshold range.
library(ggplot2)
ggplot(gathered, aes(x = threshold, y = y, color = x)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    y = "measures",
    color = "l0: event\nl1: nonevent"
  )


# Same plot zoomed on thresholds between 0.4 and 0.7.
ggplot(gathered, aes(x = threshold, y = y, color = x)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    y = "measures",
    color = "l0: event\n l1: nonevent"
  ) +
  coord_cartesian(xlim = c(0.4, 0.7))


# Choose the best threshold using the test-set probabilities and fix the
# decision rule.
# Binary target for ROCR: 1 for "l0", 0 otherwise.
y <- test_churn$Attrition_Flag
y <- ifelse(y == "l0", 1, 0)


# First probability column of the gradient boosting model on the test set.
pred_probrO <- predict(gradient_boost, newdata = test_churn, type = c("prob"))[, 1]

library(ROCR)
predR <- prediction(pred_probrO, y)

z <- performance(predR, measure = "spec")
# z@x.values: threshold values
# z@y.values: specificity at each threshold
spec <- cbind(z@x.values[[1]], z@y.values[[1]])
colnames(spec) <- c("Treshold", "Specificity")
# One row per threshold with its specificity.
spec <- as.data.frame(spec)

# Maximise specificity: threshold 0.75.
head(target_posterior)

target_posterior$decision <- ifelse(target_posterior$posterior_l0 > 0.75, "l0", "l1")


# Rows = true class, columns = decision. Both decision levels are forced so
# the "l1" column exists even if no case were assigned to it.
decision_table <- table(
  target_posterior$Attrition_Flag,
  factor(target_posterior$decision, levels = c("l0", "l1"))
)
decision_table
# Specificity at threshold 0.75: true l1 cases classified as l1 over all true
# l1 cases. Computed from the table instead of hand-copied counts.
spec_metric <- decision_table["l1", "l1"] / sum(decision_table["l1", ])
################




In [None]:
######################## STEP4 STEP4 STEP4 STEP4 STEP4 #########################

```

#                               STEP 4 - SCORING

```{r}
# Class probabilities for the scoring set, stored as a nested data-frame column.
score_churn$prob <- predict(gradient_boost, score_churn, type = "prob")
head(score_churn$prob)
# Label a case "Exist" when the first probability column exceeds 0.75.
probExist <- score_churn$prob[, 1]
score_churn$pred_y <- ifelse(probExist > 0.75, "Exist", "Attrited")
head(score_churn[c("Attrition_Flag", "pred_y")])

# Take row 14 as a new case and remove its target.
new_observation <- score_churn[14, ]
new_observation$Attrition_Flag <- NULL
new_observation

# Second probability column for the scoring set: the value expected when
# breaking down the contribution of each variable to the new case's prediction.
predProb <- predict(gradient_boost, score_churn, type = "prob")[, 2]

# Predicted probability for the new case.
predProb[14]

# Prediction wrapper returning the second probability column.
predict.fun <- function(model, x) {
  predict(model, x, type = "prob")[, 2]
}

# Explain the prediction from the profile of the new case.
library(DALEX)
library(caret)
library("breakDown")
explain_3 <- broken(
  gradient_boost,
  new_observation,
  data = train_churn,
  predict.function = predict.fun
)
explain_3

library(ggplot2)
plot(explain_3)