## R SWAT Auto tuning

AutoTune actionset: https://go.documentation.sas.com/?cdcId=pgmsascdc&cdcVersion=9.4_3.5&docsetId=casactml&docsetTarget=cas-autotune-tuneall.htm&locale=pt-BR

In [None]:
# Load necessary packages
library('swat')

#options(cas.print.messages = FALSE)

In [None]:
# Open a connection to the CAS server over HTTP, using `casuser` as the caslib
# and taking the authentication details from the local .authinfo file.
conn <- CAS(
  "pdcesx14134.exnet.sas.com",
  port = 8777,
  protocol = "http",
  caslib = "casuser",
  authinfo = "./.authinfo"
)

In [None]:
## Load the CAS action sets used by the cells below
actionsets <- c("sampling", "decisionTree", "autotune", "percentile")

for (action_set in actionsets) {
  loadActionSet(conn, action_set)
}

In [None]:
# Upload the HMEQ CSV file to CAS.
# The output table name is spelled out because the following cells address the
# table as 'hmeq'; replace = TRUE lets this cell be re-run in the same session
# even though a table with that name already exists.
castbl <- cas.read.csv(
  conn,
  "./data/hmeq.csv",
  casOut = list(name = "hmeq", replace = TRUE)
)

In [None]:
# Data partitioning: simple random sampling of 30% of the rows with
# partind = TRUE; the output (all variables copied) overwrites the 'hmeq'
# table. Later cells filter on the `_PartInd_` column (0 for training,
# 1 for assessment).
# NOTE(review): no seed is passed here, so the partition presumably changes
# between runs even though the tuner below uses a fixed seed - confirm whether
# that is intended.
cas.sampling.srs(conn,
  table = "hmeq",
  samppct = 30,
  partind = TRUE,
  output = list(
    casOut = list(name = "hmeq", replace = TRUE),
    copyVars = "ALL"
  )
)

In [None]:
indata <- "hmeq"

# Fetch the column metadata of the input table and drop its last row
# (presumably the `_PartInd_` indicator appended by the sampling step, which
# must not be used as a model variable).
colinfo <- cas.table.columnInfo(conn, table = indata)$ColumnInfo
colinfo <- head(colinfo, n = -1)

In [None]:
# Target variable: the first column listed in the column metadata
target <- colinfo[["Column"]][1]


In [None]:
# Variable roles for models that handle missing values themselves:
# every column after the first one is an input; the target plus all varchar
# columns are treated as nominal.
inputs <- tail(colinfo$Column, -1)
nominals <- c(target, colinfo$Column[which(colinfo$Type == "varchar")])

In [None]:
# Autotune a gradient boosting model on the rows with _PartInd_ = 0.
# The model is written to the CAS table 'tune_boost_model' and the tuner runs
# with a fixed seed.
result <- cas.autotune.tuneGradientBoostTree(
  conn,
  trainOptions = list(
    table = list(name = "hmeq", where = "_PartInd_ = 0"),
    inputs = inputs,
    target = target,
    nominal = nominals,
    casout = list(name = "tune_boost_model", replace = TRUE)
  ),
  tunerOptions = list(seed = 12345)
)

In [None]:
# Show the TunerInfo element of the autotune result.
print(result[["TunerInfo"]])

In [None]:
  # Show the TunerResults element of the autotune result.
  print(result[["TunerResults"]])

In [None]:
  # Show the IterationHistory element of the autotune result.
  print(result[["IterationHistory"]])

In [None]:
  # Show the IterationHistory element of the autotune result.
  # NOTE(review): this prints the same element as the previous cell - confirm
  # whether a different element was intended here.
  print(result[["IterationHistory"]])

In [None]:
  # Show the EvaluationHistory element of the autotune result.
  print(result[["EvaluationHistory"]])

In [None]:
  # Show the BestConfiguration element of the autotune result.
  print(result[["BestConfiguration"]])

In [None]:
  # Show the TunerSummary element of the autotune result.
  print(result[["TunerSummary"]])

In [None]:
  # Show the TunerTiming element of the autotune result.
  print(result[["TunerTiming"]])

In [None]:
  # Show the TunerCasOutputTables element of the autotune result.
  print(result[["TunerCasOutputTables"]])

In [None]:
  # Show the HyperparameterImportance element of the autotune result.
  print(result[["HyperparameterImportance"]])

In [None]:

### Score the data with a single model
# Applies the tuned model stored in 'tune_boost_model' to the whole 'hmeq'
# table. The target and `_PartInd_` are copied to the output so the scored
# table 'gb_tune_scored' can be filtered by partition and assessed afterwards.
cas.decisionTree.gbtreeScore(conn,
  table = list(name = "hmeq"),
  modelTable = list(name = "tune_boost_model"),
  copyVars = list(target, "_PartInd_"),
  assessonerow = TRUE,
  casOut = list(name = "gb_tune_scored", replace = TRUE)
)

In [None]:
# Create an R-side reference to the scored table 'gb_tune_scored'.
dt_scores <- defCasTable(conn, "gb_tune_scored")

In [None]:
# Preview the first rows of the scored table.
head(dt_scores)

In [None]:
# Assess the scored table on the rows with _PartInd_ = 1, using the target as
# the response and '1' as the event level.
# The input column is presumably the predicted probability of event level 1;
# the run of spaces inside its name is part of the name as written - do not
# trim it.
asses_info <- cas.percentile.assess(conn,
  table = list(
    name = "gb_tune_scored",
    where = "_PartInd_ = 1"
  ),
  inputs = "_GBT_P_           1",
  response = target,
  event = "1"
)

In [None]:
# Pull the ROCInfo element out of the assessment result.
roc <- asses_info[["ROCInfo"]]

In [None]:
# Data frame manipulation: keep the ROC rows whose cutoff rounds to 0.49,
# reset the row names and show the TP/FP/FN/TN columns.
compare <- roc[which(round(roc$CutOff, 2) == 0.49), , drop = FALSE]
rownames(compare) <- NULL
compare[, c("TP", "FP", "FN", "TN")]

In [None]:
library('ggplot2')

In [None]:
# Build the ROC curve
# Set the plot size used by the notebook (repr options).
options(repr.plot.width = 14, repr.plot.height = 6)

plt <- ggplot(
  data = roc[c("FPR", "Sensitivity")],
  mapping = aes(x = FPR, y = Sensitivity)
) +
  geom_line(size = 1.2) +
  theme_bw() +
  labs(x = "False Positive Rate", y = "True Positive Rate")
plt

In [None]:
# Render the current plot as an interactive widget in the notebook.
# ggplotly() and embed_notebook() are plotly functions and plotly is never
# attached in this notebook, so the calls are namespace-qualified to avoid a
# "could not find function" error.
plotly::embed_notebook(plotly::ggplotly(plt))

In [None]:
# Accuracy as a function of the cutoff. Each segment joins one row to the
# next one (dplyr::lead) and is coloured by its accuracy value.
plt <- ggplot(
  data = roc[, c("ACC", "CutOff")],
  mapping = aes(y = ACC, x = CutOff, color = ACC)
) +
  geom_segment(
    aes(
      x = CutOff, xend = dplyr::lead(CutOff),
      y = ACC, yend = dplyr::lead(ACC)
    )
  ) +
  scale_colour_gradient2(low = "red", mid = "red", high = "green") +
  labs(x = "CutOff", y = "Accuracy") +
  theme_bw()
plt

In [None]:
# Render the current plot as an interactive widget in the notebook.
# ggplotly() and embed_notebook() are plotly functions and plotly is never
# attached in this notebook, so the calls are namespace-qualified to avoid a
# "could not find function" error.
plotly::embed_notebook(plotly::ggplotly(plt))

In [None]:
# End the CAS session held by `conn` now that the analysis is finished.
cas.session.endSession(conn)