In [None]:
# !pip install swat

In [None]:
import swat
import getpass
import re
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests

from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# Conexão com servidor

In [None]:
# REST (HTTP) connection: the slower of the two connection methods.
conn = swat.CAS(
    hostname="https://my-viya-server.com/cas-shared-default-http",
    port=443,
    username="username",
    password=getpass.getpass(),
)

# Binary connection (the faster method); uncomment to use it instead:
# conn = swat.CAS("controller.sas-cas-server-default.viya4.svc.cluster.local", 5570,
#                 username="username",
#                 password=getpass.getpass())
print(conn)

In [None]:
# Action set reference:
# https://go.documentation.sas.com/doc/en/pgmsascdc/v_035/allprodsactions/actionSetsByName.htm

# Load every action set the notebook calls later on. "dataPreprocess" backs the
# impute call in the imputation cell; "neuralNet" is presumably required by
# autotune.tuneneuralnet in the same way "decisionTree" is loaded for the
# forest / gradient boosting tuners -- TODO confirm on your deployment.
for actionset_name in ("sampling", "dataPreprocess", "decisionTree", "neuralNet", "autotune"):
    conn.loadactionset(actionset_name)

# Verificar Tabelas

In [None]:
# List the in-memory tables of the CASUSER caslib.
conn.tableinfo(caslib="CASUSER")

In [None]:
# Upload the local CSV into CASUSER as table "hmeq" (replacing an existing
# table of that name), then list the caslib's tables again.
hmeq_casout = {"name": "hmeq", "caslib": "casuser", "replace": True}
conn.upload(data='hmeq.csv', casout=hmeq_casout)
conn.tableinfo(caslib="CASUSER")

# Análise da Tabela

In [None]:
# Client-side handle to the "hmeq" table in the CASUSER caslib.
tbl = conn.CASTable("hmeq", caslib="CASUSER")

In [None]:
# Preview the first rows of the table.
tbl.head()

In [None]:
# Summary statistics of the table's columns.
tbl.describe()

In [None]:
# Stratified partition on BAD, written to "hmeq_part" with a _PartInd_ column.
# Convention used by every modelling cell below: 0 = training, 1 = test.
# With partind=True the rows drawn by samppct are the ones flagged
# _PartInd_ = 1, so the sampled fraction must be the 30% hold-out; sampling 70%
# here would leave only 30% of the data for training.
# A fixed seed keeps the split reproducible across runs.
conn.sampling.stratified(
    table={"name":"hmeq", "groupBy":"BAD"},
    output={"casOut":{"name":"hmeq_part", "replace":1}, "copyVars":"ALL"},
    samppct=30,
    partind=True,
    seed=12345
)

In [None]:
# Handle to the partitioned table, followed by a preview of its first rows.
tbl_part = conn.CASTable("hmeq_part", caslib="CASUSER")
tbl_part.head()

# Definição de Variáveis

## Imputação de Valores

In [None]:
# Missing-value imputation: mode for nominal columns, median for interval
# columns. The result is written to CASUSER.HMEQ_TRATADA; the empty prefix
# keeps the original column names.

# Drop a previously promoted copy first. Once HMEQ_TRATADA exists in global
# scope, promoting a new table of the same name fails, which would break a
# re-run of this cell. quiet=True makes this a no-op when no table exists.
conn.table.droptable(name="HMEQ_TRATADA", caslib="CASUSER", quiet=True)

db_var_imp = conn.datapreprocess.impute(table="hmeq_part",
                                        methodnominal="mode",
                                        methodinterval ="median",
                                        casout={"name":"HMEQ_TRATADA","caslib":"CASUSER", "replace":1},
                                        outvarsnameprefix='')

db_tratado = conn.CASTable("HMEQ_TRATADA")
# Promote the imputed table so it is visible outside this session.
conn.table.promote(db_tratado)

db_tratado.head()

In [None]:
# Column metadata of the imputed table, used to split inputs by type.
target = "BAD"

columns_info = conn.columninfo(table=db_tratado)["ColumnInfo"]
columns_info


In [None]:

# Split the column names by CAS type: varchar columns are the character
# inputs, double columns the numeric inputs.
is_varchar = columns_info["Type"] == "varchar"
is_double = columns_info["Type"] == "double"

columns_char = columns_info["Column"][is_varchar].tolist()
columns_double = columns_info["Column"][is_double].tolist()

# The target and the partition indicator are not model inputs.
for excluded in ("BAD", "_PartInd_"):
    columns_double.remove(excluded)

print(columns_char)
print(columns_double)

# Criação do Modelo

## Random Forest

In [None]:
# Training and scoring - Random Forest (hyperparameters tuned by autotune).
# "replace" is set on the model table and on the analytic store so the cell
# can be re-run in the same session without failing on already-existing tables.

resultrf = conn.autotune.tuneForest(
    # Train on the training partition and save the model in table rf_train.
    trainOptions={
         "table"   : {"name":"hmeq_part", "where": "_PartInd_=0"},
         "inputs"  : columns_double+columns_char,
         "target"  : target,
         "nominals" : columns_char+[target],
         "casout"  : {"name":"rf_train", "replace": True},
         "saveState" : {"name" : "rf_astore", "caslib": "Public", "replace": True} # astore
     },
    # Genetic-algorithm search maximising KS, capped at 5 iterations / maxTime 60.
    tunerOptions={
         "maxIters": 5,
         "maxTime": 60,
         "searchMethod": "GA",
         "objective": "KS",
         "userDefinedPartition": True,
         "targetEvent" : "1"
     },
    # Score the test partition with the tuned model.
    scoreOptions = {
        "table": { "name":"hmeq_part", "where": "_PartInd_=1" },
        "modeltable": {"name":"rf_train"},
        "casout": {"name":"rf_score", "replace":1},
        "copyvars":["BAD"]
    }
)

# Scored test partition - Random Forest
rf_score = conn.CASTable("rf_score")
rf_score.head()

## Gradient Boosting

In [None]:
# Training and scoring - Gradient Boosting (hyperparameters tuned by autotune).
# "replace" is set on the model table and on the analytic store so the cell
# can be re-run in the same session without failing on already-existing tables.
resultgb = conn.autotune.tuneGradientBoostTree(
    # Train on the training partition and save the model in table gb_train.
    trainOptions = {
        "table"   : {"name":"hmeq_part", "where": "_PartInd_=0"},
        "inputs"  : columns_double+columns_char,
        "target"  : target,
        "nominal" : columns_char+[target],
        "casout"  : {"name":"gb_train", "replace": True},
        "saveState" : {"name" : "gb_astore", "caslib": "Public", "replace": True}
    },
    # Genetic-algorithm search maximising KS, capped at 5 iterations / maxTime 60.
    tunerOptions={
         "maxIters": 5,
         "maxTime": 60,
         "searchMethod": "GA",
         "objective": "KS",
         "userDefinedPartition": True,
         "targetEvent" : "1"
    },
    # Score the test partition with the tuned model.
    scoreOptions= {
        "table" : {"name":"hmeq_part", "where": "_PartInd_=1"},
        "modeltable": {"name":"gb_train"},
        "casout":{"name":"gb_score", "replace":1},
        "copyvars":["BAD"]
   }
)

# Scored test partition - Gradient Boosting
gb_score = conn.CASTable("gb_score")
gb_score.head()

In [None]:
# Training and scoring - Neural Network (hyperparameters tuned by autotune).
# "replace" is set on the model table so the cell can be re-run in the same
# session without failing on an already-existing table.
resultnn =conn.autotune.tuneneuralnet(
    # Train on the training partition and save the model in table nn_train.
    trainOptions = {
        "table"   : {"name":"hmeq_part", "where": "_PartInd_=0"},
        "inputs"  : columns_double+columns_char,
        "target"  : target,
        "nominal" : columns_char+[target],
        "casout"  : {"name":"nn_train", "replace": True}
    },
    # Genetic-algorithm search maximising KS, capped at 5 iterations / maxTime 60.
    tunerOptions={
         "maxIters": 5,
         "maxTime": 60,
         "searchMethod": "GA",
         "objective": "KS",
         "userDefinedPartition": True,
         "targetEvent" : "1"
    },
    # Score the test partition with the tuned model.
    scoreOptions= {
        "table" : {"name":"hmeq_part", "where": "_PartInd_=1"},
        "modeltable": {"name":"nn_train"},
        "casout":{"name":"nn_score", "replace":1},
        "copyvars":["BAD"]
   }
)

# Scored test partition - Neural Network. This must point at "nn_score", the
# table written by scoreOptions above (it previously pointed at "gb_score").
nn_score = conn.CASTable("nn_score")
nn_score.head()

# Informações dos Modelos

In [None]:
# Display everything returned by the gradient boosting tuning run.
resultgb

In [None]:
# Tables currently in the CASUSER caslib.
conn.tableinfo(caslib="CASUSER")

In [None]:
# Tables currently in the Public caslib (target caslib of the saveState options above).
conn.tableinfo(caslib="Public")

# Gráfico de Assessment

In [None]:
# ROCInfo column used below to pick each model's best row.
metric = "KS"

In [None]:
# ROCInfo row(s) where the gradient boosting model reaches its maximum metric.
gb_roc = resultgb["ROCInfo"]
gb_roc[gb_roc[metric] == max(gb_roc[metric])]


In [None]:
# ROCInfo row(s) where the random forest model reaches its maximum metric.
rf_roc = resultrf["ROCInfo"]
rf_roc[rf_roc[metric] == max(rf_roc[metric])]

In [None]:
# ROCInfo row(s) where the neural network model reaches its maximum metric.
nn_roc = resultnn["ROCInfo"]
nn_roc[nn_roc[metric] == max(nn_roc[metric])]

In [None]:
# ROC curves of the three tuned models on one set of axes. Each legend entry
# shows the first value of the ROCInfo "C" column, rounded to 3 decimals.
roc_by_model = [
    ("RandomForest :", resultrf["ROCInfo"]),
    ("GradientBoost :", resultgb["ROCInfo"]),
    ("NeuralNet :", resultnn["ROCInfo"]),
]

fig, ax = plt.subplots()
for label_prefix, roc_info in roc_by_model:
    ax.plot(roc_info["FPR"], roc_info["Sensitivity"])

# Dashed diagonal as the reference line.
ax.plot([0,1], linestyle="dashed", color='black')
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")
ax.set_title("Curva ROC")
ax.legend([label_prefix + str(round(roc_info["C"][0], 3))
           for label_prefix, roc_info in roc_by_model])


In [None]:
from sklearn.metrics import classification_report

In [None]:
# Pull the scored tables down to the client as data frames.
gbdf = gb_score.to_frame()
rfdf = rf_score.to_frame()

In [None]:
# Classification report for gradient boosting: actual BAD vs. the scored
# I_BAD column, both cast to int.
print("## GBT metrics")
gb_actual = gbdf["BAD"].astype(int)
gb_predicted = gbdf["I_BAD"].astype(int)
gb_report = classification_report(y_true=gb_actual,
                                  y_pred=gb_predicted,
                                  target_names=["0", "1"])
print(gb_report)

In [None]:
# Classification report for random forest: actual BAD vs. the scored
# I_BAD column, both cast to int.
print("## RF metrics")
rf_actual = rfdf["BAD"].astype(int)
rf_predicted = rfdf["I_BAD"].astype(int)
rf_report = classification_report(y_true=rf_actual,
                                  y_pred=rf_predicted,
                                  target_names=["0", "1"])
print(rf_report)

## Salvando, registrando e publicando Modelos

In [None]:
# Download the gradient boosting analytic store and write it to a local file.
conn.loadactionset('aStore')

astore_location = {"name": "gb_astore", "caslib": "Public"}
store = conn.download(rstore=astore_location)

# The binary payload is read from the 'blob' entry of the action result.
with open('savelocal.sasast', 'wb') as file:
    file.write(store['blob'])

In [None]:
from sasctl import Session
from sasctl.tasks import register_model, publish_model
import getpass

In [None]:
# Open a sasctl session against the Viya server; the password is prompted
# for interactively rather than stored in the notebook.
viya_password = getpass.getpass()
s = Session("https://my-viya-server.com", username="username", password=viya_password)

print(s)

In [None]:
# Describe the saved analytic store and print each row of its InputVariables
# table. var_list keeps the rows themselves; it used to be a list of None
# because it was built from the return value of print().
result = conn.astore.describe(rstore= dict(name = "gb_astore", caslib= "Public" ), epcode=False)
var_list = list(result.InputVariables.itertuples())
for v in var_list:
    print(v)



In [None]:
# Register the analytic store table as model 'gb_swat' in project 'WebinarBrHmeq'.
astore = conn.CASTable('gb_astore', caslib = "public")
model = register_model(astore, 'gb_swat', 'WebinarBrHmeq') # pass force=True to create the project if it does not exist

In [None]:
# Publish the model for real-time scoring (destination 'maslocal').
module = publish_model(model, 'maslocal')


In [None]:
# Fetch the first 10 rows of the raw table to use as scoring input.
first_rows = tbl.head(10)

In [None]:
# Send one row to MAS and receive the prediction.
result = module.score(first_rows.iloc[8])
print(result)

In [None]:
# End the sasctl session. Session.delete is the HTTP DELETE verb and needs a
# URL, so calling it with no argument raises TypeError; close() is the call
# that releases the session's connections.
s.close()

In [None]:
# End the CAS session opened at the top of the notebook.
conn.terminate()

## Modelo Python

In [None]:
import pandas as pd
from sasctl import Session, register_model, publish_model
from sklearn.linear_model import LogisticRegression


In [None]:
# Load the Iris data set, normalise the column names (spaces -> underscores)
# and drop the "Index" column.
iris_url = 'https://support.sas.com/documentation/onlinedoc/viya/exampledatasets/iris.csv'
df = pd.read_csv(iris_url)
df.columns = df.columns.str.replace(' ', '_')
df = df.drop(columns=["Index"])

# Features are every column except the label; the label is categorical.
X = df.drop(columns='Species')
y = df["Species"].astype('category')

In [None]:
# Preview the first 10 rows of the prepared data.
df.head(10)

In [None]:
# Create a scikit-learn logistic regression model with default settings
# (it is fitted in the next cell).
model = LogisticRegression()

In [None]:
# Fit the model on the full feature matrix X and labels y.
model.fit(X, y)

In [None]:

# Open a new session with Viya; the password is prompted for interactively.
Session(
    "https://my-viya-server.com",
    username="username",
    password=getpass.getpass(),
)


In [None]:
# Name under which the model is registered and later published.
model_name = 'IrisLogRegression'

# Register the model in Model Manager
register_model(model,
                   model_name,
                   input=X,         # Use X to determine model inputs
                   project='IrisProject',  # Register in the "IrisProject" project
                   force=True)      # Create project if it doesn't exist

In [None]:
# Publish the model to the real-time scoring engine (destination 'maslocal').
module = publish_model(model_name, 'maslocal')

In [None]:
# Select one row of the training data (positional index 100, i.e. the 101st row)
x = X.iloc[100, :] #

# Call the published module and score the record
result = module.predict(x)
print(result)

In [None]:
# Delete the published module (undo the publication).
# The module id is taken from the `module` object returned by publish_model
# instead of a literal copied from one particular run, so the cell keeps
# working when the generated module name differs.
# NOTE(review): assumes the object returned by publish_model exposes the MAS
# module id under the "id" key -- confirm against your sasctl version.

from sasctl import delete
delete("/microanalyticScore/modules/{}".format(module["id"]))