# Modelowanie w SAS CAS i Pythonie

Szablon do uzupełnienia podczas warsztatu

## Importy bibliotek

In [None]:
import sys
import os
import pandas as pd
import swat
from matplotlib import pyplot as plt

## Połączenie z CAS i ładowanie danych

In [None]:
# Open the CAS session; the resulting connection object is used by every later cell.
# NOTE(review): host, port and credentials are hard-coded here — confirm this
# is acceptable outside the workshop environment.
conn = swat.CAS(
    hostname='server',
    port=8777,
    username='student',
    password='Metadata0',
)

In [None]:
# Upload the CSV file to the server as table HMEQ in caslib casuser,
# replacing any table that already has that name.
indata = conn.upload_file(
    '/home/student/SGF20VIY/hmeq.csv',
    casOut={'caslib': 'casuser', 'name': 'HMEQ', 'replace': True},
)

In [None]:
# Column roles used by the later preprocessing and modelling cells:
# nominal inputs, interval inputs, and the target column.
inputs_nominal = ['REASON', 'JOB']
inputs_interval = ['LOAN', 'MORTDUE', 'VALUE', 'YOJ', 'DEROG', 'DELINQ', 'CLAGE', 'NINQ', 'CLNO', 'DEBTINC']
target = 'BAD'

# Keep the summary as the final expression of the cell: a notebook renders
# only the value of the last expression, so calling it before the
# assignments silently discarded its output.
indata.summary()

In [None]:
# Impute the input columns: MEAN is requested for interval inputs and MODE for
# nominal inputs. The result is written back over casuser.HMEQ, and the BAD
# column is copied to the output.
# NOTE(review): the empty outVarsNamePrefix presumably keeps the original
# column names for the imputed variables — confirm against the action docs.
indata.dataPreprocess.impute(
    inputs=inputs_interval + inputs_nominal,
    methodInterval='MEAN',
    methodNominal='MODE',
    outVarsNamePrefix='',
    copyVars=['BAD'],
    casOut={'caslib': 'casuser', 'name': 'HMEQ', 'replace': True},
)

In [None]:
# Load the sampling action set and run simple random sampling at 70 percent
# with a partition indicator requested (partind=True). The output, with all
# columns copied, replaces casuser.HMEQ.
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm whether reproducibility matters here.
conn.loadActionSet('sampling')
conn.sampling.srs(
    table={'caslib': 'casuser', 'name': 'HMEQ'},
    samppct=70,
    partind=True,
    output={
        'casout': {'caslib': 'casuser', 'name': 'HMEQ', 'replace': True},
        'copyVars': 'ALL',
    },
)

# Preview the first rows of the table after partitioning.
indata.head()

In [None]:
# Train a gradient boosting tree model on the rows where _PartInd_ is 1
# (presumably the 70% sample flagged by the srs step — confirm).
conn.loadActionSet('decisionTree')
conn.decisionTree.gbtreeTrain(
    inputs=inputs_interval + inputs_nominal,
    # The target is listed among the nominals together with the nominal inputs.
    nominals=inputs_nominal + [target],
    table=dict(caslib='casuser', name='HMEQ', where = '_PartInd_= 1'),
    target=target,
    # Pin the model table to caslib casuser: the scoring step reads the model
    # from caslib='casuser', so relying on the session's active caslib here
    # could store the model somewhere the scoring step does not look.
    casOut=dict(caslib='casuser', name='hmeq_gbtree', replace=True)
)

In [None]:
# Score the rows where _PartInd_ is 0 with the stored hmeq_gbtree model. The
# scored rows, with the target column copied over, are written to
# casuser.hmeq_gbtree_scored.
hmeq_gbtree_score = conn.decisionTree.gbtreeScore(
    model={'caslib': 'casuser', 'name': 'hmeq_gbtree'},
    table={'caslib': 'casuser', 'name': 'HMEQ', 'where': '_PartInd_ = 0'},
    casout={'caslib': 'casuser', 'name': 'hmeq_gbtree_scored', 'replace': True},
    copyVars=target,
    encodename=True,
    assessonerow=True,
)

In [None]:
# Show the result object returned by the gbtreeScore call.
hmeq_gbtree_score

In [None]:
conn.loadActionSet('Percentile')

# Column of the scored table that is assessed against the target.
# NOTE(review): presumably the predicted probability of BAD = 1 produced by
# the scoring step — confirm the column name in hmeq_gbtree_scored.
prediction = 'P_BAD1'

# Assess the scored table with '1' as the event level of the response.
hmeq_gbtree_asses = conn.percentile.assess(
    table=dict(caslib='casuser', name='hmeq_gbtree_scored'),
    inputs=prediction,
    # replace=True matches every other output table in this file and lets the
    # cell be re-run without failing because the output already exists.
    casout=dict(caslib='casuser', name='hmeq_gbtree_assessed', replace=True),
    response=target,
    event='1'
)

In [None]:
# Show the result object returned by the assess call.
hmeq_gbtree_asses

In [None]:
# Table handles for the assessment output in caslib casuser.
# hmeq_gbtree_assessed is the casout name given to the assess action; the
# _ROC-suffixed table is presumably created alongside it by that action — confirm.
hmeq_gbtree_ROC = conn.CASTable('hmeq_gbtree_assessed_ROC', caslib='casuser')
hmeq_gbtree_Lift = conn.CASTable('hmeq_gbtree_assessed', caslib='casuser')


In [None]:
# Preview the first rows of the ROC table.
hmeq_gbtree_ROC.head()

In [None]:
# Rebind the name from the CAS table handle to the result of to_frame()
# (presumably a client-side data frame — confirm). Because the name is
# reused, this cell is not meant to be run twice in a row.
hmeq_gbtree_ROC = hmeq_gbtree_ROC.to_frame()

In [None]:
# Rebind the name from the CAS table handle to the result of to_frame()
# (presumably a client-side data frame — confirm). Because the name is
# reused, this cell is not meant to be run twice in a row.
hmeq_gbtree_Lift = hmeq_gbtree_Lift.to_frame()

In [None]:
# ROC curve: 1 - specificity on the x axis against sensitivity on the y axis.
false_positive_rate = 1 - hmeq_gbtree_ROC['_Specificity_']
true_positive_rate = hmeq_gbtree_ROC['_Sensitivity_']
# Dashed reference diagonal from 0.0 to 1.0 in steps of 0.1.
diagonal = pd.Series(range(11)) / 10

plt.figure(figsize=(10, 10))
plt.plot(false_positive_rate, true_positive_rate, 'bo-', linewidth=3)
plt.plot(diagonal, diagonal, 'k--')
plt.title('Krzywa ROC')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

In [None]:
# Cumulative lift chart: _CumLift_ plotted against _Depth_.
depth = hmeq_gbtree_Lift['_Depth_']
cumulative_lift = hmeq_gbtree_Lift['_CumLift_']

plt.figure(figsize=(10, 10))
plt.plot(depth, cumulative_lift, 'bo-', linewidth=3)
plt.title('Krzywa liftu skumulowanego')
plt.xlabel('Depth')
plt.ylabel('Cumulative Lift')
plt.show()

In [None]:
# Close the CAS connection opened at the start of the notebook.
conn.close()