### Teradata Vantage Analytics Workshop Basic
Bring Your Own Model -- XGBoost

In [None]:
from sklearn import datasets
import pandas as pd
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from sklearn import svm
from xgboost import XGBClassifier
import os
import time
from teradataml import *
import getpass as gp

### Connect to Database

In [None]:
# Connection settings default to the original workshop values but can be
# overridden with the TD_HOST / TD_USERNAME environment variables, so the
# notebook can be run by another user without editing this cell.
# The password is prompted for at run time rather than written into the notebook.
td_host = os.environ.get("TD_HOST", "tdprd.td.teradata.com")
td_username = os.environ.get("TD_USERNAME", "lc250058")
td_context = create_context(
    host=td_host,
    username=td_username,
    password=gp.getpass(prompt='Password:'),
    logmech="LDAP",
)

Creating a TeradataML DataFrame 

In [None]:
# Build a teradataml DataFrame over the training table and show its shape.
training_query = "select * FROM TRNG_TDU_TD01.bust_out_int"
train_df = DataFrame.from_query(training_query)
train_df.shape

In [None]:
# Preview 10 rows of the training table.
train_df.head(10)

In [None]:
# Show the type of the object returned by DataFrame.from_query.
type(train_df)

Moving the table to local Python

In [None]:
# Convert the teradataml DataFrame into a local object via to_pandas() and
# preview 10 rows.
# NOTE(review): the name is spelled `traid_pd` (likely meant `train_pd`); later
# cells rely on this exact spelling.
traid_pd = train_df.to_pandas()
traid_pd.head(10)

In [None]:
# Show the type of the converted, local object.
type(traid_pd)

In [None]:
# List the dtype of each column in the local frame.
traid_pd.dtypes

In [None]:
# Predictor columns used to train the classifier.
feature_columns = ['days_since_lst_pymnt', 'num_pymnt_lst_60_days', 'num_pur_lst_60_days']
X = traid_pd[feature_columns]
# Double brackets keep the label as a one-column frame; it is flattened at fit time.
y = traid_pd[['bustout1']]
X.head(10)

### Model Training
This step can take several minutes to run and may emit warnings.

In [None]:
# Wrap a default-parameter XGBoost classifier in a PMML pipeline, fit it on the
# selected features, then export the fitted pipeline to a PMML file.
pipeline_steps = [("classifier", XGBClassifier())]
pipeline = PMMLPipeline(pipeline_steps)
labels = y.values.ravel()  # flatten the one-column label frame to a 1-D array
pipeline.fit(X, labels)
sklearn2pmml(pipeline, "bustout_xgb_model.pmml", with_repr=True)

Create a table in your database to hold your models.

In [None]:
# Drop any model table left over from a previous run so the CREATE that follows
# starts clean. On a first run the table does not exist yet; Teradata reports
# that as error 3807, which is safe to ignore. Any other failure is re-raised.
try:
    td_context.execute("DROP TABLE pmml_models;")
except Exception as drop_error:
    if "[Error 3807]" not in str(drop_error):
        raise

In [None]:
# Table that stores one PMML model per row: a string id plus the model bytes.
# Adjacent literals are concatenated into a single statement.
create_models_sql = (
    "CREATE SET TABLE pmml_models "
    "     (model_id VARCHAR(40) CHARACTER SET LATIN NOT CASESPECIFIC, "
    "      model BLOB(2097088000)) "
    "PRIMARY INDEX ( model_id );"
)
td_context.execute(create_models_sql)

In [None]:
# Read the exported PMML file as raw bytes. The context manager closes the file
# handle even if the read fails (the bare open().read() left it open).
with open("bustout_xgb_model.pmml", "rb") as pmml_file:
    model_bytes = pmml_file.read()
# Store the model under a fixed id; values are bound as parameters, not
# formatted into the SQL text.
td_context.execute("insert into pmml_models  (model_id, model) values(?,?)", 'bustout_xgb_model', model_bytes)

In [None]:
# Read the model table back into a pandas DataFrame through the active
# connection, to confirm the insert.
model_list = pd.read_sql("select * from pmml_models", td_context)

In [None]:
# Display the rows currently stored in pmml_models.
model_list

### Scoring Phase
Choose a different dataset to score.

In [None]:
# Build a teradataml DataFrame over the scoring table and preview 10 rows.
scoring_query = "SELECT * FROM TRNG_TDU_TD01.bustout_test;"
bustout_test = DataFrame.from_query(scoring_query)
bustout_test.head(10)

In [None]:
# Drop the scoring output table from a previous run, if any. On a first run the
# table does not exist yet; Teradata reports that as error 3807, which is safe
# to ignore. Any other failure is re-raised.
try:
    td_context.execute("DROP TABLE bustout_xgb_out;")
except Exception as drop_error:
    if "[Error 3807]" not in str(drop_error):
        raise

In [None]:
# Score in-database: PMMLPredict is applied to the top 10000 rows of the test
# table using the stored 'bustout_xgb_model', keeping acct_no alongside the two
# class-probability outputs. The result is materialised as bustout_xgb_out.
score_sql = (
    "CREATE TABLE bustout_xgb_out AS ( "
    "SELECT * FROM TRNG_BYOM.PMMLPredict( "
    "    ON (SELECT top 10000 * FROM TRNG_TDU_TD01.bustout_test) "
    "    ON (select * from pmml_models where model_id='bustout_xgb_model') DIMENSION "
    "    USING "
    "        Accumulate('acct_no') "
    "        ModelOutputFields ('probability(0)', 'probability(1)') "
    ") AS dt "
    ") WITH DATA;"
)
td_context.execute(score_sql)

In [None]:
# Cap displayed column width at 80 characters, then preview the scored rows.
pd.set_option('display.max_colwidth', 80)
scored_query = "select * FROM bustout_xgb_out"
bustout_df = DataFrame.from_query(scored_query)
bustout_df.head(10)

**Update the table's prediction column based on the probability columns' values: each row is labelled with the class that has the higher probability.**

In [None]:
# Label a row 0 when class 0 has the strictly higher probability (GT is
# "greater than"). Rows with equal probabilities are not touched by this statement.
td_context.execute('UPDATE bustout_xgb_out SET prediction=0 WHERE "probability(0)" GT "probability(1)";')

In [None]:
# Label a row 1 when class 1 has the strictly higher probability (GT is
# "greater than"). Rows with equal probabilities are not touched by this statement.
td_context.execute('UPDATE bustout_xgb_out SET prediction=1 WHERE "probability(1)" GT "probability(0)";')

In [None]:
# Re-read the scored table to show the prediction column after the updates.
pd.set_option('display.max_colwidth', 80)
updated_scores_query = "select * FROM bustout_xgb_out"
bustout_df = DataFrame.from_query(updated_scores_query)
bustout_df.head(10)

In [None]:
# Drop the accuracy table from a previous run, if any. On a first run the table
# does not exist yet; Teradata reports that as error 3807, which is safe to
# ignore. Any other failure is re-raised.
try:
    td_context.execute("DROP TABLE bustout_xgb_accuracy;")
except Exception as drop_error:
    if "[Error 3807]" not in str(drop_error):
        raise

In [None]:
# Join each prediction to the actual bustout flag on acct_no and materialise
# the pairs for the accuracy calculation.
accuracy_table_sql = (
    "CREATE MULTISET TABLE bustout_xgb_accuracy AS "
    "(SELECT t.acct_no, t.bustout, p.prediction FROM bustout_xgb_out p, TRNG_TDU_TD01.bustout_test t "
    " WHERE t.acct_no = p.acct_no "
    ") WITH DATA;"
)
td_context.execute(accuracy_table_sql)

In [None]:
# Preview the joined actual-vs-predicted rows.
pd.set_option('display.max_colwidth', 80)
accuracy_rows_query = "select * FROM bustout_xgb_accuracy"
bustout_ac = DataFrame.from_query(accuracy_rows_query)
bustout_ac.head(10)

In [None]:
# Add an integer column, bustout1, to the accuracy table; it starts out empty
# for existing rows until populated by later UPDATE statements.
td_context.execute("ALTER table bustout_xgb_accuracy ADD bustout1 int;")

In [None]:
# Encode the actual flag numerically: bustout 'Y' becomes 1.
td_context.execute("UPDATE bustout_xgb_accuracy SET bustout1=1 WHERE bustout = 'Y';")

In [None]:
# Encode the actual flag numerically: bustout 'N' becomes 0.
td_context.execute("UPDATE bustout_xgb_accuracy SET bustout1=0 WHERE bustout = 'N';")

In [None]:
# Re-read the accuracy table to show the numeric bustout1 column.
pd.set_option('display.max_colwidth', 80)
encoded_rows_query = "select * FROM bustout_xgb_accuracy"
bustout_ac = DataFrame.from_query(encoded_rows_query)
bustout_ac.head(10)

**Model Accuracy**

In [None]:
# Accuracy (PA) = rows where the encoded actual label equals the prediction,
# divided by all rows. Multiplying the numerator by 1.00 makes it a decimal so
# the ratio is not computed with integer division.
accuracy_sql = (
    "SELECT (SELECT count(acct_no)*1.00 FROM bustout_xgb_accuracy "
    "WHERE bustout1 = prediction) / (SELECT count(acct_no) "
    "FROM bustout_xgb_accuracy) AS PA;"
)
bustout_accr = DataFrame.from_query(accuracy_sql)
bustout_accr

In [None]:
# Tear down the teradataml context that create_context() set up earlier in the
# notebook, as the final step of the workshop.
remove_context()

Copyright 2022 Teradata. All rights reserved.