# BYOM (Bring Your Own Model)

## 1. Desarrollo/Entrenamiento del modelo

In [1]:
import pandas as pd
import getpass

from teradataml import (
    create_context, 
    remove_context,
    get_context,
    get_connection,
    DataFrame,
    retrieve_byom,
    PMMLPredict,
    configure)

In [2]:
# Connection settings for the Teradata system used throughout this notebook.
# The password is requested interactively through getpass.
#
# Fully interactive alternative (kept for reference, currently disabled):
#   username = input("Username: ")
#   password = getpass.getpass("Password: ")
#   val_db = input("VAL DB: ")
#   byom_db = input("BYOM DB: ")
#   configure.val_install_location = val_db
#   configure.byom_install_location = byom_db
#
# Install locations noted by the author:
#   VAL DB:  TRNG_XSP
#   BYOM DB: TRNG_BYOM

host = "tdprd.td.teradata.com"
username = "rb255002"

# By default we assume you are using your own user database; change as required.
database = "rb255002"

password = getpass.getpass("Password:")

# Open the teradataml context (connection) with the settings above.
context = create_context(
    host=host,
    username=username,
    password=password,
    database=database,
    logmech="TDNEGO",
)

Password: ··········


In [3]:
from xgboost import XGBClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline


# Training set: patient features joined with their diagnosis. The WHERE clause
# leaves out every row whose patientid is a multiple of 5.
train_query = """
SELECT 
    F.*, D.hasdiabetes 
FROM pima_patient_features F
JOIN pima_patient_diagnoses D
    ON F.patientid = D.patientid 
    WHERE F.patientid MOD 5 <> 0
"""
train_pdf = DataFrame.from_query(train_query).to_pandas(all_rows=True)

# Label column and predictor columns used for training and for the PMML export.
target = "HasDiabetes"
features = [
    "NumTimesPrg",
    "Age",
    "PlGlcConc",
    "BloodP",
    "SkinThick",
    "TwoHourSerIns",
    "BMI",
    "DiPedFunc",
]

# Separate the predictors (X) from the label (y).
X_train = train_pdf[features]
y_train = train_pdf[target]

# Two-step pipeline: a MinMaxScaler followed by an XGBoost classifier.
pipeline_steps = [
    ("scaler", MinMaxScaler()),
    ("xgb", XGBClassifier(eta=0.2, max_depth=6)),
]
model = Pipeline(pipeline_steps)

model.fit(X_train, y_train)
print("Finished training")

Finished training


In [4]:
# Display the fitted pipeline and its steps.
model

Pipeline(steps=[('scaler', MinMaxScaler()),
                ('xgb', XGBClassifier(eta=0.2, max_depth=6))])

In [5]:
from nyoka import xgboost_to_pmml

# Export the fitted pipeline to the PMML file "model.pmml", declaring the
# input column names and the target name used during training.
xgboost_to_pmml(
    pipeline=model,
    col_names=features,
    target_name=target,
    pmml_f_name="model.pmml",
)

## 2. Carga del modelo

In [6]:
#model_file = os.path.join(os.path.dirname(teradataml.__file__), "data", "models", "iris_db_glm_model.pmml")
import  datetime
from teradataml import  save_byom 

In [7]:
# PMML file to upload into the model table.
model_file = "./model.pmml"

# Extra metadata columns stored alongside the model row.
extra_columns = {
    "Description": "XGBoost",
    "ModelSavedTime": datetime.datetime.now(),
    "ModelGeneratedDate": datetime.date.today(),
    "ModelGeneratedTime": datetime.time(hour=0, minute=5, second=45, microsecond=110),
}

# Save the model under id "xgb_model" in table "modelos" of schema "rb255002".
save_byom(
    "xgb_model",
    model_file,
    "modelos",
    schema_name="rb255002",
    additional_columns=extra_columns,
)

Model is saved.


## 3. Consultar modelos

In [8]:
# Allow up to 250 characters per cell so long values are not cut off early.
pd.set_option("display.max_colwidth", 250)

# Look at the first two rows of the model table.
modelos_sql = "SELECT TOP 2 * FROM rb255002.modelos"
pd.read_sql(modelos_sql, get_connection())

Unnamed: 0,model_id,model,Description,ModelSavedTime,ModelGeneratedDate,ModelGeneratedTime
0,xgb_model,"b'<?xml version=""1.0"" encoding=""UTF-8""?>\n<PMML xmlns=""http://www.dmg.org/PMML-4_4"" version=""4.4"">\n <Header copyright=""Copyright (c) 2018 Software AG"" description=""Default Description"">\n <Application name=""Nyoka"" version=""4.3.0""/>\n ...",XGBoost,2022-08-29 21:31:15.556957,2022-08-29,00:05:45.000110


In [9]:
# Fetch the model stored under id "xgb_model" from the "modelos" table.
# `modelo` is passed later as the model input of PMMLPredict.
modelo = retrieve_byom("xgb_model", table_name="modelos")
modelo

model_id,model
xgb_model,b'3C3F786D6C20766572...'


In [10]:
from teradataml import  list_byom

In [11]:
# List the models stored in the "modelos" table.
list_byom(table_name="modelos")
# To restrict the listing to a single model, pass its id as well:
# list_byom(table_name="modelos", model_id="xgb_model")


                              model Description              ModelSavedTime ModelGeneratedDate ModelGeneratedTime
model_id                                                                                                         
xgb_model  b'3C3F786D6C20766572...'     XGBoost  2022-08-29 21:31:15.556957         2022/08/29    00:05:45.000110


## 4. Eliminar modelos

In [12]:
 from teradataml import delete_byom

In [13]:
#delete_byom(model_id="xgb_model", table_name="modelos")

## 5. Predecir con el modelo

In [14]:
# Reference the feature table that will be scored.
# The f prefix is required: without it the "{database}" placeholder is not
# substituted and the braces are passed to Teradata literally, so the
# configured `database` value would never be used.
df = DataFrame(f'{database}."taller_pima_patient_features"')
df

PatientId,NumTimesPrg,PlGlcConc,BloodP,SkinThick,TwoHourSerIns,BMI,DiPedFunc,Age
383,1,90,62,18,59,25.1,1.268,25
13,1,189,60,23,846,30.1,0.398,59
179,5,130,82,0,0,39.1,0.956,37
668,6,98,58,33,190,34.0,0.43,43
108,3,83,58,31,18,34.3,0.336,25
364,4,147,74,25,293,34.9,0.385,30
133,8,84,74,31,0,38.3,0.457,39
554,1,84,64,23,115,36.9,0.471,28
195,5,158,84,41,210,39.4,0.395,29
507,1,130,60,23,170,28.6,0.6920000000000001,21


In [15]:
# Point teradataml at the database where the BYOM functions are installed.
configure.byom_install_location = "TRNG_BYOM"

# Score the feature table with the retrieved PMML model; PatientId is carried
# through to the output via `accumulate`.
pmml = PMMLPredict(
    newdata=df,
    modeldata=modelo,
    accumulate="PatientId",
)

In [16]:
# Display the output of the PMMLPredict call.
pmml

############ STDOUT Output ############

   PatientId prediction                                                                                           json_report
0         36          0    {"probability_0":0.7762990235189258,"predicted_HasDiabetes":0,"probability_1":0.22370097648107423}
1        583          0   {"probability_0":0.9753213755679428,"predicted_HasDiabetes":0,"probability_1":0.024678624432057098}
2        377          0    {"probability_0":0.9573951271458577,"predicted_HasDiabetes":0,"probability_1":0.04260487285414232}
3        396          0   {"probability_0":0.9552131883176238,"predicted_HasDiabetes":0,"probability_1":0.044786811682376144}
4        579          1    {"probability_0":0.03812856541346432,"predicted_HasDiabetes":1,"probability_1":0.9618714345865357}
5        425          1     {"probability_0":0.04190902729728396,"predicted_HasDiabetes":1,"probability_1":0.958090972702716}
6        577          1       {"probability_0":0.292597230803771,"predicted_H

In [17]:

# Pull the predictions out of Teradata into a local pandas DataFrame.
# (The same statement could alternatively be executed through
#  pd.read_sql(<query>, get_connection()).)
scoring_sql = """
     SELECT * FROM TRNG_BYOM.PMMLPredict (
        ON rb255002.taller_pima_patient_features AS InputTable
        ON (SELECT * FROM modelos WHERE model_id='xgb_model') AS ModelTable DIMENSION
    USING
          Accumulate ('PatientID')
    ) AS td"""

# Build the teradataml DataFrame first, then convert it to pandas.
scored_tdf = DataFrame.from_query(scoring_sql)
predicciones = scored_tdf.to_pandas()
predicciones

Unnamed: 0,PatientId,prediction,json_report
0,273,0,"{""probability_0"":0.9805662801524387,""predicted_HasDiabetes"":0,""probability_1"":0.01943371984756128}"
1,699,0,"{""probability_0"":0.8027856013465714,""predicted_HasDiabetes"":0,""probability_1"":0.1972143986534286}"
2,325,0,"{""probability_0"":0.5190959793145888,""predicted_HasDiabetes"":0,""probability_1"":0.48090402068541116}"
3,568,0,"{""probability_0"":0.890689772275908,""predicted_HasDiabetes"":0,""probability_1"":0.10931022772409206}"
4,319,1,"{""probability_0"":0.14088944323861552,""predicted_HasDiabetes"":1,""probability_1"":0.8591105567613845}"
...,...,...,...
763,412,0,"{""probability_0"":0.973575287265374,""predicted_HasDiabetes"":0,""probability_1"":0.02642471273462598}"
764,496,0,"{""probability_0"":0.9763723557196795,""predicted_HasDiabetes"":0,""probability_1"":0.023627644280320537}"
765,31,1,"{""probability_0"":0.03865647693266128,""predicted_HasDiabetes"":1,""probability_1"":0.9613435230673387}"
766,696,1,"{""probability_0"":0.019206081873594294,""predicted_HasDiabetes"":1,""probability_1"":0.9807939181264057}"


In [18]:
# Same scoring query, but without moving the predictions: only a teradataml
# DataFrame is created here and nothing is converted to pandas.
scoring_sql_in_db = """
     SELECT * FROM TRNG_BYOM.PMMLPredict (
        ON rb255002.taller_pima_patient_features AS InputTable
        ON (SELECT * FROM modelos WHERE model_id='xgb_model') AS ModelTable DIMENSION
    USING
          Accumulate ('PatientID')
    ) AS td"""
prediccionesTD = DataFrame.from_query(scoring_sql_in_db)

In [19]:
# Display a sample of the predictions held in the teradataml DataFrame.
prediccionesTD

PatientId,prediction,json_report
490,0,"{""probability_0"":0.9554923991091431,""predicted_HasDiabetes"":0,""probability_1"":0.044507600890856966}"
181,0,"{""probability_0"":0.8543539762482569,""predicted_HasDiabetes"":0,""probability_1"":0.14564602375174307}"
741,0,"{""probability_0"":0.9754018379437485,""predicted_HasDiabetes"":0,""probability_1"":0.024598162056251507}"
551,0,"{""probability_0"":0.9686045933091266,""predicted_HasDiabetes"":0,""probability_1"":0.031395406690873325}"
303,1,"{""probability_0"":0.1593008416928824,""predicted_HasDiabetes"":1,""probability_1"":0.8406991583071176}"
446,0,"{""probability_0"":0.9967667392499847,""predicted_HasDiabetes"":0,""probability_1"":0.0032332607500153033}"
499,0,"{""probability_0"":0.8822477782779066,""predicted_HasDiabetes"":0,""probability_1"":0.11775222172209342}"
78,1,"{""probability_0"":0.07018680078658923,""predicted_HasDiabetes"":1,""probability_1"":0.9298131992134108}"
686,0,"{""probability_0"":0.9349565561519769,""predicted_HasDiabetes"":0,""probability_1"":0.06504344384802309}"
528,0,"{""probability_0"":0.978538858801581,""predicted_HasDiabetes"":0,""probability_1"":0.021461141198419065}"


In [22]:
from teradataml import copy_to_sql
# Persist the predictions as table "prediccionDiabetes" with PatientId as the
# primary index; if_exists="replace" overwrites a previously created table.
copy_to_sql(
    df=prediccionesTD,
    table_name="prediccionDiabetes",
    primary_index="PatientId",
    if_exists="replace",
)

In [23]:
# Read back the "prediccionDiabetes" table to check its contents.
# NOTE(review): this rebinds `df`, which earlier referred to the feature table
# passed to PMMLPredict; re-running that earlier cell afterwards would pick up
# this table instead. Consider a distinct name.
df = DataFrame('rb255002."prediccionDiabetes"')
df

PatientId,prediction,json_report
733,0,"{""probability_0"":0.9893440145028043,""predicted_HasDiabetes"":0,""probability_1"":0.010655985497195698}"
419,1,"{""probability_0"":0.18567191509646086,""predicted_HasDiabetes"":1,""probability_1"":0.8143280849035391}"
735,0,"{""probability_0"":0.8819371031576648,""predicted_HasDiabetes"":0,""probability_1"":0.11806289684233517}"
509,0,"{""probability_0"":0.9907120523302234,""predicted_HasDiabetes"":0,""probability_1"":0.009287947669776576}"
465,0,"{""probability_0"":0.9738572046383646,""predicted_HasDiabetes"":0,""probability_1"":0.026142795361635485}"
617,0,"{""probability_0"":0.9954461425152445,""predicted_HasDiabetes"":0,""probability_1"":0.004553857484755449}"
522,0,"{""probability_0"":0.920327476959031,""predicted_HasDiabetes"":0,""probability_1"":0.07967252304096893}"
629,0,"{""probability_0"":0.9982109609619734,""predicted_HasDiabetes"":0,""probability_1"":0.0017890390380266115}"
391,1,"{""probability_0"":0.07416757525241102,""predicted_HasDiabetes"":1,""probability_1"":0.925832424747589}"
145,0,"{""probability_0"":0.9815069597156939,""predicted_HasDiabetes"":0,""probability_1"":0.018493040284306095}"
