In [164]:
# https://www.tensorflow.org/decision_forests/tutorials/beginner_colab
# https://www.tensorflow.org/decision_forests/tutorials/predict_colab
# https://www.tensorflow.org/decision_forests/tutorials/automatic_tuning_colab
# https://www.tensorflow.org/decision_forests/tutorials/dtreeviz_colab
from google.cloud import aiplatform
from google.cloud import bigquery

import pandas as pd
import numpy as np

import math
import os

import tensorflow as tf
print(tf.__version__)

2.12.0


In [165]:
# ! pip install --upgrade tensorflow-decision-forests
import tensorflow_decision_forests as tfdf
print(tfdf.__version__)

1.3.0


# Configuration Variables

In [166]:
# BigQuery coordinates of the data: <project>.<dataset>.<table>.
projectId, dataset_id = 'pongthorn', 'SMartML'
train_name, test_name = 'train2_incident', 'test2_incident'

# Fully-qualified table ids used when querying the train/test tables.
# NOTE: `test_tabel_id` keeps its (misspelled) name because other cells reference it.
train_table_id = f"{projectId}.{dataset_id}.{train_name}"
test_tabel_id = f"{projectId}.{dataset_id}.{test_name}"

print(f"train-ds = {train_table_id}")
print(f"test-ds = {test_tabel_id}")

train-ds = pongthorn.SMartML.train2_incident
test-ds = pongthorn.SMartML.test2_incident


In [167]:
# Metric passed to `model.compile` before evaluation.
metric = "accuracy"
# Cloud Storage location the trained model is saved to and loaded back from.
model_gs_path = "gs://demo-tuned-tf-incident-pongthorn/model_df_tf"

In [168]:
# Alternative column setup, kept for reference: it uses the "range_*" columns as
# categorical inputs and drops the raw hour columns instead.
# unusedCols=['severity_id','severity_name','label_binary_severity','open_to_close_hour','response_to_resolved_hour']
# cateCols=['sla','product_type','brand','service_type','incident_type','range_open_to_close_hour','range_response_to_resolved_hour']
# numbericCols=[]

# Columns removed from every loaded frame.
unusedCols = [
    'severity_id',
    'severity_name',
    'label_binary_severity',
    'range_open_to_close_hour',
    'range_response_to_resolved_hour',
]
# Feature columns kept after loading, in this order (presumably categorical
# first, then numeric, going by the names).
cateCols = [
    'sla',
    'product_type',
    'brand',
    'service_type',
    'incident_type',
]
numbericCols = [
    'open_to_close_hour',
    'response_to_resolved_hour',
]

# Target column; the binary-severity label is the commented-out alternative.
labelCol = 'label_multi_severity'
# labelCol='label_binary_severity'

# Load & Prepare Data

In [169]:
def load_ml_data(data_path):
    """Read a CSV file and strip the columns listed in ``unusedCols``.

    Args:
        data_path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame without the ``unusedCols`` columns.

    Raises:
        KeyError: If a column named in ``unusedCols`` is missing from the file.
    """
    return pd.read_csv(data_path).drop(columns=unusedCols)

def load_data_bq(sql: str, bq_client=None):
    """Run a BigQuery query and return the frame used for modelling.

    The result has the ``unusedCols`` columns removed, the label cast to int,
    and its columns ordered as ``[labelCol] + cateCols + numbericCols``.

    Args:
        sql: Query text to execute.
        bq_client: Object exposing ``query(sql)`` whose result offers
            ``to_dataframe()`` (e.g. a ``bigquery.Client``). When omitted, the
            notebook-level ``client`` is used, so existing one-argument calls
            behave as before.

    Returns:
        A pandas DataFrame holding the label column followed by the feature
        columns.

    Raises:
        KeyError: If a column named in ``unusedCols``, ``labelCol``,
            ``cateCols`` or ``numbericCols`` is missing from the query result.
    """
    # Passing the client explicitly removes the hidden dependency on a global
    # that is only created in a later cell.
    active_client = client if bq_client is None else bq_client

    df = active_client.query(sql).to_dataframe()
    df = df.drop(columns=unusedCols)
    df[labelCol] = df[labelCol].astype('int')

    # Fixed column order: label first, then the configured feature columns.
    return df[[labelCol] + cateCols + numbericCols]

In [170]:
# Client used by `load_data_bq` to run the queries against the configured project.
client = bigquery.Client(project=projectId)

train = load_data_bq(f"SELECT * FROM {train_table_id}")
test = load_data_bq(f"SELECT * FROM {test_tabel_id}")

# `DataFrame.info()` prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line after each summary.
train.info()
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2076 entries, 0 to 2075
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   label_multi_severity       2076 non-null   int64  
 1   sla                        2076 non-null   object 
 2   product_type               2076 non-null   object 
 3   brand                      2076 non-null   object 
 4   service_type               2076 non-null   object 
 5   incident_type              2076 non-null   object 
 6   open_to_close_hour         2076 non-null   float64
 7   response_to_resolved_hour  2076 non-null   float64
dtypes: float64(2), int64(1), object(5)
memory usage: 129.9+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 519 entries, 0 to 518
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   label_multi_severity       519 non-null    i

In [171]:
# Show the last five training rows as a quick look at the loaded data.
train.tail(5)

Unnamed: 0,label_multi_severity,sla,product_type,brand,service_type,incident_type,open_to_close_hour,response_to_resolved_hour
2071,1,24x7 4Hrs Response Time,Software,Trend Micro,Request,Software,2.0,1.916667
2072,2,24x7 6Hrs Resolution Time,Software,Trend Micro,Incident,Configuration Change,119.616667,119.033333
2073,2,24x7 4Hrs Response Time,Software,Trend Micro,Incident,General Incident,2247.85,2247.85
2074,2,24x7 6Hrs Resolution Time,Software,Trend Micro,Incident,General Incident,306.583333,306.2
2075,2,24x7 6Hrs Resolution Time,Software,Trend Micro,Incident,General Incident,21.883333,21.6


In [172]:
# Number of rows taken from the end of the test set and held out as "unseen"
# examples; they are kept in `unseen` for the prediction demo.
no_unseen = 19
unseen = test.iloc[-no_unseen:, :]

# `info()` prints the summary itself and returns None; print(unseen.info())
# added a spurious "None" line to the output.
unseen.info()
unseen

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 500 to 518
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   label_multi_severity       19 non-null     int64  
 1   sla                        19 non-null     object 
 2   product_type               19 non-null     object 
 3   brand                      19 non-null     object 
 4   service_type               19 non-null     object 
 5   incident_type              19 non-null     object 
 6   open_to_close_hour         19 non-null     float64
 7   response_to_resolved_hour  19 non-null     float64
dtypes: float64(2), int64(1), object(5)
memory usage: 1.3+ KB
None


Unnamed: 0,label_multi_severity,sla,product_type,brand,service_type,incident_type,open_to_close_hour,response_to_resolved_hour
500,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,1.183333,0.916667
501,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.55,0.416667
502,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.583333,0.566667
503,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.466667,0.416667
504,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.416667,0.4
505,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,Software,13.633333,0.833333
506,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.933333,0.933333
507,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.65,0.65
508,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.216667,0.2
509,2,24x7 4Hrs Response Time,Firewall,Palo Alto,Incident,General Incident,472.516667,404.85


In [173]:
# Remove the held-out `unseen` rows from the evaluation set.
# Dropping by index label (rather than slicing the last `no_unseen` rows off by
# position) keeps this cell safe to re-run: rows that are already gone are
# ignored, whereas the positional slice cut another `no_unseen` rows on every
# execution. This relies on the frame's index being unique (the loaded frame
# reports a RangeIndex).
test = test.drop(index=unseen.index, errors="ignore")

# `info()` prints the summary itself and returns None, so it is not wrapped in print().
test.info()
test.tail()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   label_multi_severity       500 non-null    int64  
 1   sla                        500 non-null    object 
 2   product_type               500 non-null    object 
 3   brand                      500 non-null    object 
 4   service_type               500 non-null    object 
 5   incident_type              500 non-null    object 
 6   open_to_close_hour         500 non-null    float64
 7   response_to_resolved_hour  500 non-null    float64
dtypes: float64(2), int64(1), object(5)
memory usage: 31.4+ KB
None


Unnamed: 0,label_multi_severity,sla,product_type,brand,service_type,incident_type,open_to_close_hour,response_to_resolved_hour
495,2,8x5 4Hrs Response Time,Software,Microsoft,Request,General Incident,16.0,13.5
496,2,8x5 4Hrs Response Time,Software,Microsoft,Incident,General Incident,0.366667,0.2
497,0,24x7 4Hrs Response Time,Firewall,Palo Alto,Request,Configuration Change,641.033333,638.466667
498,1,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,1.783333,0.016667
499,2,24x7 4Hrs Resolution Time,Firewall,Palo Alto,Incident,General Incident,0.616667,0.416667


# Build Model

In [174]:
# Convert both pandas frames into TF-DF datasets, using `labelCol` as the label.
train_ds, test_ds = (
    tfdf.keras.pd_dataframe_to_tf_dataset(frame, label=labelCol)
    for frame in (train, test)
)

In [175]:
# Train a gradient-boosted trees model with the library's default settings.
# A random forest is the commented-out alternative:
# model = tfdf.keras.RandomForestModel()
model = tfdf.keras.GradientBoostedTreesModel()
model.fit(train_ds)

Use /var/tmp/tmpzh9rkj8d as temporary training directory
Reading training dataset...


2023-06-27 08:01:32.494615: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_6' with dtype string and shape [2076]
	 [[{{node Placeholder/_6}}]]


Training dataset read in 0:00:00.224357. Found 2076 examples.
Training model...
Model trained in 0:00:00.383498
Compiling model...
Model compiled.


[INFO 23-06-27 08:01:33.0724 UTC kernel.cc:1242] Loading model from path /var/tmp/tmpzh9rkj8d/model/ with prefix fd0bbd9ee4f44aa6
[INFO 23-06-27 08:01:33.0944 UTC decision_forest.cc:660] Model loaded with 136 root(s), 6380 node(s), and 7 input feature(s).
[INFO 23-06-27 08:01:33.0953 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesGeneric" built
[INFO 23-06-27 08:01:33.0958 UTC kernel.cc:1074] Use fast generic engine
2023-06-27 08:01:33.110434: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype double and shape [2076]
	 [[{{node Placeholder/_4}}]]


<keras.callbacks.History at 0x7f27dbb318a0>

In [176]:
# Attach the configured metric, then score the model on the test dataset.
model.compile(metrics=[metric])

# The result has to be captured: previously the assignment was commented out, so
# the loop below only worked when `evaluation` was left over from an earlier
# run and raised NameError on a fresh kernel. `return_dict=True` makes
# evaluate() return {metric_name: value} instead of a plain list.
evaluation = model.evaluate(test_ds, return_dict=True)
for name, value in evaluation.items():
  print(f"{name}: {value:.4f}")

loss: 0.0000
accuracy: 0.7060


2023-06-27 08:01:33.352105: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_5' with dtype string and shape [500]
	 [[{{node Placeholder/_5}}]]


# Save Model

In [177]:
# Export the trained model to the Cloud Storage path configured in `model_gs_path`.
model.save(model_gs_path)



INFO:tensorflow:Assets written to: gs://demo-tuned-tf-incident-pongthorn/model_df_tf/assets


INFO:tensorflow:Assets written to: gs://demo-tuned-tf-incident-pongthorn/model_df_tf/assets


# Load Model

In [178]:
# Load the saved model back from `model_gs_path`; predictions below use this reloaded copy.
abc_model = tf.keras.models.load_model(model_gs_path)  

[INFO 23-06-27 08:02:17.7610 UTC kernel.cc:1242] Loading model from path gs://demo-tuned-tf-incident-pongthorn/model_df_tf/assets/ with prefix fd0bbd9ee4f44aa6
[INFO 23-06-27 08:02:18.1944 UTC decision_forest.cc:660] Model loaded with 136 root(s), 6380 node(s), and 7 input feature(s).
[INFO 23-06-27 08:02:18.1953 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesGeneric" built
[INFO 23-06-27 08:02:18.1958 UTC kernel.cc:1074] Use fast generic engine


# Make Prediction

In [179]:
# Build the inference dataset from the held-out rows. The label column is
# removed first, so the dataset carries feature columns only.
unseen_features = unseen.drop(columns=[labelCol])
unseen_ds = tfdf.keras.pd_dataframe_to_tf_dataset(unseen_features)
unseen_ds

<_PrefetchDataset element_spec={'sla': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'product_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'brand': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'service_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'incident_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'open_to_close_hour': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'response_to_resolved_hour': TensorSpec(shape=(None,), dtype=tf.float64, name=None)}>

In [180]:
# Score the held-out rows with the reloaded model; the per-row outputs are printed in the next cell.
predResultList=abc_model.predict(unseen_ds)



2023-06-27 08:02:27.198756: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [19]
	 [[{{node Placeholder/_1}}]]


In [181]:
# Lookup from predicted class index to the severity number that is reported.
map_severity_to_class = {0: 4, 1: 3, 2: 2, 3: 1}

# Find the highest-scoring class of every prediction row in a single call,
# then report each row together with its class and mapped severity.
predicted_classes = tf.argmax(predResultList, -1).numpy()
for predResult, _class in zip(predResultList, predicted_classes):
    print(f"{predResult} : {_class} as severity#{map_severity_to_class[_class]}")

[0.00961858 0.05841344 0.9206418  0.01132618] : 2 as severity#2
[0.00912646 0.03197109 0.94815576 0.01074669] : 2 as severity#2
[0.00951547 0.03333384 0.9459459  0.01120477] : 2 as severity#2
[0.01011193 0.03193929 0.9472127  0.010736  ] : 2 as severity#2
[0.01011193 0.03193929 0.9472127  0.010736  ] : 2 as severity#2
[0.00949452 0.08582594 0.8954805  0.00919898] : 2 as severity#2
[0.00961858 0.05841344 0.9206418  0.01132618] : 2 as severity#2
[0.00952273 0.03259631 0.94666773 0.01121331] : 2 as severity#2
[0.00906669 0.03499257 0.94522727 0.0107135 ] : 2 as severity#2
[0.00935035 0.09552983 0.8881262  0.00699364] : 2 as severity#2
[0.00698659 0.13805676 0.8480582  0.00689839] : 2 as severity#2
[0.01192594 0.2820107  0.694288   0.01177539] : 2 as severity#2
[0.00737353 0.16599399 0.8194053  0.00722716] : 2 as severity#2
[0.9037126  0.08144306 0.01039661 0.00444784] : 0 as severity#4
[0.9092407  0.07490288 0.0113813  0.00447505] : 0 as severity#4
[0.923732   0.06390007 0.00817693 0.0041