In [2]:
from google.cloud import bigquery

import pandas as pd
import numpy as np
from datetime import date,datetime,timedelta,timezone

import math
import os

import tensorflow as tf
import tensorflow_decision_forests as tfdf  # constantly registered to load model 
print(tf.__version__)
print(tfdf.__version__)

2.12.0
1.4.0


# Load Configuration Data and Constant Variable

In [17]:
# import functions_framework
# @functions_framework.http
# def demo_gbt_tf_predict_incident_severity(request):

all_prediction=False
is_evaluation=False


_model="demo_binary_gbt_tf_model"
model_version=f'{_model}_demo_t150723'

model_gs_path=f"gs://demo-tf-incident-pongthorn/{_model}"


projectId="pongthorn"
dataset_id="SMartML"
data_table="new2_incident"
prediction_table="new2_result_binary_prediction_incident"

unusedCols_unseen=['id','severity_name','imported_at']

# Get today's date
prediction_datetime=datetime.now(timezone.utc)
today_str=prediction_datetime.strftime("%Y-%m-%d")
today=datetime.strptime(today_str,"%Y-%m-%d")
print(f"Prediction at {prediction_datetime} for {today_str} ({today})")
      
print(model_gs_path)
print(f"Data: {data_table} and Prediction: {prediction_table}")    

Prediction at 2023-08-12 06:48:16.470270+00:00 for 2023-08-12 (2023-08-12 00:00:00)
gs://demo-tf-incident-pongthorn/demo_binary_gbt_tf_model
Data: new2_incident and Prediction: new2_result_binary_prediction_incident


# BigQuery Configuration

In [18]:
client = bigquery.Client(project=projectId)
new_data_table_id=f"{projectId}.{dataset_id}.{data_table}"
predictResult_table_id=f"{projectId}.{dataset_id}.{prediction_table}"
print(new_data_table_id)
print(predictResult_table_id)

pongthorn.SMartML.new2_incident
pongthorn.SMartML.new2_result_binary_prediction_incident


In [19]:
try:
    client.get_table(predictResult_table_id)  # Make an API request.
    print("Predict Result Table {} already exists.".format(predictResult_table_id))
    
except Exception as ex:
    schema = [
    bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("prediction_item_date", "DATE", mode="REQUIRED"),    
    bigquery.SchemaField("label_binary_severity", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("pred_binary_severity", "INTEGER", mode="REQUIRED"),       
    bigquery.SchemaField("prediction_datetime", "DATETIME", mode="REQUIRED") ,
    bigquery.SchemaField("model_version", "STRING", mode="REQUIRED")     
    ]

    table = bigquery.Table(predictResult_table_id,schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,field="prediction_item_date")
    
    table = client.create_table(table)  # Make an API request.
    
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )

Predict Result Table pongthorn.SMartML.new2_result_binary_prediction_incident already exists.


# Load unseen data(new incident) to Make Prediction

In [20]:
if int(all_prediction)==0:
    sql=f"""
    SELECT *  FROM `{new_data_table_id}` 
     WHERE DATE(imported_at) = '{today_str}'
     order by imported_at
    """
else:
    sql=f"""
    SELECT *  FROM `{new_data_table_id}` 
     order by imported_at
    """

print(sql)


query_result=client.query(sql)
df=query_result.to_dataframe()
if df.empty==True:
  print("no data to make prediction")  
  # return "no data to make prediction"
print(df.info())


    SELECT *  FROM `pongthorn.SMartML.new2_incident` 
     WHERE DATE(imported_at) = '2023-08-12'
     order by imported_at
    
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   id                  33 non-null     Int64         
 1   severity_id         33 non-null     Int64         
 2   severity_name       33 non-null     object        
 3   sla                 33 non-null     object        
 4   product_type        33 non-null     object        
 5   brand               33 non-null     object        
 6   service_type        33 non-null     object        
 7   incident_type       33 non-null     object        
 8   open_to_close_hour  33 non-null     float64       
 9   imported_at         33 non-null     datetime64[ns]
dtypes: Int64(2), datetime64[ns](1), float64(1), object(6)
memory usage: 2.8+ KB
None


# Build Unseen data by removing label and others

In [21]:
unseen =df.drop(columns=unusedCols_unseen)
print(unseen.info())
unseen.tail(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   severity_id         33 non-null     Int64  
 1   sla                 33 non-null     object 
 2   product_type        33 non-null     object 
 3   brand               33 non-null     object 
 4   service_type        33 non-null     object 
 5   incident_type       33 non-null     object 
 6   open_to_close_hour  33 non-null     float64
dtypes: Int64(1), float64(1), object(5)
memory usage: 2.0+ KB
None


Unnamed: 0,severity_id,sla,product_type,brand,service_type,incident_type,open_to_close_hour
28,4,24x7 4Hrs Response Time,Software,VMWare,Request,General Incident,155.833333
29,4,24x7 4Hrs Response Time,Software,Trend Micro,Request,General Incident,412.933333
30,4,24x7 4Hrs Resolution Time,Access Point,Cisco,Request,General Incident,33.016667
31,4,24x7 4Hrs Resolution Time,Access Point,Cisco,Request,General Incident,57.0
32,4,24x7 4Hrs Resolution Time,Access Point,Cisco,Request,General Incident,27.866667


# Convert dataframe to tensorflow dataset

In [22]:
unseen_ds= tfdf.keras.pd_dataframe_to_tf_dataset(unseen.drop(columns=['severity_id']))
print(unseen_ds)

<_PrefetchDataset element_spec={'sla': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'product_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'brand': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'service_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'incident_type': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'open_to_close_hour': TensorSpec(shape=(None,), dtype=tf.float64, name=None)}>


# Load Model

In [23]:
abc_model = tf.keras.models.load_model(model_gs_path)  
print(abc_model.summary())

[INFO 23-08-12 06:48:25.5962 UTC kernel.cc:1243] Loading model from path gs://demo-tf-incident-pongthorn/demo_binary_gbt_tf_model/assets/ with prefix 7a8644254fd943aa
[INFO 23-08-12 06:48:26.1430 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-08-12 06:48:26.1438 UTC kernel.cc:1075] Use fast generic engine


Model: "gradient_boosted_trees_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1
Trainable params: 0
Non-trainable params: 1
_________________________________________________________________
None


# Make prediction

In [24]:
predResultList=abc_model.predict(unseen_ds)
predServerityIDList=[]
for predResult in predResultList:
    _class= 1 if predResult[0]>=0.5 else 0  
    predServerityIDList.append(_class) #0=normal , 1=critical
    print(f"{predResult} : {_class}")

dfPred=pd.DataFrame(data=predServerityIDList,columns=["pred_binary_severity"])      
dfPred    

[0.9678189] : 1
[0.9335864] : 1
[0.9617086] : 1
[0.00885496] : 0
[0.79946023] : 1
[0.00855461] : 0
[0.97818965] : 1
[0.68895394] : 1
[0.64098704] : 1
[0.1665692] : 0
[0.3687919] : 0
[0.28544658] : 0
[0.24586609] : 0
[0.68773186] : 1
[0.42768547] : 0
[0.43371543] : 0
[0.01287257] : 0
[0.9745316] : 1
[0.6750108] : 1
[0.48936152] : 0
[0.7464763] : 1
[0.66087013] : 1
[0.6629097] : 1
[0.21891762] : 0
[0.32071224] : 0
[0.05414203] : 0
[0.04224585] : 0
[0.09265725] : 0
[0.01184955] : 0
[0.01144262] : 0
[0.03277632] : 0
[0.02245646] : 0
[0.03296293] : 0


2023-08-12 06:48:26.456390: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_5' with dtype string and shape [33]
	 [[{{node Placeholder/_5}}]]


Unnamed: 0,pred_binary_severity
0,1
1,1
2,1
3,0
4,1
5,0
6,1
7,1
8,1
9,0


# Map severity_id to label for actual value.
# Merge predicted value to main dataframe

In [25]:
def map_4to2_serverity(severity_id):
    if severity_id==1 or severity_id==2:
       return 1
    else:
       return 0 
df['label_binary_severity'] =df['severity_id'].apply(map_4to2_serverity)

dfPred
df=pd.concat([df,dfPred],axis=1)
df

Unnamed: 0,id,severity_id,severity_name,sla,product_type,brand,service_type,incident_type,open_to_close_hour,imported_at,label_binary_severity,pred_binary_severity
0,3325,2,Major,24x7 4Hrs Response Time,Server,HPE,Incident,General Incident,5.216667,2023-08-12 06:38:09.094373,1,1
1,3161,2,Major,24x7 4Hrs Response Time,Server,DELL,Incident,Network Card Failure,5.4,2023-08-12 06:38:09.094373,1,1
2,3056,2,Major,24x7 4Hrs Response Time,Server,DELL,Incident,Network Cable Failure,55.166667,2023-08-12 06:38:09.094373,1,1
3,2902,3,Minor,24x7 6Hrs Response Time,Server,HPE,Request,Configuration Change,546.9,2023-08-12 06:38:09.094373,0,0
4,3317,3,Minor,24x7 4Hrs Resolution Time,Server,Oracle,Incident,General Incident,214.6,2023-08-12 06:38:09.094373,0,1
5,3327,4,Cosmetic,24x7 4Hrs Response Time,Server,DELL,Request,General Incident,1.683333,2023-08-12 06:38:09.094373,0,0
6,3315,1,Critical,24x7 4Hrs Response Time,Storage,NetApp,Incident,Power Supply Failure,3.416667,2023-08-12 06:38:09.094373,1,1
7,3115,2,Major,24x7 4Hrs Response Time,Storage,NetApp,Incident,Hard Disk Drive Failure,3.3,2023-08-12 06:38:09.094373,1,1
8,3058,2,Major,24x7 4Hrs Response Time,Storage,NetApp,Incident,Hard Disk Drive Failure,3.083333,2023-08-12 06:38:09.094373,1,1
9,3067,2,Major,24x7 4Hrs Response Time,Storage,NetApp,Incident,Software,0.366667,2023-08-12 06:38:09.094373,1,0


In [26]:
# Evaluate model and Show Metric Report

In [27]:
if is_evaluation:
    from sklearn.metrics import confusion_matrix,classification_report
    className=list(set().union(list(df['pred_binary_severity'].unique()),list(df['label_binary_severity'].unique())))
    print(className)
    actualClass=[  f'actual-{x}' for x in  className]
    predictedlClass=[  f'pred-{x}' for x in className]
    y_true=list(df['label_binary_severity'])
    y_pred=list(df['pred_binary_severity'])
    cnf_matrix = confusion_matrix(y_true,y_pred)
    cnf_matrix

    # #index=actual , column=prediction
    cm_df = pd.DataFrame(cnf_matrix,
                         index = actualClass, 
                         columns = predictedlClass)
    print(cm_df)
    print(classification_report(y_true, y_pred, labels=className))

[0, 1]
          pred-0  pred-1
actual-0      17       2
actual-1       3      11
              precision    recall  f1-score   support

           0       0.85      0.89      0.87        19
           1       0.85      0.79      0.81        14

    accuracy                           0.85        33
   macro avg       0.85      0.84      0.84        33
weighted avg       0.85      0.85      0.85        33



# Transform data for Writing Prediction Result to BQ

In [18]:
df=df[['id','label_binary_severity','pred_binary_severity']]
df['prediction_item_date']=today
df['prediction_datetime']=datetime.now()
df['model_version']=model_version
print(df.tail())

      id  label_binary_severity  pred_binary_severity prediction_item_date  \
95  3191                      0                     0           2023-08-11   
96  3189                      0                     0           2023-08-11   
97  3216                      0                     0           2023-08-11   
98  3215                      0                     0           2023-08-11   
99  3172                      0                     0           2023-08-11   

          prediction_datetime                          model_version  
95 2023-08-11 16:27:42.602139  demo_binary_gbt_tf_model_demo_t150723  
96 2023-08-11 16:27:42.602139  demo_binary_gbt_tf_model_demo_t150723  
97 2023-08-11 16:27:42.602139  demo_binary_gbt_tf_model_demo_t150723  
98 2023-08-11 16:27:42.602139  demo_binary_gbt_tf_model_demo_t150723  
99 2023-08-11 16:27:42.602139  demo_binary_gbt_tf_model_demo_t150723  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['prediction_item_date']=today
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['prediction_datetime']=datetime.now()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['model_version']=model_version


# Load data to BQ

In [19]:
def loadDataFrameToBQ():
    # WRITE_TRUNCATE , WRITE_APPEND
    try:
        if all_prediction==1: 
            job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
        else:
            job_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")

        job = client.load_table_from_dataframe(
            df, predictResult_table_id, job_config=job_config
        )
        job.result()  # Wait for the job to complete.
        print("Total Prediction ML ", len(df), "Imported bigquery successfully")

    except BadRequest as e:
        print("Bigquery Error\n")
        for e in job.errors:
            print('ERROR: {}'.format(e['message']))

try:
    loadDataFrameToBQ()
except Exception as ex:
    raise ex

Total Prediction ML  100 Imported bigquery successfully


In [None]:
#return f"Imported prediction result  successfully for IsAllPrediction={all_prediction}"