In [105]:
from google.cloud import bigquery
import pandas as pd
import json

from datetime import date,datetime,timedelta

In [106]:
table_id = "pongthorn.SMartML.new_incident"
predictResult_table_id="pongthorn.SMartML.new_result_prediction_incident"
mapping_file="incident_sevirity_to_class.json"
PATH_FOLDER_ARTIFACTS="model"
unUsedColtoPredict=['severity','id','severity_id','severity_name','imported_at']

In [107]:

with open(mapping_file, 'r') as json_file:
     map_sevirity_to_class= json.load(json_file)
print(map_sevirity_to_class)

{'Cosmatic': 0, 'Minor': 1, 'Major': 2, 'Critical': 3}


In [108]:
# Get today's date
prediction_datetime=datetime.now()

today = date.today()
# Yesterday date
yesterday = today - timedelta(days = 1)
str_today=today.strftime('%Y-%m-%d')
str_yesterday=yesterday.strftime('%Y-%m-%d')
print(f"Get data between {str_yesterday} to {str_today} to predict sevirity level")

Get data between 2023-03-27 to 2023-03-28 to predict sevirity level


In [109]:
def load_data_bq(sql:str):
 client_bq = bigquery.Client()
 query_result=client_bq.query(sql)
 df=query_result.to_dataframe()
 return df

In [110]:
sql=f"""
SELECT *  FROM `{table_id}` 
WHERE DATE(imported_at) >= '{str_yesterday}' and DATE(imported_at) < '{str_today}'  
LIMIT 3
"""

dfNewData=load_data_bq(sql)
dfNewData=dfNewData.drop_duplicates(subset=['id'],keep='first')

dfNewData.insert(2, 'severity', dfNewData['severity_name'].map(map_sevirity_to_class),True)


print(dfNewData.info())
dfNewData

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 0 to 2
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   id                         3 non-null      Int64         
 1   severity_id                3 non-null      Int64         
 2   severity                   3 non-null      int64         
 3   severity_name              3 non-null      object        
 4   sla                        3 non-null      object        
 5   product_type               3 non-null      object        
 6   brand                      3 non-null      object        
 7   service_type               3 non-null      object        
 8   incident_type              3 non-null      object        
 9   open_to_close_hour         3 non-null      float64       
 10  response_to_resolved_hour  3 non-null      float64       
 11  imported_at                3 non-null      datetime64[ns]
dtypes: Int64(2),

Unnamed: 0,id,severity_id,severity,severity_name,sla,product_type,brand,service_type,incident_type,open_to_close_hour,response_to_resolved_hour,imported_at
0,2491,3,1,Minor,24x7 4Hrs Resolution Time,Storage,NetApp,Request,Hard Disk Drive Failure,7.666667,7.666667,2023-03-27 05:27:08.968019
1,2516,3,1,Minor,24x7 4Hrs Resolution Time,Firewall,CheckPoint,Request,General Incident,24.483333,24.016667,2023-03-27 05:27:08.968019
2,2502,3,1,Minor,24x7 6Hrs Resolution Time,Software,SAPB1,Request,General Incident,313.0,313.0,2023-03-27 05:27:08.968019


In [111]:
model = tf.keras.models.load_model(PATH_FOLDER_ARTIFACTS)    
print(f"Load from {PATH_FOLDER_ARTIFACTS}")
print(model.summary())

Load from model
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 sla (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 product_type (InputLayer)      [(None, 1)]          0           []                               
                                                                                                  
 brand (InputLayer)             [(None, 1)]          0           []                               
                                                                                                  
 service_type (InputLayer)      [(None, 1)]          0           []                               
                                                                            

In [117]:
pdPrediction=pd.DataFrame(columns=['_id','predict_severity','prob_severity'])

for  row_dict in dfNewData.to_dict(orient="records"):
      incident_id=row_dict['id']
      print(f"{incident_id} - {row_dict['severity']}({row_dict['severity_name']})") 
      for key_removed in unUsedColtoPredict:
       row_dict.pop(key_removed)
      # print(row_dict)  

      input_dict = {name: tf.convert_to_tensor([value]) for name, value in row_dict.items()}


      predictionResult = model.predict(input_dict)
      result_str=','.join([ str(prob) for prob in predictionResult[0]])  
      print(result_str)   

      prob = tf.nn.softmax(predictionResult)
      prob_pct=(100 * prob)  
      _class = tf.argmax(predictionResult,-1).numpy()[0]
      
      dictPrediction={'_id':incident_id, 'predict_severity':_class,'prob_severity':result_str} 
      pdPrediction =pd.concat([pdPrediction,pd.DataFrame.from_dict([dictPrediction])] )

      print(f"{prob_pct} %   as {_class}")     
      print("======================================================================================")
            
dfPredictData=pd.merge(dfNewData,pdPrediction,how='inner',left_on='id',right_on='_id')
dfPredictData=dfPredictData.drop(columns=['_id'])
dfPredictData['predict_severity']=dfPredictData['predict_severity'].astype('int')
dfPredictData=dfPredictData[['id','prob_severity','predict_severity','severity']]
dfPredictData['prediction_item_date']= datetime.strptime(str(yesterday), '%Y-%m-%d')
dfPredictData['prediction_datetime']=prediction_datetime

2491 - 1(Minor)
0.20702027,0.700546,0.08181793,0.01061574
[[23.030441 37.72574  20.320179 18.923645]] %   as 1
2516 - 1(Minor)
0.12305665,0.11055092,0.7646284,0.0017640598
[[20.952015 20.691626 39.7976   18.558765]] %   as 2
2502 - 1(Minor)
0.9542967,0.043510735,0.0021846048,7.928289e-06
[[46.01469  18.507475 17.758223 17.71961 ]] %   as 0


In [118]:
print(dfPredictData.info())
dfPredictData

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   id                    3 non-null      object        
 1   prob_severity         3 non-null      object        
 2   predict_severity      3 non-null      int32         
 3   severity              3 non-null      int64         
 4   prediction_item_date  3 non-null      datetime64[ns]
 5   prediction_datetime   3 non-null      datetime64[ns]
dtypes: datetime64[ns](2), int32(1), int64(1), object(2)
memory usage: 156.0+ bytes
None


Unnamed: 0,id,prob_severity,predict_severity,severity,prediction_item_date,prediction_datetime
0,2491,"0.20702027,0.700546,0.08181793,0.01061574",1,1,2023-03-27,2023-03-28 23:23:22.868127
1,2516,"0.12305665,0.11055092,0.7646284,0.0017640598",2,1,2023-03-27,2023-03-28 23:23:22.868127
2,2502,"0.9542967,0.043510735,0.0021846048,7.928289e-06",0,1,2023-03-27,2023-03-28 23:23:22.868127


In [120]:
#https://cloud.google.com/bigquery/docs/samples/bigquery-create-table#bigquery_create_table-python

try:
    client = bigquery.Client()
    client.get_table(predictResult_table_id)  # Make an API request.
    print("Predict Result Table {} already exists.".format(predictResult_table_id))
except Exception as ex:
    schema = [
    bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("prob_severity", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("predict_severity", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("severity", "INTEGER", mode="REQUIRED"),    
    bigquery.SchemaField("prediction_item_date", "DATETIME", mode="REQUIRED"),    
    bigquery.SchemaField("prediction_datetime", "DATETIME", mode="REQUIRED") 
    ]

    table = bigquery.Table(predictResult_table_id,schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,field="prediction_item_date")
    
    table = client.create_table(table)  # Make an API request.
    
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )

Created table pongthorn.SMartML.new_result_prediction_incident


In [121]:
def loadDataFrameToBQ():
    try:
        job_config = bigquery.LoadJobConfig(
            write_disposition="WRITE_APPEND",
        )

        job = client.load_table_from_dataframe(
            dfPredictData, predictResult_table_id, job_config=job_config
        )
        job.result()  # Wait for the job to complete.
        print("Total Prediction ML ", len(dfPredictData), "Imported igquery successfully")

    except BadRequest as e:
        print("Bigquery Error\n")
        for e in job.errors:
            print('ERROR: {}'.format(e['message']))

try:
    loadDataFrameToBQ()
except Exception as ex:
    raise ex

Total Prediction ML  3 Imported igquery successfully
