In [None]:
import json
import os
from datetime import datetime,date,timedelta,timezone

import joblib
import numpy as np
import pandas as pd
import pytz
import tensorflow
from tensorflow.keras.models import load_model

from google.api_core.exceptions import BadRequest
from google.cloud import bigquery
from google.cloud import storage
from google.cloud.exceptions import NotFound
from google.oauth2 import service_account


# Log the TensorFlow version in use. This needs the top-level `import tensorflow`:
# `from tensorflow.keras.models import load_model` alone does not bind the name `tensorflow`.
print(f'Tensorflow Version: {tensorflow.__version__}')


# Constant & Parameter Variable

In [2]:
# Reference date (YYYY-MM-DD) whose trailing window is used as model input.
# Leave it empty to start from the latest date found in the source table.
#today='2023-07-08' # last record of training data to predict the first movement
today=''

# Number of past days fed to the model and number of future days it forecasts.
input_sequence_length =60
output_sequence_length =5

projectId='pongthorn'

# os.path.join avoids the invalid "\I" escape sequence and the hard-coded Windows separator.
local_model_path=os.path.join("model","Incident_60To5_E150S15B32")

model_file='Incident_60To5_E150S15B32-M0122-0723.h5'
scaler_file='scaler_Incident_60To5_E150S15B32-M0122-0723.gz'
scalerPred_file='scaler_pred_Incident_60To5_E150S15B32-M0122-0723.gz'


# Model id = model file name without its extension; splitext strips only the final
# suffix, so an id that itself contains dots is not truncated.
model_id=os.path.splitext(model_file)[0]
print(model_id)

Incident_60To5_E150S15B32-M0122-0723


# BigQuery Setting

In [3]:
# Dataset that contains both the source table and the prediction table.
dataset_id="SMartDW"

# Fully-qualified table names in project.dataset.table form.
table_data_id = f"{projectId}.{dataset_id}.daily_incident"       # read: daily incident data
table_id = f"{projectId}.{dataset_id}.prediction_daily_incident"  # write: prediction records

print(table_id, table_data_id, sep="\n")

# One client instance is reused by every query and by the load job in this notebook.
client = bigquery.Client(project=projectId)

def load_data_bq(sql:str):
    """Run `sql` through the notebook-level BigQuery `client` and return the rows as a DataFrame."""
    return client.query(sql).to_dataframe()


pongthorn.SMartDW.prediction_daily_incident
pongthorn.SMartDW.daily_incident


# Load Model  Configuration MetaData

# Load model and scaler

In [4]:
# Folder holding the trained model file and the two fitted scaler files.
objectPaht=local_model_path
# os.path.join uses the platform separator instead of a hard-coded "/".
model_path=os.path.join(objectPaht,model_file)
scale_input_path=os.path.join(objectPaht,scaler_file)
scale_output_path=os.path.join(objectPaht,scalerPred_file)

print(model_path)
print(scale_input_path)
print(scale_output_path)

try:
    print("Model and Scaler Object Summary")
    x_model = load_model(model_path)
except Exception as ex:
    print(str(ex))
    # Bare raise keeps the original exception type and traceback.
    raise

try:
    print("Scaler Max-Min")
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
    # only load scaler files that come from a trusted location.
    x_scaler = joblib.load(scale_input_path)
    x_scalerPred=joblib.load(scale_output_path)

except Exception as ex:
    print(str(ex))
    raise

print("=====================================================================================================")

# summary() prints the layer table itself and returns None, so it is not wrapped in print().
x_model.summary()
#(max - min) / (X.max(axis=0) - X.min(axis=0))
print(f"max={x_scaler.data_max_} and min={x_scaler.data_min_} and scale={x_scaler.scale_}")
print(f"max={x_scalerPred.data_max_} and min={x_scalerPred.data_min_} and scale={x_scalerPred.scale_}")

model\Incident_60To5_E150S15B32/Incident_60To5_E150S15B32-M0122-0723.h5
model\Incident_60To5_E150S15B32/scaler_Incident_60To5_E150S15B32-M0122-0723.gz
model\Incident_60To5_E150S15B32/scaler_pred_Incident_60To5_E150S15B32-M0122-0723.gz
Model and Scaler Object Summary
Scaler Max-Min
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 60)                14880     
                                                                 
 dropout_3 (Dropout)         (None, 60)                0         
                                                                 
 dense_3 (Dense)             (None, 5)                 305       
                                                                 
Total params: 15,185
Trainable params: 15,185
Non-trainable params: 0
_________________________________________________________________
None
max=[16.] and min=[0.] and 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


# Declare and Initialize TS Model Variable

In [5]:
# Column names used in the source table and in the prediction output.
date_col='date'
prediction_col="count_incident"
feature_cols=[prediction_col]  # the model uses the target column as its only feature

# Number of trailing days requested from the source table.
nLastData=int(input_sequence_length)
# nLastData=int(input_sequence_length*1.5)

# The formatted string below carries no UTC offset, so the time is taken in UTC rather
# than in the host's local zone to keep the stored value unambiguous.
# NOTE(review): the prediction table appears to store pred_timestamp as a UTC timestamp — confirm.
dt_imported=datetime.now(timezone.utc)
dtStr_imported=dt_imported.strftime("%Y-%m-%d %H:%M:%S")
print(dtStr_imported)


2023-07-10 01:46:56


# Query Incident Data from BQ

In [6]:
# Announce the lookup of the starting (latest) record for the input window.
print(
    f"Check Last Record as starting point to retrive  sequence {prediction_col} "
    f"over the past {input_sequence_length} day"
)

Check Last Record as starting point to retrive  sequence count_incident over the past 60 day


In [7]:
# Resolve the reference date: either the latest date in the source table (today == '')
# or the explicitly configured `today`, provided a row exists for it.
lastDate=None
if today=='':
    sqlLastDate=f""" select max({date_col}) as LastDate  from `{table_data_id}` """

else:
    sqlLastDate=f""" 
    select {date_col} as LastDate  from `{table_data_id}` where {date_col}='{today}' order by datetime_imported desc limit 1
    """
print(sqlLastDate)

results = client.query(sqlLastDate)
dfLastDate=results.to_dataframe()
print(dfLastDate)

if not dfLastDate.empty:
    lastDate=dfLastDate.iloc[0,0]

# max() over an empty table still yields one row whose value is NULL, so an emptiness
# check alone is not enough; a null date is treated as "not found" as well.
if lastDate is None or pd.isna(lastDate):
    print( f"Not found {prediction_col}  at {today}")
    exit()
    # return f"Not found {prediction_col} at {today}  "
else:
    today=lastDate.strftime('%Y-%m-%d')


print(f"Take incident the last {input_sequence_length} days to Forecast  over the next {output_sequence_length}  days at {today}")


 select max(date) as LastDate  from `pongthorn.SMartDW.daily_incident` 
    LastDate
0 2023-07-08
Take incident the last 60 days to Forecast  over the next 5  days at 2023-07-08


In [8]:
print(f"Check whether {prediction_col} as {today} was predicted the future for the next {output_sequence_length} days")

# Look for prediction records already stored for the reference date.
sqlLastPred=f"""select prediction_date,pred_timestamp from `{table_id}` 
where prediction_date='{today}' order by pred_timestamp 
"""
print(sqlLastPred)
dfLastPred=load_data_bq(sqlLastPred)
if dfLastPred.empty:
    print(f"{today} has not been predicted {prediction_col} yet.")
    print(f"The system is about to predict {prediction_col} shortly.")
    print("=======================================================================================")
else:
    # Rows are ordered by pred_timestamp, so keep='last' retains the most recent run.
    dfLastPred=dfLastPred.drop_duplicates(subset=['prediction_date'],keep='last')
    print(f"{today} has been predicted {prediction_col}")
    print(dfLastPred)
    exit()
    # return f"Prediction price movement of {asset_name}-{prediction_col} at {today} has been predicted"

Check whether count_incident as 2023-07-08 was predicted the future for the next 5 days
select prediction_date,pred_timestamp from `pongthorn.SMartDW.prediction_daily_incident` 
where prediction_date='2023-07-08' order by pred_timestamp 

2023-07-08 has been predicted count_incident
  prediction_date            pred_timestamp
0      2023-07-08 2023-07-10 01:28:34+00:00


In [38]:
# First day of the input window, used for the log messages below.
dayAgo=datetime.strptime(today,'%Y-%m-%d') +timedelta(days=-nLastData)
print(f"Get data from {dayAgo.strftime('%Y-%m-%d')} - {today} as input to forecast")

# DATE '<value>' is a SQL date literal. The keyword is written out explicitly so the
# query no longer depends on the date column happening to be named "date".
sql=f"""
SELECT  *  FROM `{table_data_id}`  
Where  {date_col} between  DATE_SUB(DATE '{today}', INTERVAL {nLastData} DAY) 
and '{today}'  order by {date_col},datetime_imported
"""
print(sql)
query_result=client.query(sql)
df=query_result.to_dataframe()

# Rows are ordered by datetime_imported within each date, so keep='last' retains the
# most recently imported record per day.
df=df.drop_duplicates(subset=[date_col],keep='last')
df[date_col]=pd.to_datetime(df[date_col],format='%Y-%m-%d')
df=df.set_index(date_col)

# info() prints its report itself and returns None, so it is not wrapped in print().
df.info()
print(df.head())
print(df.tail())

if df.empty or len(df)<input_sequence_length:
    print(f"There is no enough data to make prediction during {dayAgo.strftime('%Y-%m-%d')} - {today}")
    exit()
    # return f"There is no enough data to make prediction during {dayAgo.strftime('%Y-%m-%d')} - {today}"

Get data from 2023-05-09 - 2023-07-08 as input to forecast

SELECT  *  FROM `pongthorn.SMartDW.daily_incident`  
Where  date between  DATE_SUB(date '2023-07-08', INTERVAL 60 DAY) 
and '2023-07-08'  order by date,datetime_imported

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 61 entries, 2023-05-09 to 2023-07-08
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   count_incident     61 non-null     float64       
 1   datetime_imported  61 non-null     datetime64[ns]
dtypes: datetime64[ns](1), float64(1)
memory usage: 1.4 KB
None
            count_incident          datetime_imported
date                                                 
2023-05-09             5.0 2023-07-09 15:04:27.770878
2023-05-10            10.0 2023-07-09 15:04:27.770878
2023-05-11             7.0 2023-07-09 15:04:27.770878
2023-05-12             1.0 2023-07-09 15:04:27.770878
2023-05-13             2.0 2023-07-09 1

In [None]:
# import matplotlib.pyplot as plt
# import matplotlib.dates as mdates
# import seaborn as sns

# plt.subplots(1, 1, figsize = (20, 10),sharex=True)

# ax1 = plt.subplot(2, 1, 1)
# plt.plot(df[[prediction_col]])
# plt.ylabel(prediction_col)


# plt.show()

# Get only Feature( 1 Indicator) to Predict itself in the next N days

In [None]:
print(f"Get Feature to Predict : {prediction_col} ")
# Keep only the model input column(s) and the most recent input_sequence_length rows.
#dfForPred=dfForPred.iloc[-(input_sequence_length+1):-1,:]
dfForPred=df[feature_cols].iloc[-input_sequence_length:,:]
# info() prints its report itself and returns None, so it is not wrapped in print().
dfForPred.info()
print(dfForPred.shape)

print(dfForPred.head(10))
print(dfForPred.tail(10))

# Make Prediction as Forecast

In [None]:
# Raw input window as a 2-D array: one row per day, one column per feature.
xUnscaled=dfForPred.values #print(xUnscaled.shape)
xScaled=x_scaler.transform(xUnscaled)
print(xScaled.shape)
# print(xScaled[-5:])


# Single-sample batch shaped (1, input_sequence_length, number of features).
xScaledToPredict= xScaled.reshape((1,input_sequence_length,len(feature_cols)))
print(xScaledToPredict.shape)

yPredScaled = x_model.predict(xScaledToPredict)
print(yPredScaled.shape, yPredScaled)

# Undo the output scaling and lay the forecast out as one value per row.
yPred = x_scalerPred.inverse_transform(yPredScaled).reshape((-1, 1))
print(yPred.shape, yPred)


print("============================Summary============================")
print(xUnscaled.shape, yPred.shape, sep="\n")

print("============================Input============================")
print(xUnscaled)
print("============================Output============================")
print(yPred)



# Build Prediction Result Data

## Feature Data

In [21]:
print("Create indexes from Dataframe dfForPred")
# Observed input values, indexed by the same dates as dfForPred and tagged as 'feature'.
dfFeature=pd.DataFrame(
    xUnscaled,
    index=dfForPred.index,
    columns=feature_cols,
).assign(type='feature')
print(dfFeature.shape)
print(dfFeature.head())
print(dfFeature.tail())

Create indexes from Dataframe dfForPred
(60, 2)
            count_incident     type
date                               
2023-05-10            10.0  feature
2023-05-11             7.0  feature
2023-05-12             1.0  feature
2023-05-13             2.0  feature
2023-05-14             0.0  feature
            count_incident     type
date                               
2023-07-04            13.0  feature
2023-07-05             5.0  feature
2023-07-06             5.0  feature
2023-07-07             1.0  feature
2023-07-08             0.0  feature


## Forecast/Prediction Value Data

In [22]:
print("Create indexes by specifying output_sequence_length stating from get last record of DFFeature+1")
# The forecast dates start on the day after the last observed feature date.
lastRowOfFeature=dfFeature.index.max()
firstRowofPrediction=lastRowOfFeature+timedelta(days=1)
# 'D' is the canonical daily frequency alias; the lowercase spelling is deprecated in newer pandas.
datePred=pd.date_range(start=firstRowofPrediction,freq='D',periods=output_sequence_length)
print(datePred)

Create indexes by specifying output_sequence_length stating from get last record of DFFeature+1
DatetimeIndex(['2023-07-09', '2023-07-10', '2023-07-11', '2023-07-12',
               '2023-07-13'],
              dtype='datetime64[ns]', freq='D')


In [23]:
# Forecast values indexed by the forecast dates and tagged as 'prediction'.
dfPrediction=pd.DataFrame(
    yPred,
    index=datePred,
    columns=[prediction_col],
).assign(type='prediction')
# Round the forecast values to whole numbers.
dfPrediction[prediction_col]=dfPrediction[prediction_col].round(0)
dfPrediction.index.name=date_col
print(dfPrediction.shape, dfPrediction, sep="\n")


(5, 2)
            count_incident        type
date                                  
2023-07-09             1.0  prediction
2023-07-10             6.0  prediction
2023-07-11             6.0  prediction
2023-07-12             6.0  prediction
2023-07-13             5.0  prediction


# Merge Feature and Prediction

In [24]:
# Stack the observed feature rows on top of the forecast rows in a single frame.
dfFeaToPred=pd.concat((dfFeature,dfPrediction),axis=0)
print(dfFeaToPred)

            count_incident        type
date                                  
2023-05-10            10.0     feature
2023-05-11             7.0     feature
2023-05-12             1.0     feature
2023-05-13             2.0     feature
2023-05-14             0.0     feature
...                    ...         ...
2023-07-09             1.0  prediction
2023-07-10             6.0  prediction
2023-07-11             6.0  prediction
2023-07-12             6.0  prediction
2023-07-13             5.0  prediction

[65 rows x 2 columns]


# Prepare to ingest data into BQ: create a DataFrame and convert it to JSON rows

In [25]:
# One header record per run: reference date, run timestamp and the model that produced it.
outputDF=pd.DataFrame(
    {
        "prediction_date":[today],
        "pred_timestamp":[dtStr_imported],
        "model_id":[model_id],
    }
)
# info() prints its report itself and returns None, so it is not wrapped in print().
outputDF.info()
outputDF

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   prediction_date  1 non-null      object
 1   pred_timestamp   1 non-null      object
 2   model_id         1 non-null      object
dtypes: object(3)
memory usage: 152.0+ bytes
None


Unnamed: 0,prediction_date,pred_timestamp,model_id
0,2023-07-08,2023-07-10 01:28:34,Incident_60To5_E150S15B32-M0122-0723


In [26]:
# The feature + forecast rows do not depend on the header record, so they are
# converted to JSON records once instead of on every loop iteration.
dataFeaToPred=dfFeaToPred.reset_index()[[date_col,prediction_col,'type']]
dataFeaToPred[date_col]=dataFeaToPred[date_col].dt.strftime('%Y-%m-%d')
print(dataFeaToPred)
jsonFeaToPred= json.loads(dataFeaToPred.to_json(orient = 'records'))

# Header record(s) as plain dicts, each carrying the rows above under "prediction_result".
jsonOutput = json.loads(outputDF.to_json(orient = 'records'))
for item in jsonOutput:
    item["prediction_result"]=jsonFeaToPred


# Keep a local copy of the payload that is sent to BigQuery.
with open("incident_prediction.json", "w") as outfile:
    json.dump(jsonOutput, outfile)
jsonOutput

          date  count_incident        type
0   2023-05-10            10.0     feature
1   2023-05-11             7.0     feature
2   2023-05-12             1.0     feature
3   2023-05-13             2.0     feature
4   2023-05-14             0.0     feature
..         ...             ...         ...
60  2023-07-09             1.0  prediction
61  2023-07-10             6.0  prediction
62  2023-07-11             6.0  prediction
63  2023-07-12             6.0  prediction
64  2023-07-13             5.0  prediction

[65 rows x 3 columns]


[{'prediction_date': '2023-07-08',
  'pred_timestamp': '2023-07-10 01:28:34',
  'model_id': 'Incident_60To5_E150S15B32-M0122-0723',
  'prediction_result': [{'date': '2023-05-10',
    'count_incident': 10.0,
    'type': 'feature'},
   {'date': '2023-05-11', 'count_incident': 7.0, 'type': 'feature'},
   {'date': '2023-05-12', 'count_incident': 1.0, 'type': 'feature'},
   {'date': '2023-05-13', 'count_incident': 2.0, 'type': 'feature'},
   {'date': '2023-05-14', 'count_incident': 0.0, 'type': 'feature'},
   {'date': '2023-05-15', 'count_incident': 7.0, 'type': 'feature'},
   {'date': '2023-05-16', 'count_incident': 7.0, 'type': 'feature'},
   {'date': '2023-05-17', 'count_incident': 2.0, 'type': 'feature'},
   {'date': '2023-05-18', 'count_incident': 11.0, 'type': 'feature'},
   {'date': '2023-05-19', 'count_incident': 7.0, 'type': 'feature'},
   {'date': '2023-05-20', 'count_incident': 1.0, 'type': 'feature'},
   {'date': '2023-05-21', 'count_incident': 3.0, 'type': 'feature'},
   {'date

# Ingest Data to BigQuery 

In [27]:
# Fetch the destination table; its schema is needed by the load job configuration.
try:
    table=client.get_table(table_id)
    print("Table {} already exists.".format(table_id))
except NotFound as ex :
    print(str(ex))
    # Stop here: without `table` the following cells cannot build the load job.
    #if error  please create table and other configuration as  bq_prediction.txt
    raise



Table pongthorn.SMartDW.prediction_daily_incident already exists.


In [28]:
# Load job configuration that reuses the destination table's own schema.
job_config = bigquery.LoadJobConfig(schema=table.schema)

In [29]:
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND  
#job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job = client.load_table_from_json(jsonOutput,table_id, job_config = job_config)

# The load job runs asynchronously: wait for it to finish before reading job.errors,
# otherwise success is reported while the job may still fail.
try:
    job.result()
except BadRequest as ex:
    # Rejected loads are reported through job.errors below.
    print(str(ex))

if job.errors is not None:
    print(job.error_result)
    print(job.errors)
else:
    print(f"Import to bigquery successfully  {len(jsonOutput)} records")
    
#job_config.schema

Import to bigquery successfully  1 records
