In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime,date,timedelta,timezone
import pytz
import json

from tensorflow.keras.models import load_model
import joblib

from google.cloud import storage
from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud.exceptions import NotFound
from google.api_core.exceptions import BadRequest


# `from tensorflow.keras.models import load_model` binds only `load_model`,
# so the package itself has to be imported before its version can be read.
import tensorflow

print('Tensorflow Version: ' + tensorflow.__version__)


# Constants & Parameter Variables

In [None]:
# Reference date of the forecast, formatted 'YYYY-MM-DD'.
# Leave empty to use the most recent date found in the source table.
#today='2023-07-08' # last record of training data to predict the first movement
today=''

# The last `input_sequence_length` days are the model input;
# the next `output_sequence_length` days are forecast.
input_sequence_length =60
output_sequence_length =5

projectId='pongthorn'

# Forward slash: works on every OS and matches the "/" used when the file paths
# are assembled later. The previous "model\Incident..." relied on "\I" being an
# unrecognised (and deprecated) escape sequence.
local_model_path="model/Incident_60To5_E150S15B32"

model_file='Incident_60To5_E150S15B32-M0122-0723.h5'
scaler_file='scaler_Incident_60To5_E150S15B32-M0122-0723.gz'
scalerPred_file='scaler_pred_Incident_60To5_E150S15B32-M0122-0723.gz'


# Model id = file name without its extension. splitext removes only the final
# extension, so a model name that itself contains a dot is not truncated.
model_id=os.path.splitext(model_file)[0]
print(model_id)

# BigQuery Setting

In [None]:
# Fully-qualified BigQuery table names: `table_data_id` is queried for the
# input series, `table_id` is queried for and appended with prediction rows.
dataset_id = "SMartDW"
table_data_id = f"{projectId}.{dataset_id}.daily_incident"
table_id = f"{projectId}.{dataset_id}.prediction_daily_incident"

print(table_id, table_data_id, sep="\n")

# Single client shared by every query and load job in this notebook.
client = bigquery.Client(project=projectId)

def load_data_bq(sql:str):
    """Run `sql` with the module-level BigQuery `client` and return the result.

    Args:
        sql: Query text passed unchanged to `client.query`.

    Returns:
        Whatever `to_dataframe()` of the query job returns.
    """
    return client.query(sql).to_dataframe()


# Load Model  Configuration MetaData

# Load model and scaler

In [None]:
# Locations of the Keras model and the two fitted scalers inside the model folder.
objectPaht=local_model_path
model_path=f"{objectPaht}/{model_file}"
scale_input_path=f"{objectPaht}/{scaler_file}"
scale_output_path=f"{objectPaht}/{scalerPred_file}"

print(model_path)
print(scale_input_path)
print(scale_output_path)

try:
    print("Model and Scaler Object Summary")
    x_model = load_model(model_path)
except Exception as ex:
    # Log the reason, then re-raise the original exception so its concrete
    # type and traceback are kept (previously replaced by a bare Exception).
    print(str(ex))
    raise

try:
    print("Scaler Max-Min")
    # NOTE(review): joblib.load unpickles the file; only load scaler files
    # that come from a trusted source.
    x_scaler = joblib.load(scale_input_path)
    x_scalerPred=joblib.load(scale_output_path)
except Exception as ex:
    print(str(ex))
    raise

print("=====================================================================================================")

# summary() prints the layer table itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
x_model.summary()
# Scaler attributes as fitted (presumably MinMaxScaler objects - confirm):
#(max - min) / (X.max(axis=0) - X.min(axis=0))
print(f"max={x_scaler.data_max_} and min={x_scaler.data_min_} and scale={x_scaler.scale_}")
print(f"max={x_scalerPred.data_max_} and min={x_scalerPred.data_min_} and scale={x_scalerPred.scale_}")

# Declare and Initialize TS Model Variable

In [None]:
# Column names used in the SQL and DataFrame code below. The model is
# univariate: the predicted column is also its only feature.
date_col, prediction_col = 'date', "count_incident"
feature_cols = [prediction_col]

# Number of days subtracted from the reference date when querying input rows.
# (Alternative kept from the original: int(input_sequence_length*1.5).)
nLastData = int(input_sequence_length)

# Timestamp written with the prediction rows.
# NOTE(review): datetime.now() is naive local time; the original also noted
# datetime.now(timezone.utc) as an alternative - confirm which the table expects.
dt_imported = datetime.now()
dtStr_imported = format(dt_imported, "%Y-%m-%d %H:%M:%S")
print(dtStr_imported)


# Query Incident Data from BQ

In [None]:
# Announce the lookup of the newest source record, which anchors the input window.
print(
    f"Check Last Record as starting point to retrive  sequence {prediction_col} over the past {input_sequence_length} day"
)

In [None]:
# Resolve the reference date of the forecast:
#   - `today` empty  -> newest date present in the source table
#   - `today` given  -> that date, provided the table holds a row for it
lastDate=None
if today=='':
    sqlLastDate=f""" select max({date_col}) as LastDate  from `{table_data_id}` """

else:
    sqlLastDate=f""" 
    select {date_col} as LastDate  from `{table_data_id}` where {date_col}='{today}' order by datetime_imported desc limit 1
    """
print(sqlLastDate)

dfLastDate=load_data_bq(sqlLastDate)
print(dfLastDate)

# max() over an empty table still returns one row whose value is NULL, so an
# emptiness check alone lets a missing date through to strftime() below.
if dfLastDate.empty or pd.isna(dfLastDate.iloc[0,0]):
    print( f"Not found {prediction_col}  at {today}")
    # exit() is a helper for interactive shells and behaves differently per
    # front end; raising SystemExit stops the run explicitly.
    raise SystemExit
    # return f"Not found {prediction_col} at {today}  "

lastDate=dfLastDate.iloc[0,0]
today=lastDate.strftime('%Y-%m-%d')


print(f"Take incident the last {input_sequence_length} days to Forecast  over the next {output_sequence_length}  days at {today}")


In [None]:
# Skip the run when a prediction row already exists for the reference date.
print(f"Check whether {prediction_col} as {today} was predicted the future for the next {output_sequence_length} days")

sqlLastPred=f"""select prediction_date,pred_timestamp from `{table_id}` 
where prediction_date='{today}' order by pred_timestamp 
"""
print(sqlLastPred)
dfLastPred=load_data_bq(sqlLastPred)
if not dfLastPred.empty:
    # Rows are ordered by pred_timestamp, so keep='last' shows the newest run.
    dfLastPred=dfLastPred.drop_duplicates(subset=['prediction_date'],keep='last')
    print(f"{today} has been predicted {prediction_col}")
    print(dfLastPred)
    # exit() is a helper for interactive shells and behaves differently per
    # front end; raising SystemExit stops the run explicitly.
    raise SystemExit
    # return f"Prediction price movement of {asset_name}-{prediction_col} at {today} has been predicted"

print(f"{today} has not been predicted {prediction_col} yet.")
print(f"The system is about to predict {prediction_col} shortly.")
print("=======================================================================================")

In [None]:
# Start of the input window, computed locally for the log messages only;
# the query computes the same bound with DATE_SUB.
dayAgo=datetime.strptime(today,'%Y-%m-%d') +timedelta(days=-nLastData)
print(f"Get data from {dayAgo.strftime('%Y-%m-%d')} - {today} as input to forecast")

# DATE_SUB needs a DATE expression as its first argument. The original put
# {date_col} in front of the quoted value, which produced a valid DATE literal
# only because the column happens to be named "date"; the keyword is now explicit.
# BETWEEN is inclusive on both ends.
sql=f"""
SELECT  *  FROM `{table_data_id}`  
Where  {date_col} between  DATE_SUB(DATE '{today}', INTERVAL {nLastData} DAY) 
and '{today}'  order by {date_col},datetime_imported
"""
print(sql)
df=load_data_bq(sql)

# Rows are ordered by date then import time, so keep='last' retains the most
# recently imported row of each date.
df=df.drop_duplicates(subset=[date_col],keep='last')
df[date_col]=pd.to_datetime(df[date_col],format='%Y-%m-%d')
df=df.set_index(date_col)

# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print().
df.info()
print(df.head())
print(df.tail())

if df.empty or len(df)<input_sequence_length:
    print(f"There is no enough data to make prediction during {dayAgo.strftime('%Y-%m-%d')} - {today}")
    # exit() is a helper for interactive shells and behaves differently per
    # front end; raising SystemExit stops the run explicitly.
    raise SystemExit
    # return f"There is no enough data to make prediction during {dayAgo.strftime('%Y-%m-%d')} - {today}"

In [None]:
# import matplotlib.pyplot as plt
# import matplotlib.dates as mdates
# import seaborn as sns

# plt.subplots(1, 1, figsize = (20, 10),sharex=True)

# ax1 = plt.subplot(2, 1, 1)
# plt.plot(df[[prediction_col]])
# plt.ylabel(prediction_col)


# plt.show()

# Select the single feature (1 indicator) used to predict its own values over the next N days

In [None]:
print(f"Get Feature to Predict : {prediction_col} ")
# Keep only the model's input column(s) and the most recent
# `input_sequence_length` rows of the queried data.
dfForPred=df[feature_cols].iloc[-input_sequence_length:,:]
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only added a stray "None" line.
dfForPred.info()
print(dfForPred.shape)

print(dfForPred.head(10))
print(dfForPred.tail(10))

# Make Prediction as Forecast

In [None]:
# Scale the raw feature window with the input scaler.
xUnscaled = dfForPred.values
xScaled = x_scaler.transform(xUnscaled)
print(xScaled.shape)

# Add a leading batch axis: (1, input_sequence_length, number of feature columns).
xScaledToPredict = xScaled.reshape((1, input_sequence_length, len(feature_cols)))
print(xScaledToPredict.shape)

# Forecast in scaled space.
yPredScaled = x_model.predict(xScaledToPredict)
print(yPredScaled.shape, yPredScaled)

# Back to original units, then flatten into a single column of values.
yPred = x_scalerPred.inverse_transform(yPredScaled)
yPred = yPred.reshape(-1, 1)
print(yPred.shape, yPred)


print("============================Summary============================", xUnscaled.shape, yPred.shape, sep="\n")

print("============================Input============================", xUnscaled, sep="\n")
print("============================Output============================", yPred, sep="\n")



# Build Prediction Result Data

## Feature Data

In [None]:
print("Create indexes from Dataframe dfForPred")
# Observed input window, re-using the date index of dfForPred and tagged
# with type='feature'.
dfFeature = pd.DataFrame(index=dfForPred.index, columns=feature_cols, data=xUnscaled)
dfFeature['type'] = 'feature'
print(dfFeature.shape, dfFeature.head(), dfFeature.tail(), sep="\n")

## Forecast/Prediction Value Data

In [None]:
print("Create indexes by specifying output_sequence_length stating from get last record of DFFeature+1")
# Forecast dates start the day after the last observed feature row and run
# for `output_sequence_length` consecutive calendar days.
lastRowOfFeature=dfFeature.index.max()
firstRowofPrediction=lastRowOfFeature+timedelta(days=1)
# 'D' is the canonical calendar-day alias; the lowercase 'd' spelling is deprecated.
datePred=pd.date_range(start=firstRowofPrediction,freq='D',periods=output_sequence_length)
print(datePred)

In [None]:
# Forecast rows: one per date in datePred, values rounded to whole numbers
# and tagged with type='prediction'.
dfPrediction = pd.DataFrame(index=datePred, columns=[prediction_col], data=yPred)
dfPrediction.index.name = date_col
dfPrediction[prediction_col] = dfPrediction[prediction_col].round(0)
dfPrediction['type'] = 'prediction'
print(dfPrediction.shape, dfPrediction, sep="\n")


# Merge Feature and Prediction

In [None]:
# Stack the observed feature rows on top of the forecast rows (row-wise concat).
dfFeaToPred = pd.concat(objs=(dfFeature, dfPrediction), axis=0)
print(dfFeaToPred)

# Prepare to ingest data into BQ: create a DataFrame and convert it to JSON rows

In [None]:
# One header row per run: the reference date, the run timestamp and the model id.
outputDF=pd.DataFrame(data=[ [today,dtStr_imported,model_id] ],columns=["prediction_date","pred_timestamp","model_id"])
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only added a stray "None" line.
outputDF.info()
outputDF

In [None]:
# Feature and prediction rows as JSON records with the date as 'YYYY-MM-DD'.
# This does not depend on the header row, so it is built once, before the loop.
# assign() returns a new frame, so no column is set on a slice of another frame.
dataFeaToPred=(
    dfFeaToPred.reset_index()[[date_col,prediction_col,'type']]
    .assign(**{date_col: lambda frame: frame[date_col].dt.strftime('%Y-%m-%d')})
)
print(dataFeaToPred)
jsonFeaToPred= json.loads(dataFeaToPred.to_json(orient = 'records'))

# Attach the records to every header row as the nested "prediction_result" field.
jsonOutput = json.loads(outputDF.to_json(orient = 'records'))
for item in jsonOutput:
    item["prediction_result"]=jsonFeaToPred

# Keep a local copy of the payload; the encoding is explicit so the file does
# not depend on the platform default.
with open("incident_prediction.json", "w", encoding="utf-8") as outfile:
    json.dump(jsonOutput, outfile)
jsonOutput

# Ingest Data to BigQuery 

In [None]:
# The load job configured below reads `table.schema`, so a missing destination
# table must stop the run here rather than fail later with an unrelated NameError.
try:
    table=client.get_table(table_id)
    print("Table {} already exists.".format(table_id))
except NotFound as ex:
    print(str(ex))
    raise RuntimeError(
        f"Table {table_id} was not found; please create the table and other "
        "configuration as described in bq_prediction.txt"
    ) from ex
except Exception as ex:
    # Any other failure: log it and let the original error propagate.
    print(str(ex))
    raise



In [None]:
# Load-job configuration that re-uses the schema of the existing destination table.
job_config = bigquery.LoadJobConfig(schema=table.schema)

In [None]:
# Append the JSON rows to the prediction table.
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
#job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job = client.load_table_from_json(jsonOutput,table_id, job_config = job_config)

# load_table_from_json only starts the job. Without waiting for it, `job.errors`
# is inspected before the job has run and success is reported prematurely.
try:
    job.result()
except BadRequest as ex:
    # Rejected load (for example rows that do not match the schema): report it
    # the same way the original did.
    print(str(ex))
    print(job.error_result)
    print(job.errors)
else:
    print(f"Import to bigquery successfully  {len(jsonOutput)} records")
    
#job_config.schema