# BigQueryML example


### Scenario
Data flows from PI Integrator into a BigQuery table; this notebook creates a model from that data and operationalizes it.
The hydraulic dataset is courtesy of https://archive.ics.uci.edu/ml/datasets/Condition+monitoring+of+hydraulic+systems

***

## setup - tables/views
hydraulic - table - PI Integrator view writes data every 5 minutes<br>
hydraulicv - view - average each instance dataset to one row<br>
hydraulic_instances_cooler_condition - table - specify a window that defines the dataset shape (2205 instances) and filter out invalid labels<br>
hydraulic_instances_cooler_conditionv - view - based upon table with similar name, remove columns not required for modeling<br>

***

## configuration

In [4]:
# install/upgrade bigquery client
!pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'

Collecting google-cloud-bigquery[bqstorage,pandas]
  Downloading google_cloud_bigquery-2.25.1-py2.py3-none-any.whl (200 kB)
[K     |████████████████████████████████| 200 kB 8.2 MB/s eta 0:00:01
Installing collected packages: google-cloud-bigquery
  Attempting uninstall: google-cloud-bigquery
    Found existing installation: google-cloud-bigquery 2.24.0
    Uninstalling google-cloud-bigquery-2.24.0:
      Successfully uninstalled google-cloud-bigquery-2.24.0
Successfully installed google-cloud-bigquery-2.25.1


In [66]:
# load the BigQuery magic commands (%%bigquery) used by the query cells in this notebook
%load_ext google.cloud.bigquery

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


In [67]:
!export GOOGLE_APPLICATION_CREDENTIALS="aveva-gcp-accelerator-dev.json"

In [68]:
%%bigquery
# Verify the connection: fetch one row from the table that PI Integrator populates.
# The %%bigquery cell magic must be the first line of the cell, so this comment sits below it.
select *
from `aveva-gcp-accelerator-dev.hydraulic.hydraulic`
limit 1

Query complete after 0.00s: 100%|██████████| 2/2 [00:00<00:00, 924.77query/s]                         
Downloading: 100%|██████████| 1/1 [00:01<00:00,  1.51s/rows]


Unnamed: 0,Hydraulic_Test_Rig,TimeStamp,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,Internal_Pump_Leakage,Motor_Power,Pressure1,...,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2,PIIntTSTicks,PIIntShapeID
0,Hydraulic Test Rig,2021-08-31 14:42:16+00:00,100.0,46.737,2.823,68.185,90.0,0.0,2295.63,144.96,...,41.023,47.211,45.238,34.363,100.0,0.58,7.9,9.62,637660177360000000,1


***

### create supporting tables and views to create model

In [117]:
%%bigquery
-- Build the modeling input table (source dataset of 2205 instances): rows of the hydraulicv
-- view whose cycle lies in the 2205-minute window starting at 2021-08-28.
CREATE TABLE IF NOT EXISTS `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_condition`
OPTIONS (
    description = 'table contains all instances from original dataset with valid cooler condition values'
)
AS
SELECT *
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulicv`
WHERE cycle >= TIMESTAMP "2021-08-28"
  AND cycle < TIMESTAMP_ADD(TIMESTAMP "2021-08-28", INTERVAL 2205 MINUTE)
  -- clean-up: keep only rows whose label is one of the accepted cooler condition values
  AND Cooler_Condition IN (1, 3, 20, 100)

Executing query with job ID: c2ebb43f-fa83-487f-af4c-2adda2d38a0a
Query executing: 0.40s


ERROR:
 409 Already Exists: Table aveva-gcp-accelerator-dev:hydraulic.hydraulic_instances_cooler_condition

(job ID: c2ebb43f-fa83-487f-af4c-2adda2d38a0a)

                                      -----Query Job SQL Follows-----                                       

    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |    .    |
   1:# create a table with the source dataset (2205 instances) as input to build a model
   2:create table `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_condition`
   3:options (
   4:    description='table contains all instances from original dataset with valid cooler condition values'
   5:) as 
   6:select *
   7:from  `aveva-gcp-accelerator-dev.hydraulic.hydraulicv`
   8:where (cycle >= timestamp "2021-08-28"
   9:       and 
  10:       cycle < timestamp_add(timestamp "2021-08-28", interval 2205 minute))
  11:and Cooler_Condition in (1,3,20,100) /* clean-up dataset */
    |    .    |    .    |    

In [119]:
%%bigquery df
-- Verify the dataset: load the full instance table into the variable `df`.
SELECT *
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_condition`

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 472.49query/s]                          
Downloading: 100%|██████████| 2201/2201 [00:01<00:00, 1347.04rows/s]


In [121]:
# preview the first five rows of the instance table
df.head(n=5)

Unnamed: 0,count,cycle_id,cycle,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,Internal_Pump_Leakage,Motor_Power,...,Pressure6,Stable,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2
0,60,280058.0,2021-08-28 00:58:00+00:00,100.0,27.273033,2.119633,59.835233,90.0,0.0,2384.1335,...,8.457833,0.0,54.8337,59.8335,56.918067,48.223683,100.0,0.61165,6.572,9.045167
1,60,280318.0,2021-08-28 03:18:00+00:00,100.0,26.508517,1.721867,59.537483,90.0,0.0,2452.101833,...,8.933333,0.0,46.5923,51.574217,48.744,42.096033,100.0,0.703767,6.641167,9.6145
2,60,280355.0,2021-08-28 03:55:00+00:00,100.0,26.63885,1.725717,59.851333,90.0,0.0,2441.127,...,8.919667,0.0,46.445233,51.435767,48.664717,42.005183,100.0,0.601533,6.6385,9.614167
3,60,280519.0,2021-08-28 05:19:00+00:00,20.0,26.760883,1.701383,57.5319,130.0,1.0,2463.7595,...,8.965833,0.0,45.941567,50.780117,48.084133,41.538133,80.0,0.633033,6.456,9.646667
4,60,280728.0,2021-08-28 07:28:00+00:00,20.0,27.304317,1.72675,56.793583,115.0,1.0,2467.523333,...,9.035833,0.0,44.827717,49.782933,47.028633,40.413483,73.0,0.616317,6.4345,9.686833


In [127]:
%%bigquery
-- Modeling view: all columns of the instance table except the ones not required
-- for modeling (count, cycle_id, cycle).
CREATE OR REPLACE VIEW `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_conditionv` AS
SELECT * EXCEPT (count, cycle_id, cycle)
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_condition`

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 1003.18query/s]


In [129]:
%%bigquery dfv
-- Inspect the modeling view: load all of its rows into the variable `dfv`.
SELECT *
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_conditionv`

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 763.43query/s] 
Downloading: 100%|██████████| 2201/2201 [00:01<00:00, 1562.25rows/s]


In [130]:
# preview the first five rows of the modeling view
dfv.head(n=5)

Unnamed: 0,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,Internal_Pump_Leakage,Motor_Power,Pressure1,Pressure2,Pressure3,...,Pressure6,Stable,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2
0,100.0,27.273033,2.119633,59.835233,90.0,0.0,2384.1335,156.514,105.791833,1.741167,...,8.457833,0.0,54.8337,59.8335,56.918067,48.223683,100.0,0.61165,6.572,9.045167
1,100.0,26.508517,1.721867,59.537483,90.0,0.0,2452.101833,158.308667,107.323333,1.7765,...,8.933333,0.0,46.5923,51.574217,48.744,42.096033,100.0,0.703767,6.641167,9.6145
2,100.0,26.63885,1.725717,59.851333,90.0,0.0,2441.127,158.345,107.381167,1.787667,...,8.919667,0.0,46.445233,51.435767,48.664717,42.005183,100.0,0.601533,6.6385,9.614167
3,20.0,26.760883,1.701383,57.5319,130.0,1.0,2463.7595,158.278333,107.0315,1.7405,...,8.965833,0.0,45.941567,50.780117,48.084133,41.538133,80.0,0.633033,6.456,9.646667
4,20.0,27.304317,1.72675,56.793583,115.0,1.0,2467.523333,158.602333,106.915833,1.748833,...,9.035833,0.0,44.827717,49.782933,47.028633,40.413483,73.0,0.616317,6.4345,9.686833


***

### create a model and evaluate

In [131]:
%%bigquery
-- Train a boosted-tree classifier on the modeling view, with cooler_condition as the label.
CREATE OR REPLACE MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`
OPTIONS (
    MODEL_TYPE = 'BOOSTED_TREE_CLASSIFIER',
    BOOSTER_TYPE = 'GBTREE',
    NUM_PARALLEL_TREE = 1,
    -- optional, currently disabled: MAX_ITERATIONS = 50
    -- optional, currently disabled: TREE_METHOD = 'HIST' (for larger datasets)
    EARLY_STOP = FALSE,
    SUBSAMPLE = 0.8,
    ENABLE_GLOBAL_EXPLAIN = TRUE,  -- for use with ML.GLOBAL_EXPLAIN
    INPUT_LABEL_COLS = ['cooler_condition']
)
AS
SELECT *
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_conditionv`

Query complete after 0.00s: 100%|██████████| 3/3 [00:00<00:00, 1661.77query/s]                        


In [145]:
%%bigquery
/*
How to read the evaluation metrics (confusion-matrix terms)

  actual \ predicted | True            | False
  -------------------+-----------------+----------------
  True               | True Positive   | False Negative
  False              | False Positive  | True Negative

  Recall    = TP / (TP + FN)      horizontal - completeness, do not miss any failures
  Accuracy  = (TP + TN) / total   diagonal   - percentage of correct predictions
  Precision = TP / (TP + FP)      vertical   - quality, minimize false alarms
*/
SELECT *
FROM ML.EVALUATE(
    MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`
)

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 525.67query/s]                          
Downloading: 100%|██████████| 1/1 [00:01<00:00,  1.67s/rows]


Unnamed: 0,precision,recall,accuracy,f1_score,log_loss,roc_auc
0,1.0,0.996764,0.997619,0.998379,0.013286,1.0


In [144]:
%%bigquery
-- Show the per-iteration training information recorded for the model.
SELECT *
FROM ML.TRAINING_INFO(
    MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`
)

Query complete after 0.00s: 100%|██████████| 2/2 [00:00<00:00, 1306.43query/s]                        
Downloading: 100%|██████████| 20/20 [00:01<00:00, 13.97rows/s]


Unnamed: 0,training_run,iteration,loss,eval_loss,learning_rate,duration_ms
0,0,20,0.002123,0.013286,0.3,42
1,0,19,0.00261,0.013303,0.3,49
2,0,18,0.003103,0.013878,0.3,80
3,0,17,0.00391,0.014206,0.3,48
4,0,16,0.004973,0.014736,0.3,62
5,0,15,0.006395,0.015593,0.3,50
6,0,14,0.00813,0.017379,0.3,56
7,0,13,0.010614,0.019416,0.3,52
8,0,12,0.013962,0.022342,0.3,53
9,0,11,0.01844,0.026028,0.3,48


In [133]:
%%bigquery
-- Show the global feature attributions of the model
-- (the model was created with ENABLE_GLOBAL_EXPLAIN = TRUE).
SELECT *
FROM ML.GLOBAL_EXPLAIN(
    MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`
)

Query complete after 0.00s: 100%|██████████| 3/3 [00:00<00:00, 1525.57query/s]                        
Downloading: 100%|██████████| 21/21 [00:01<00:00, 15.51rows/s]


Unnamed: 0,feature,attribution
0,Pressure2,1.085804
1,Temperature4,0.71069
2,Temperature2,0.500126
3,Temperature1,0.259551
4,Pressure1,0.116652
5,Hydraulic_Accumulator,0.111164
6,Pressure6,0.100679
7,Cooling_Efficiency,0.073101
8,Pressure3,0.044833
9,Temperature3,0.042381


### execute model inference

In [136]:
%%bigquery
-- Predict equipment status for a single row taken from the modeling view.
SELECT *
FROM ML.PREDICT(
    MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`,
    (
        SELECT *
        FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulic_instances_cooler_conditionv`
        LIMIT 1
    )
)

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 385.12query/s]                          
Downloading: 100%|██████████| 1/1 [00:01<00:00,  1.51s/rows]


Unnamed: 0,predicted_cooler_condition,predicted_cooler_condition_probs,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,Internal_Pump_Leakage,Motor_Power,Pressure1,...,Pressure6,Stable,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2
0,100.0,"[{'label': 100.0, 'prob': 0.9978728294372559},...",100.0,27.273033,2.119633,59.835233,90.0,0.0,2384.1335,156.514,...,8.457833,0.0,54.8337,59.8335,56.918067,48.223683,100.0,0.61165,6.572,9.045167


### simulate operationalizing model inference

In [146]:
%%bigquery
-- Show the latest record of the hydraulicv view (highest cycle value).
SELECT *
FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulicv`
ORDER BY cycle DESC
LIMIT 1

Query complete after 0.00s: 100%|██████████| 3/3 [00:00<00:00, 1257.03query/s]                        
Downloading: 100%|██████████| 1/1 [00:01<00:00,  1.31s/rows]


Unnamed: 0,count,cycle_id,cycle,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,Internal_Pump_Leakage,Motor_Power,...,Pressure6,Stable,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2
0,1,12119.0,2021-09-01 21:19:00+00:00,20.0,27.891,1.754,65.243,115.0,2.0,2508.74,...,9.04,0.0,45.031,49.91,47.16,40.414,90.0,0.594,0.95,9.7


In [140]:
%%bigquery
-- Run the model on the most recent instance of the hydraulicv view.
SELECT *
FROM ML.PREDICT(
    MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`,
    (
        SELECT *
        FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulicv`
        ORDER BY cycle DESC
        LIMIT 1
    )
)

Query complete after 0.00s: 100%|██████████| 3/3 [00:00<00:00, 1216.09query/s]                        
Downloading: 100%|██████████| 1/1 [00:01<00:00,  1.58s/rows]


Unnamed: 0,predicted_cooler_condition,predicted_cooler_condition_probs,count,cycle_id,cycle,Cooler_Condition,Cooling_Efficiency,Cooling_Power,Efficiency_Factor,Hydraulic_Accumulator,...,Pressure6,Stable,Temperature1,Temperature2,Temperature3,Temperature4,Valve_Condition,Vibration,Volume_Flow1,Volume_Flow2
0,100.0,"[{'label': 100.0, 'prob': 0.998157799243927}, ...",1,10444.0,2021-09-01 04:44:00+00:00,100.0,20.717,1.58,69.982,90.0,...,8.55,0.0,53.414,57.91,54.902,48.57,100.0,0.629,0.9,9.26


## simulate inference and write results to prediction table

In [None]:
%%bigquery
-- Table that stores inference results: one timestamp plus a STRUCT of predicted values.
-- It is created in the aveva-gcp-accelerator-dev project, i.e. the same project/dataset
-- that the MERGE cell writes to and the result query reads from.
CREATE TABLE IF NOT EXISTS `aveva-gcp-accelerator-dev.hydraulic.hydraulic_predictions` (
    timestamp TIMESTAMP,
    prediction STRUCT<
        stable FLOAT64,
        cooler FLOAT64,
        accumulator FLOAT64,
        pump_leakage FLOAT64,
        valve FLOAT64
    >
);

In [142]:
%%bigquery
-- Score the most recent rows of the hydraulicv view and append the results to the
-- prediction table. A row is inserted only when its timestamp is not yet present in the
-- target table (there is no WHEN MATCHED branch, so existing rows are never modified).
MERGE `aveva-gcp-accelerator-dev.hydraulic.hydraulic_predictions` AS P
USING (
    SELECT
        cycle AS timestamp,
        -- This model predicts the cooler condition only; the other fields are filled with 0.0.
        STRUCT(
            0.0 AS stable,
            predicted_cooler_condition AS cooler,
            0.0 AS accumulator,
            0.0 AS pump_leakage,
            0.0 AS valve
        ) AS prediction
    FROM ML.PREDICT(
        MODEL `aveva-gcp-accelerator-dev.hydraulic.hydraulic_model_xg_001`,
        (
            SELECT *
            FROM `aveva-gcp-accelerator-dev.hydraulic.hydraulicv`
            WHERE
                -- Window: strictly between "6 minutes ago" and "1 minute ago", both truncated
                -- to the minute. The window is applied to the cycle timestamp rather than to a
                -- DDHHMM-formatted number compared with cycle_id: the numeric form carries no
                -- month or year, so it matches the same day/time of other months and yields an
                -- empty window when the month rolls over.
                -- NOTE(review): assumes cycle_id is the DDHHMM encoding of cycle (the sample
                -- rows shown in this notebook are consistent with that) - confirm against the
                -- definition of the hydraulicv view.
                cycle > TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 6 MINUTE), MINUTE)
                AND cycle < TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MINUTE), MINUTE)
        )
    ) AS model
) AS T
ON P.timestamp = T.timestamp
WHEN NOT MATCHED THEN
    INSERT (timestamp, prediction)
    VALUES (T.timestamp, T.prediction)

Query complete after 0.00s: 100%|██████████| 6/6 [00:00<00:00, 3274.67query/s]                        


In [143]:
%%bigquery
# view inference results in output table
select *
from `aveva-gcp-accelerator-dev.hydraulic.hydraulic_predictions`
order by timestamp desc
limit 10

Query complete after 0.00s: 100%|██████████| 2/2 [00:00<00:00, 998.29query/s]                         
Downloading: 100%|██████████| 10/10 [00:01<00:00,  6.79rows/s]


Unnamed: 0,timestamp,prediction
0,2021-09-01 05:24:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
1,2021-09-01 05:23:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
2,2021-09-01 05:22:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
3,2021-09-01 05:21:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
4,2021-04-25 06:44:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
5,2021-04-25 06:43:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
6,2021-04-25 06:42:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
7,2021-04-25 06:41:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
8,2021-04-25 06:40:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."
9,2021-04-25 06:39:00+00:00,"{'stable': 0.0, 'cooler': 100.0, 'accumulator'..."


***