# explore Google Anomaly detection solution

# required packages

In [None]:
# install/upgrade the BigQuery client with the `bqstorage` and `pandas` extras
# (run through the shell via the notebook `!` prefix)
!pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'

In [None]:
# install the Secret Manager client, pinned to 2.5.0, into the user site-packages
!pip3 install --user google-cloud-secret-manager==2.5.0

In [None]:
# install the OCS sample library; the ADH cells star-import it further down
!python -m pip install ocs_sample_library_preview

# functions

In [263]:
# Import the BigQuery and Secret Manager client libraries, plus json and pandas.
from google.cloud import bigquery
from google.cloud import secretmanager
import json
import pandas as pd

In [None]:
# access secret manager

def create_secret(project_id,secret_id):
    """
    create_secret - create a secret in Secret Manager and print its resource name
    param: project_id - project that becomes the parent of the secret
    param: secret_id - id of the secret to create

    The secret is requested with automatic replication; no payload is passed
    here. Nothing is returned.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    # assemble the whole request first, then hand it to the API in one call
    create_request = {
        "parent": f"projects/{project_id}",
        "secret_id": secret_id,
        "secret": {"replication": {"automatic": {}}},
    }
    created = sm_client.create_secret(request=create_request)

    # Print the new secret name.
    print(f'Created secret: {created.name}')

def add_secret(project_id,secret_id,secret):
    """
    add_secret - add a new version to an existing secret
    param: project_id - project that owns the secret
    param: secret_id - id of the secret that receives the new version
    param: secret - text payload; it is encoded as UTF-8 bytes before upload

    Nothing is returned.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    # the parent of a secret version is the secret itself, not the project
    sm_client.add_secret_version(
        request={
            "parent": f"projects/{project_id}/secrets/{secret_id}",
            "payload": {"data": secret.encode('UTF-8')},
        }
    )

def get_secret(project_id,secret_id,debug=0):
    """
    get_secret - return the payload of the latest version of a secret as text
    param: project_id - project that owns the secret
    param: secret_id - id of the secret to read
    param: debug - when truthy, print the resource name that is requested
    returns: the payload bytes decoded as UTF-8
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    # always address the "latest" version alias of the secret
    version_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    if debug:
        print(version_name)

    latest = sm_client.access_secret_version(request={"name": version_name})
    return latest.payload.data.decode('UTF-8')

def list_secret(project_id,secret_id):
    """
    list_secret - access the latest version of one secret and return the raw response
    param: project_id - project that owns the secret
    param: secret_id - id of the secret to read
    returns: the response object of access_secret_version (payload not decoded)

    Despite the name nothing is listed here. The requested resource name is
    always printed.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    version_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    print(version_name)

    return sm_client.access_secret_version(request={"name": version_name})

def list_secrets(project_id):
    """
    list_secrets - request the secrets of a project
    param: project_id - project whose secrets are requested
    returns: the response object of the client's list_secrets call

    The parent resource name is always printed.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    project_path = f"projects/{project_id}"
    print(project_path)

    return sm_client.list_secrets(request={"parent": project_path})

# configuration

In [324]:
# execute ../config.py inside the notebook namespace
# NOTE(review): presumably this defines PROJECT_ID and secret_name used by the cells below - confirm
%run '../config.py'

In [315]:
# read configuration
import os
fname = os.path.join("..","parameters.json")
# the context manager closes the file handle again; JSON text is read as UTF-8
with open(fname, encoding="utf-8") as config_file:
    config_function = json.load(config_file)

# secret manager

In [None]:
# show the gcloud accounts available to this notebook
!gcloud auth list

In [None]:
# https://codelabs.developers.google.com/codelabs/secret-manager-python#8
#client = secretmanager.SecretManagerServiceClient()
#client.list_secrets(request = {"parent": f"projects/{PROJECT_ID_ID}" })
#client.get_secret(request = {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}"})
#client.list_secret_versions(request = {"parent": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}"})
#client.get_secret_version(request= {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}/versions/latest"})
#client.access_secret_version(request = {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}/versions/latest"})
#client.access_secret_version(request = {"name": "projects/326764451207/secrets/anom_detect/versions/1" })

In [None]:
# create the secret that will hold the configuration (no payload yet)
create_secret(PROJECT_ID,secret_name)

In [247]:
# store the configuration read from parameters.json, serialised as JSON text, as a new secret version
add_secret(PROJECT_ID,secret_name,json.dumps(config_function))

In [None]:
# display the result of listing the project's secrets
list_secrets(PROJECT_ID)

In [248]:
# read the latest secret version back and parse it from JSON text
configuration = json.loads(get_secret(PROJECT_ID,secret_name))

# read from Big Query ingress table

In [254]:
from google.cloud import bigquery

# client is created without an explicit project or credentials
bqclient = bigquery.Client()

# sample 10 rows of the table named by the `bigquery_table_ingress_test` configuration entry
sql ="""SELECT * FROM `{}` LIMIT 10"""
query=sql.format(configuration['bigquery_table_ingress_test'])
#query=sql.format(configuration['bigquery_table_ingress_test'])

# submit the query; the rows are turned into a DataFrame in the next cell
bq_result = bqclient.query(query)

In [255]:
# materialise the query result as a DataFrame and display it
# the result must not be bound to `pd`: that name is the pandas module alias
ingress_df = bq_result.to_dataframe()
ingress_df

Unnamed: 0,assetId,tagName,eventTimeStamp,value,ingestionTimeStamp
0,Hydraulic test bench,Pressure1,2022-02-11 00:17:14+00:00,144.0,
1,Hydraulic test bench,Pressure1,2022-02-11 00:17:18+00:00,144.0,
2,Hydraulic test bench,Pressure1,2022-02-11 00:39:10+00:00,144.0,
3,Hydraulic test bench,Pressure1,2022-02-11 00:13:13+00:00,144.25,
4,Hydraulic test bench,Pressure1,2022-02-11 00:32:10+00:00,144.75,
5,Hydraulic test bench,Pressure1,2022-02-11 00:41:25+00:00,152.75,
6,Hydraulic test bench,Pressure1,2022-02-11 00:42:41+00:00,152.75,
7,Hydraulic test bench,Pressure1,2022-02-11 00:42:44+00:00,152.75,
8,Hydraulic test bench,Pressure1,2022-02-11 00:44:24+00:00,152.75,
9,Hydraulic test bench,Pressure1,2022-02-11 00:44:25+00:00,152.75,


# read data from ADH

In [None]:

import datetime
from io import StringIO
from ocs_sample_library_preview import * # default
import pandas as pd
import pprint
import urllib

In [197]:

def endpoint_request(baseClient, url):
    """
    endpoint_request - send a GET request through baseClient with additional headers
    param: baseClient - object providing sdsHeaders() and request()
    param: url - address to query
    returns: whatever baseClient.request returns
    """
    additional_headers = {
        'Accept-Encoding': "gzip",  # set also by requests
        'Request-Timeout': "300",   # in seconds
    }

    # start from the client's own headers and add ours on top
    request_headers = baseClient.sdsHeaders()
    for header_name, header_value in additional_headers.items():
        request_headers[header_name] = header_value

    # query!
    return baseClient.request("get", url, headers=request_headers)

def get_timerange(minute_period, now=None):
    """
    get_timerange - return a start and end timestamp for the preceding minute_period time range
    param: minute_period - time in minutes for the period
    param: now - optional reference datetime; when omitted the current UTC time is used
    returns: dict with "start" and "end" datetimes that are minute_period minutes apart

    Example with minute_period=5:
      now = 00:17:14 -> start 00:10:00, end 00:15:00
      now = 00:15:30 -> start 00:05:00, end 00:10:00 (first minute of a period, see below)
    """
    # NOTE: utcnow() returns a naive datetime and is deprecated since Python 3.12; it is
    # kept so that callers which pass no `now` continue to receive naive UTC values
    ts = datetime.datetime.utcnow() if now is None else now
    # set time to most recent period minutes interval with offset 00:00:00
    # care of google TODO: add ref.
    ts_end = ts.replace(second=0, microsecond=0) - datetime.timedelta(
        minutes=ts.minute % minute_period  # minutes elapsed since the last period boundary
    )
    # when ts lies in the first minute of a period the boundary is the current minute;
    # step back one more period so the range ends minute_period minutes earlier
    if ts.minute == ts_end.minute:
        ts_end = ts_end - datetime.timedelta(minutes=minute_period)

    ts_start = ts_end - datetime.timedelta(minutes=minute_period)

    return { "start": ts_start, "end": ts_end }

def get_timerange_test():
    """
    get_timerange_test - return a fixed one hour time range for use in test queries
    returns: dict with "start" and "end" as text timestamps (not datetime objects)
    """
    # an earlier test window was 2022-02-11T00:00:00Z - 2022-02-11T01:00:00Z
    window_start = '2022-02-11T01:00:00Z'
    window_end = '2022-02-11T02:00:00Z'
    return {"start": window_start, "end": window_end}

In [None]:
# get configuration

try:
    parameters = json.loads(get_secret(PROJECT_ID,secret_name))
except Exception as e:
    # put the exception text into the message itself
    print(f'get parameters error: {e}')
    # re-raise so the cell stops with a traceback; every later cell needs `parameters`
    raise

In [None]:

# range = get_timerange()  # once implemented
# NOTE(review): `range` shadows the Python builtin from here on; the name is kept because
# the request URL further down is built from range['start'] and range['end']
range = get_timerange_test()
# plain string concatenation works because get_timerange_test() returns text timestamps
print("start: " + range['start'] + "\n" + "end:   " +  range['end'])

In [None]:
# connect to OSIsoft Cloud Services
# tenant id, resource and the client id/secret all come from the configuration secret
ocsClient = OCSClient("v1", parameters['tenant_id'], parameters['resource'], parameters['client_id'], parameters['client_secret'])

In [277]:
# API version as reported by the client
api_version = ocsClient.baseClient.api_version
# tenant level base address
baseUri = f'{ocsClient.uri}/api/{api_version}/Tenants/{ocsClient.tenant}'
# namespace level base address; the data view request is built on top of it
baseNamespaceUri = f"{baseUri}/Namespaces/{parameters['namespace']}"
# namespace id, passed to the Types/Streams calls further down
namespace_id = parameters['namespace']

In [198]:
# `import urllib` on its own does not guarantee that the `parse` submodule is loaded
import urllib.parse

options = "&form=csvh" # Note: code below removes header row, due to 'csvh' selection for additional pages
# percent-encode the data view id so that reserved characters cannot break the URL path
dataview = urllib.parse.quote(parameters['dataview'])
url = f"{baseNamespaceUri}/DataViews/{dataview}/Data/Interpolated?startIndex={range['start']}&endIndex={range['end']}&interval={parameters['interval']}{options}"
response = endpoint_request(ocsClient.baseClient,url)
# collect the pages in a list and join once at the end instead of growing a string
pages = [response.text]

# paging assumes csv format only TODO support JSON paged responses
while response.links.get('next') is not None and response.status_code in (200, 204):
    response = endpoint_request(ocsClient.baseClient,response.links['next']['url'])
    # remove header row; partition() yields an empty body for a page without any newline
    _, _, page_body = response.text.partition("\n")
    pages.append(page_body)

response_text = '\r\n'.join(pages)

In [199]:
# number of characters downloaded
len(response_text)

4239401

# process dataset from ADH and write to BigQuery

In [200]:
# parse the downloaded csv text; nothing is done when the download was empty
if response_text:
    # the Timestamp column is parsed as dates and becomes the index
    df = pd.read_csv(StringIO(response_text), index_col='Timestamp', parse_dates=True)
    df = df.rename(columns={"Field": "tagName","Timestamp": "eventTimeStamp"})
    df.index.name = "eventTimeStamp"

In [None]:
# NOTE(review): this cell repeats the previous one statement for statement - candidate for removal
if len(response_text) > 0:
    #df = pd.read_csv(StringIO(response_text))
    # the Timestamp column is parsed as dates and becomes the index
    df = pd.read_csv(StringIO(response_text),index_col='Timestamp',parse_dates=True)
    df.rename(columns={"Field": "tagName","Timestamp": "eventTimeStamp"},inplace=True)
    df.index.rename("eventTimeStamp",inplace=True)

In [201]:
# summary of index, columns and dtypes of the parsed data
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 61217 entries, 2022-02-11 00:00:00+00:00 to 2022-02-11 01:00:00+00:00
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   assetId  61217 non-null  object 
 1   tagName  61217 non-null  object 
 2   Value    61217 non-null  float64
dtypes: float64(1), object(2)
memory usage: 1.9+ MB


In [202]:
# first rows of the parsed data
df.head()

Unnamed: 0_level_0,assetId,tagName,Value
eventTimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-11 00:00:00+00:00,Hydraulic test bench,Cooling_efficiency,47.202
2022-02-11 00:00:01+00:00,Hydraulic test bench,Cooling_efficiency,47.273
2022-02-11 00:00:02+00:00,Hydraulic test bench,Cooling_efficiency,47.25
2022-02-11 00:00:03+00:00,Hydraulic test bench,Cooling_efficiency,47.332
2022-02-11 00:00:04+00:00,Hydraulic test bench,Cooling_efficiency,47.213


In [203]:
# write response from ADH to BigQuery
client = bigquery.Client()
# destination table comes from the configuration secret
table_id = parameters['bigquery_table_ingress']
# only the two text columns are typed explicitly
# NOTE(review): presumably the remaining columns get their types from the DataFrame dtypes - confirm
job_config = bigquery.LoadJobConfig(schema=[
    bigquery.SchemaField("assetId", "STRING"),
    bigquery.SchemaField("tagName", "STRING"),
])
job = client.load_table_from_dataframe(
    df, table_id, job_config=job_config
)
# wait for the load job before inspecting its state in the next cell
job.result()

LoadJob<project=aveva-gcp-accelerator-dev, location=US, id=d31d32a1-216c-41d9-ad27-de3d425ff722>

In [208]:
# state of the load job
job.state

'DONE'

# auth

In [230]:
# show the credentialed gcloud accounts and which one is active
!gcloud auth list

E0327 00:14:28.326263269   31031 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


                        Credentialed Accounts
ACTIVE  ACCOUNT
*       326764451207-compute@developer.gserviceaccount.com
        anom-detect@aveva-gcp-accelerator-dev.iam.gserviceaccount.com

To set the active account, run:
    $ gcloud config set account `ACCOUNT`



# read inference results from Big Query

In [None]:
# load the BigQuery IPython extension so that its magic commands are available
# NOTE(review): none of the cells shown here uses those magics - confirm this is still needed
%load_ext google.cloud.bigquery

In [305]:
from google.cloud import bigquery

bqclient = bigquery.Client()

# besides all table columns, expose `timestamp` (cast to int64 and read as epoch
# milliseconds) as an additional column named eventTimeStamp
sql ="""SELECT timestamp_millis(cast(timestamp as int64)) as eventTimeStamp,* FROM `{}` LIMIT 10"""
# only the test table is queried; the other sources are kept as commented alternatives
#query=sql.format(configuration['bigquery_table_egress'])
query=sql.format(configuration['bigquery_table_egress_test'])
#query=sql.format("aveva-gcp-accelerator-dev.Test.Test")
#query=sql.format("ind-ai-pdm-sln-dev.ind_ai_pdm_sln_dev_bq_streaming.smart_factory_streaming_results_v2")

bq_result = bqclient.query(query)
df = bq_result.to_dataframe()
# the `asset` column is removed before the rows are used further
df.drop(labels=['asset'],axis=1,inplace=True)
df

Unnamed: 0,eventTimeStamp,timestamp,tagName,value,stream_id
0,2022-01-17 01:06:46.471000+00:00,1642381606471,FC18HotWaterValve,0.0,10_12_2021_1634041111037
1,2022-01-17 01:06:46.471000+00:00,1642381606471,FC20Setpoint_error,3.984684153460437,10_12_2021_1634041111037
2,2022-01-17 01:06:46.471000+00:00,1642381606471,FC18HotWaterValve_error,3.985598949731135,10_12_2021_1634041111037
3,2022-01-17 01:06:46.471000+00:00,1642381606471,FC23Setpoint,0.0,10_12_2021_1634041111037
4,2022-01-17 01:06:46.471000+00:00,1642381606471,FC23Setpoint_error,3.984986202604096,10_12_2021_1634041111037
5,2022-01-17 01:06:46.471000+00:00,1642381606471,AHU4ColdWaterValve,10.0,10_12_2021_1634041111037
6,2022-01-17 01:06:46.471000+00:00,1642381606471,min_max_FC20Setpoint,0.0,10_12_2021_1634041111037
7,2022-01-17 01:06:46.471000+00:00,1642381606471,min_max_AHU4ColdWaterValve,10.0,10_12_2021_1634041111037
8,2022-01-17 01:06:46.471000+00:00,1642381606471,predicted_AHU4ColdWaterValve_error,24.929768771066524,10_12_2021_1634041111037
9,2022-01-17 01:06:46.471000+00:00,1642381606471,predicted_Panel2Power_error,24.926472403339712,10_12_2021_1634041111037


In [307]:
#pprint.pprint(json.loads(df.to_json(orient='records',date_format='iso')))
# send the inference rows, serialised as JSON records with ISO dates, to the test stream
# NOTE(review): the type and stream used here are created by the cells further down, so
# those cells have to be run first on a fresh namespace
ocsClient.Streams.updateValues(namespace_id,'anomaly_detection_inference_test',df.to_json(orient='records',date_format='iso'))

# create AVEVA(tm) Data Hub type and stream for inference results

In [279]:
# create a type

# Dictionary to describe information about the type
thetype = {'sdstypecode': SdsTypeCode.Object, 
           'id': 'anomaly_detection_inference',
           'name': 'anomaly_detection_inference', 
           'description': 'gc anomaly detection model inference'}

# sds type properties
# key property of the type
# NOTE(review): the DataFrame column produced by the inference query is spelled
# `eventTimeStamp` (capital S) while this id is `eventTimestamp` - confirm that the
# service matches property names case-insensitively
timestamp_property = SdsTypeProperty()
timestamp_property.Id = "eventTimestamp"
timestamp_property.SdsType = SdsType.fromJson({"SdsTypeCode": SdsTypeCode.DateTime.value})
timestamp_property.IsKey = True

# NOTE(review): the inference query casts `timestamp` to int64, here it is declared
# as Decimal - confirm this is intended
channel_property =  SdsTypeProperty()
channel_property.Id = "timestamp"
channel_property.SdsType = SdsType.fromJson({"SdsTypeCode" : SdsTypeCode.Decimal.value})

channel1_property =  SdsTypeProperty()
channel1_property.Id = "tagName"
channel1_property.SdsType = SdsType.fromJson({"SdsTypeCode" : SdsTypeCode.String.value})

channel2_property =  SdsTypeProperty()
channel2_property.Id = "value"
channel2_property.SdsType = SdsType.fromJson({"SdsTypeCode" : SdsTypeCode.Double.value})

channel3_property =  SdsTypeProperty()
channel3_property.Id = "asset"
channel3_property.SdsType = SdsType.fromJson({"SdsTypeCode" : SdsTypeCode.String.value})

channel4_property =  SdsTypeProperty()
channel4_property.Id = "stream_id"
channel4_property.SdsType = SdsType.fromJson({"SdsTypeCode" : SdsTypeCode.String.value})

# sds type definition
# NOTE(review): the variable name `bearing` looks like a leftover from another example;
# the object describes the anomaly detection inference type
bearing = SdsType()
bearing.Id = thetype['id']
bearing.SdsTypeCode = thetype['sdstypecode']
bearing.Name = thetype['name']
bearing.Description=thetype['description']
bearing.Properties = [timestamp_property
                      ,channel_property
                      ,channel1_property
                      ,channel2_property
                      ,channel3_property
                      ,channel4_property]

# register the type in the namespace (get-or-create call of the client)
bearing_type = ocsClient.Types.getOrCreateType(namespace_id, bearing)

In [288]:
# describe the test stream; it refers to the anomaly_detection_inference type by id
stream_ad = SdsStream(
    id='anomaly_detection_inference_test',
    name='anomaly_detection_inference_test',
    description='anomaly detection inference test',
    type_id='anomaly_detection_inference',
)
# create the stream in the namespace, or update it when it is already there
stream = ocsClient.Streams.createOrUpdateStream(namespace_id, stream_ad)