# explore Google Anomaly detection solution

# required packages

In [None]:
# install/upgrade bigquery client
!pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'

In [None]:
!pip3 install --user google-cloud-secret-manager==2.5.0

In [None]:
!python -m pip install ocs_sample_library_preview

# functions

In [None]:
# Import the Secret Manager client library.
from google.cloud import bigquery
from google.cloud import secretmanager
import json

In [None]:
# access secret manager

def create_secret(project_id, secret_id):
    """Create a new, empty secret in Secret Manager and print its name.

    Args:
        project_id: Project under which the secret is created.
        secret_id: Identifier to give the new secret.

    Returns:
        None. The resource name of the created secret is printed.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    create_request = {
        # Resource name of the owning project.
        "parent": f"projects/{project_id}",
        "secret_id": secret_id,
        # Request the automatic replication policy.
        "secret": {"replication": {"automatic": {}}},
    }
    created = sm_client.create_secret(request=create_request)

    print(f'Created secret: {created.name}')

def add_secret(project_id, secret_id, secret):
    """Add a new version containing ``secret`` to an existing secret.

    Args:
        project_id: Project that owns the secret.
        secret_id: Identifier of the secret receiving the new version.
        secret: Secret value as a ``str``; it is UTF-8 encoded before upload.

    Returns:
        None.
    """
    client = secretmanager.SecretManagerServiceClient()

    # Resource name of the secret (not the project) that gets the new version.
    parent = f"projects/{project_id}/secrets/{secret_id}"

    # The payload is sent as bytes, so encode the string first.
    payload = secret.encode('UTF-8')

    # Add the secret version; the response was never used, so it is not kept.
    client.add_secret_version(
        request={"parent": parent, "payload": {"data": payload}}
    )

def get_secret(project_id, secret_id, debug=0):
    """Return the latest version of a secret, decoded as UTF-8 text.

    Args:
        project_id: Project that owns the secret.
        secret_id: Identifier of the secret to read.
        debug: When truthy, print the version resource name before the call.

    Returns:
        The payload of the ``latest`` secret version as a ``str``.
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    # Resource name of the most recent version of the secret.
    version_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    if debug:
        print(version_name)

    reply = sm_client.access_secret_version(request={"name": version_name})
    raw_payload = reply.payload.data
    return raw_payload.decode('UTF-8')

def list_secret(project_id, secret_id, debug=1):
    """Access the latest version of a secret and return the raw response.

    Unlike ``get_secret``, the response object is returned as-is rather
    than the decoded payload.

    Args:
        project_id: Project that owns the secret.
        secret_id: Identifier of the secret to read.
        debug: When truthy (the default, matching the previous hard-coded
            behaviour), print the version resource name before the call.

    Returns:
        The response of ``access_secret_version`` for the ``latest`` version.
    """
    client = secretmanager.SecretManagerServiceClient()

    # Resource name of the most recent version of the secret.
    name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    if debug:
        print(name)

    return client.access_secret_version(request={"name": name})

def list_secrets(project_id, debug=1):
    """List the secrets that belong to a project.

    Args:
        project_id: Project whose secrets are listed.
        debug: When truthy (the default, matching the previous hard-coded
            behaviour), print the parent resource name before the call.

    Returns:
        The response of the client's ``list_secrets`` call.
    """
    client = secretmanager.SecretManagerServiceClient()

    # Resource name of the parent project.
    parent = f"projects/{project_id}"
    if debug:
        print(parent)

    return client.list_secrets(request={"parent": parent})

# configuration

In [None]:
# Project identifier used when calling Secret Manager.
PROJECT_ID = "aveva-gcp-accelerator-dev"
# NOTE(review): presumably the numeric project number of PROJECT_ID - confirm.
PROJECT_ID_ID = "326764451207"
# Service account e-mail and the path of its key file.
GCP_SVC_ACC = "anom-detect@aveva-gcp-accelerator-dev.iam.gserviceaccount.com"
# Raw string: the value keeps its literal backslash (shell-style escaped
# space) without relying on the invalid "\ " escape sequence.
GCP_SVC_ACC_KEY_FILE = r"/home/jupyter/anomaly\ detection/../anom.json"
# Name of the secret that stores the JSON configuration.
secret_name = "anom_detect_aveva"

In [None]:
# Settings serialized to JSON and stored as the secret's payload.
# NOTE(review): client_id/client_secret are committed here in plain text;
# consider rotating them and supplying them from outside the notebook.
config_function = {
    "prefix": "adh_ad_",
    "client_id": "e2e45a5b-4964-40e9-ac26-424c181b49c1",
    "client_secret": "TupWb0MGpCr1N8EKGG151Fh6tX3e6HlRANoLo2rmrVU=",
    "resource": "https://uswe.datahub.connect.aveva.com",
    "tenant_id": "cee3a3fd-aeb2-4950-80f5-4b72c77322b1",
    "namespace": "ebd1fe8e-713b-416e-a811-f281200b847a",
    "namespace_name": "Development",
    "dataview": "hydraulic test bench",
    "interval": "00.00:00:01",
    "bigquery_table_ingress": "aveva-gcp-accelerator-dev.Test.dataset",
    "bigquery_table_egress": "",
}

# secret manager

In [None]:
!gcloud auth list

In [None]:
# https://codelabs.developers.google.com/codelabs/secret-manager-python#8
#client = secretmanager.SecretManagerServiceClient()
#client.list_secrets(request = {"parent": f"projects/{PROJECT_ID_ID}" })
#client.get_secret(request = {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}"})
#client.list_secret_versions(request = {"parent": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}"})
#client.get_secret_version(request= {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}/versions/latest"})
#client.access_secret_version(request = {"name": f"projects/{PROJECT_ID_ID}/secrets/{secret_name}/versions/latest"})
#client.access_secret_version(request = {"name": "projects/326764451207/secrets/anom_detect/versions/1" })

In [None]:
# Create the (initially empty) secret named by secret_name in PROJECT_ID.
create_secret(PROJECT_ID,secret_name)

In [None]:
# Store the JSON-serialized config_function dict as a new secret version.
add_secret(PROJECT_ID,secret_name,json.dumps(config_function))

In [None]:
# List the secrets in PROJECT_ID; as the cell's last expression the result is displayed.
list_secrets(PROJECT_ID)

In [None]:
# Read the latest secret version back and parse its JSON text into a dict.
configuration = json.loads(get_secret(PROJECT_ID,secret_name))

# read data from ADH

In [None]:

import datetime
from io import StringIO
from ocs_sample_library_preview import * # default
import pandas as pd
import pprint
import urllib

In [None]:

def endpoint_request(baseClient, url):
    """Send a GET request for ``url`` through ``baseClient``.

    The headers returned by ``baseClient.sdsHeaders()`` are extended with a
    gzip ``Accept-Encoding`` and a ``Request-Timeout`` of 300 (seconds)
    before the request is issued.

    Args:
        baseClient: Object providing ``sdsHeaders()`` and ``request()``.
        url: Address to request.

    Returns:
        Whatever ``baseClient.request`` returns.
    """
    request_headers = baseClient.sdsHeaders()
    request_headers.update({
        'Accept-Encoding': "gzip",   # set also by requests
        'Request-Timeout': "300",    # in seconds
    })
    return baseClient.request("get", url, headers=request_headers)

def get_timerange(minute_period=5, now=None):
    """Return the start and end of the preceding ``minute_period`` window.

    The current time is rounded down to a multiple of ``minute_period``
    minutes within the hour (seconds and microseconds cleared). If that
    rounded time falls in the same minute as the current time, the window
    is moved back by one more period. The window always spans exactly
    ``minute_period`` minutes.

    Args:
        minute_period: Window length in minutes; must be positive.
            Defaults to 5.
            NOTE(review): alignment uses ``minute % minute_period``, so it
            presumably expects a value that divides 60 - confirm.
        now: Optional ``datetime`` to use instead of the current time,
            which makes the result reproducible. Defaults to the current
            UTC time as a naive ``datetime``.

    Returns:
        ``{"start": datetime, "end": datetime}``.

    Raises:
        ValueError: If ``minute_period`` is not positive.
    """
    if minute_period <= 0:
        raise ValueError("minute_period must be a positive number of minutes")

    if now is None:
        # utcnow() is deprecated; this yields the same naive UTC timestamp.
        ts = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    else:
        ts = now

    # Round down to the most recent period boundary at hh:mm:00.000000.
    ts_end = ts - datetime.timedelta(
        minutes=ts.minute % minute_period,
        seconds=ts.second,
        microseconds=ts.microsecond,
    )
    # Still inside the minute of the boundary: step back one full period.
    if ts.minute == ts_end.minute:
        ts_end -= datetime.timedelta(minutes=minute_period)

    ts_start = ts_end - datetime.timedelta(minutes=minute_period)

    return {"start": ts_start, "end": ts_end}

def get_timerange_test():
    """Return a fixed one-hour time range for use in test queries.

    Returns:
        ``{"start": str, "end": str}`` with both values formatted as
        ``%Y-%m-%dT%H:%M:%SZ`` timestamp strings.
    """
    start_time = '2022-02-11T00:00:00Z'
    end_time = '2022-02-11T01:00:00Z'
    return {"start": start_time, "end": end_time}

In [None]:
# get configuration

# Load the run parameters from the JSON payload stored in Secret Manager.
try:
    parameters = json.loads(get_secret(PROJECT_ID, secret_name))
except Exception as e:
    # Interpolate the exception into the message; previously it was passed
    # to print() as a separate set literal and shown as "{...}".
    print(f'get parameters error: {e}')
    exit(1)

In [None]:

# Switch to the live window once get_timerange() is ready for use:
# range = get_timerange()
# NOTE(review): the name `range` shadows the builtin for the rest of the
# notebook; it is kept because the URL-building cell reads range['start']
# and range['end'].
range = get_timerange_test()
# Both values are strings here, so plain concatenation works.
print("start: " + range['start'] + "\n" + "end:   " +  range['end'])

In [None]:
# Build the OSIsoft Cloud Services client from the stored parameters.
ocsClient = OCSClient(
    "v1",
    parameters['tenant_id'],
    parameters['resource'],
    parameters['client_id'],
    parameters['client_secret'],
)

In [None]:
# API version string as reported by the client's base client.
api_version = ocsClient.baseClient.api_version
# Tenant-level base URI built from the client's uri, API version and tenant.
baseUri = f'{ocsClient.uri}/api/{api_version}/Tenants/{ocsClient.tenant}'
# Base URI narrowed to the namespace given in the parameters.
baseNamespaceUri = f"{baseUri}/Namespaces/{parameters['namespace']}"

In [None]:
# `import urllib` alone does not guarantee the `parse` submodule is loaded.
import urllib.parse

options = "&form=csvh" # Note: code below removes header row, due to 'csvh' selection
# Percent-encode the data view id (it may contain spaces) and use the
# encoded value in the request path.
dataview = urllib.parse.quote(parameters['dataview'])
url = (
    f"{baseNamespaceUri}/DataViews/{dataview}/Data/Interpolated"
    f"?startIndex={range['start']}&endIndex={range['end']}"
    f"&interval={parameters['interval']}{options}"
)
response = endpoint_request(ocsClient.baseClient, url)
response_text = response.text

# paging assumes csv format only TODO support JSON paged responses
# Follow the 'next' link for as long as the previous page returned 200/204.
while response.links.get('next') is not None and response.status_code in (200, 204):
    response = endpoint_request(ocsClient.baseClient, response.links['next']['url'])
    # Drop the header row of each follow-up page. partition() yields ''
    # when the body has no newline (e.g. an empty 204 page), where
    # split(...)[1] would raise IndexError.
    page_rows = response.text.partition("\n")[2]
    if page_rows:
        response_text += '\r\n' + page_rows

# process dataset from ADH and write to BigQuery

In [None]:
# Parse the accumulated CSV text into a DataFrame indexed by timestamp.
# `df` is only defined when the response contained data.
if response_text:
    # Original author's note: nanoseconds is seven digits, not supported.
    df = pd.read_csv(
        StringIO(response_text),
        index_col='Timestamp',
        parse_dates=True,
    )
    # Rename columns, then give the index its new name.
    df = df.rename(columns={"Field": "tagName", "Timestamp": "eventTimeStamp"})
    df.index.name = "eventTimeStamp"
# truncate 7th subsecond digit
#df.Timestamp = df.Timestamp.replace('.0000000Z','',regex=True)
#df.Timestamp = df.Timestamp.replace('T.*','T',regex=True)
#df.Timestamp = df.Timestamp.replace(':.*','',regex=True)
#df.Timestamp = df.Timestamp.replace('[ ].*','',regex=True)
#'T.*$','T',regex=True)
#.0000000Z','',regex=True)
#df.head()

In [None]:
# Print a summary of the DataFrame's index, columns and dtypes.
df.info()

In [None]:
# Show the first rows as a quick sanity check of the parsed data.
df.head()

In [None]:
# Load the DataFrame built from the ADH response into the ingress table.
client = bigquery.Client()
table_id = parameters['bigquery_table_ingress']
# Declare the two string columns explicitly in the load job's schema.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField(column_name, "STRING")
        for column_name in ("assetId", "tagName")
    ]
)
job = client.load_table_from_dataframe(df, table_id, job_config=job_config)
# Wait for the load job to finish.
job.result()

# access BigQuery table

In [None]:
# load/verify magic commands
%load_ext google.cloud.bigquery

In [None]:
from google.cloud import bigquery

bqclient = bigquery.Client()

# Preview the table the ADH data was loaded into. The stored configuration
# defines 'bigquery_table_ingress' (and '_egress') but no
# 'bigquery_table_source' key, so the ingress table is queried here.
sql = """SELECT * FROM `{}` LIMIT 10"""
query = sql.format(configuration['bigquery_table_ingress'])

bq_result = bqclient.query(query)

In [None]:
# Materialize the query result as a DataFrame. A dedicated name is used so
# the pandas module alias `pd` is not overwritten.
bq_df = bq_result.to_dataframe()

In [None]:
%%bigquery
# query table data source
select *
from `aveva-gcp-accelerator-dev.Test.dataset`
limit 1