In [0]:
# Install the mlflow package for this notebook through the Databricks library utility.
# NOTE(review): no version is pinned, so the installed mlflow release may differ between runs — consider pinning.
dbutils.library.installPyPI("mlflow")
# Restarting the Python process after the install is currently disabled.
#dbutils.library.restartPython()

In [0]:
from pyspark.sql.types import StructType
from pyspark.sql.types import *
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType
from pyspark.sql.functions import *      # for window() function
from typing import List
from pyspark.sql.types import *
import pandas as pd
import time
from datetime import datetime, timedelta, timezone
import mlflow
from pyspark.sql.types import StructType
import dateutil.parser
from pyspark.sql.functions import unix_timestamp
from pyspark.sql.functions import from_unixtime
from pyspark.sql.types import StructType

# Run label appended to both streaming checkpoint locations in this notebook;
# changing it makes the streams use different checkpoint directories.
# NOTE(review): "kakfa" looks like a typo for "kafka"; left as-is because the value is part of the checkpoint paths.
version = "tests_working_stream_from_kakfa_v2_1"

In [0]:
%sql
-- Delta table that receives the raw log stream; its columns mirror schema_raw_logs
-- (the event payload is kept as a string column).
CREATE TABLE IF NOT EXISTS raw_log_data_delta_tests_ (
  account_id STRING,
  agent_id STRING,
  event STRING, 
  timestamp TIMESTAMP
 )
USING DELTA;


-- Delta table with the same columns as the anomalies output (user_id, Ips, prediction).
-- NOTE(review): no visible cell reads or writes this table; the anomalies stream
-- targets anomalies_data_delta_PN_job instead — confirm whether this one is still needed.
CREATE TABLE IF NOT EXISTS anomalies_data_delta_tests_ (
  user_id STRING,
  Ips LONG,
  prediction DOUBLE
)
USING DELTA;


In [0]:
# Directory with the JSON log files that are replayed as a stream.
inputPath = "dbfs:/mnt/kafka_raw/testes_PN/"

# Schema of each incoming JSON record. `event` is read as a plain string here;
# it is parsed into a struct later with from_json.
schema_raw_logs = (
    StructType()
    .add("account_id", "string")
    .add("agent_id", "string")
    .add("event", "string")
    .add("timestamp", "timestamp")
)


# Alternative source: read from Kafka (currently disabled).
#eventsDF = (spark.readStream
#  .format("kafka")
#  .option("kafka.bootstrap.servers", "td-guardian.tdx.sandcitadel.com:9093")
#  .option("subscribe", "event-splitter.audit_logs")
#  .load())


# Active source: read from S3 (the JSON files under inputPath).
eventsDF = (
    spark.readStream
    .schema(schema_raw_logs)          # set the schema of the JSON data
    .option("maxFilesPerTrigger", 1)  # treat a sequence of files as a stream by picking one file at a time
    .json(inputPath)
)

# Append every incoming record to the raw table.
# The sink format is stated explicitly because the target table is declared
# USING DELTA and the anomalies writer in this notebook also sets it.
(
    eventsDF.writeStream
    .queryName("write_raw_table")
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/mnt/delta/events/_checkpoints/etl-from-json_PN_" + version)
    .table("raw_log_data_delta_tests_")
)




In [0]:

# Open the raw Delta table as a streaming source for the parsing and scoring cells.
# We may need to use OPTIMIZE, which deals with small files by merging and compacting them into larger files.
raw_data = spark.readStream.format("delta").table("raw_log_data_delta_tests_")



In [0]:

# Schema handed to from_json to parse the JSON string stored in the `event` column.
#
# The layout is deliberately flat: `actor`, `audit`, `object`, ... are top-level
# keys of the payload, not children of an `event` struct. Each field name is
# declared exactly once so that `data.<field>` references stay unambiguous.
fullschema = (
    StructType()
    .add("logger_event_id", "string")
    .add("logger_timestamp", "timestamp")
    .add("account_id", "string")
    .add("agent_id", "string")
    # In the sample payloads `event` is a plain string such as "create_audit_operation".
    .add("event", "string")
    .add(
        "actor",
        StructType()
        .add("user_id", "string")
        .add("ip_addresses", ArrayType(StringType()))
        .add("session_id", "string")
        .add("impersonated_user_id", "string")
        .add("id", "string")
        .add("type", "string")
        .add("user_agent", "string"),
    )
    .add("event_type", "string")
    .add(
        "audit",
        StructType()
        .add("severity", "string")
        .add("resource_id", "string")
        .add("operation", "string")
        .add("timestamp", "timestamp")
        .add("status", "string"),
    )
    # Declared as an empty struct: no sub-fields of `object` are extracted.
    .add("object", StructType())
    .add("timestamp", "timestamp")
)


In [0]:
%sql
-- Inspect the rows currently stored in the raw log table.
select * from raw_log_data_delta_tests_

account_id,agent_id,event,timestamp
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.15""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.12""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.19""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.13""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.20""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.6""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.3""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.17""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.10""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000
57c7413abca837e974000009,,"{""logger_event_id"":""f3f8755f-1558-4dfb-a848-72d2aae3e8d5"",""logger_timestamp"":""2020-10-22T10:00:00.000"",""actor"":{""id"":""da0fcc979e6f4df5ad891781afe7114b"",""type"":""user"",""ip_addresses"":[""1.1.1.10""],""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""user_agent"":""Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"",""user_id"":""PEDRONEVES""},""generator"":{""id"":""talkdesk-id-web-5fd4c7878d-thcff"",""name"":""talkdesk-id"",""type"":""system""},""audit"":{""resource_id"":""2e24501acf0242a8a0775c640d3f5bea"",""operation"":""user_issue_authorization_code"",""status"":""success"",""severity"":""medium"",""type"":""external""},""account_id"":""57c7413abca837e974000009"",""realm"":""MAIN"",""object"":{""oauth_client_id"":""09278611f21847a88fdd48e3d664d2d4"",""principal_id"":""da0fcc979e6f4df5ad891781afe7114b"",""subject_id"":""5dc9338defe0380010de1cd1"",""session_id"":""38fd382e1c9341ae91608f3b1005b982"",""redirect_uri"":""com.talkdesk.callbar:/oauth2redirect/talkdeskid"",""scopes"":[""openid"",""callbar"",""sentiment-settings:read"",""user-session:end"",""callbar-settings:read"",""rtm-settings:read"",""rtm-user:auth"",""callbar-analytics:write"",""interactions:park"",""permissions:verify"",""interaction-quality-feedback:write"",""interaction-quality-settings:read"",""interaction-quality-settings:write"",""device-routing:read"",""device-routing:write"",""device-routing-nailup:write"",""apps:graphql"",""dispositions:read"",""megazord:read"",""numbers:read"",""presence-user:read"",""ring-groups:read"",""users:read"",""graph-users:read"",""integrations-zendesk-tickets:read"",""interactions:hold"",""ur:write"",""ur:read"",""interactions:disconnect""],""type"":""authorization_code""},""event"":""create_audit_operation"",""timestamp"":""2020-10-22T10:00:00.000"",""event_id"":""b009047da89a41da81f93b4a6a5ecfdc""}",2020-10-22T10:00:00.000+0000


In [0]:
# Preview cell: keep the row timestamp and parse the JSON `event` string
# into a struct column named `data`, then render the result.
filtered_data = raw_data.select(
    col("timestamp"),
    from_json(col("event"), fullschema).alias("data"),
)
display(filtered_data)

In [0]:


# Load the model logged under the given MLflow run and wrap it as a Spark UDF.
run_id = "09840597c6e04f279aaa27be313c6e73"
model_uri = "runs:/" + run_id + "/sklearn-model"
model = mlflow.pyfunc.spark_udf(spark, model_uri)

# Lower bound for the day filter below: midnight of the current UTC day.
# NOTE(review): this string is built once, when the cell runs, so a stream that
# keeps running past midnight keeps filtering against the start day — confirm
# this is intended.
today_utc_midnight = datetime.now().astimezone(timezone.utc).strftime("%Y-%m-%dT00:00:00.000+0000")

# Per-day, per-user count of distinct `ip_addresses` values, scored by the model.
filtered_data = (
    raw_data
    .select("timestamp", from_json("event", fullschema).alias("data"))
    # Truncate the row timestamp to the start of its day. date_trunc replaces the
    # earlier to_date/to_timestamp round-trip whose "yyyy-MM-DD" pattern used
    # `DD` (day-of-year) where day-of-month was meant.
    .withColumn("timestamp", date_trunc("day", col("timestamp")))
    .select("timestamp", "data.logger_timestamp", "data.actor.user_id", "data.actor.ip_addresses")
    .where(col("user_id").isNotNull())
    .where(col("timestamp") >= today_utc_midnight)
    # NOTE(review): the watermark column is not one of the grouping keys below.
    .withWatermark("logger_timestamp", "5 seconds")
    .groupBy(col("timestamp"), "user_id")
    # `ip_addresses` is an array column, so this counts distinct arrays rather
    # than individual addresses — NOTE(review): confirm that is the intent.
    .agg(approx_count_distinct("ip_addresses").alias("Ips"))
    .select("user_id", "Ips")
    .withColumn("prediction", model("Ips"))
)







In [0]:
# Render the scored stream (columns: user_id, Ips, prediction).
display(filtered_data)

user_id,Ips,prediction
PEDRONEVES,18,-1.0


In [0]:


# Stream the scored rows into the anomalies table (Delta sink, "complete"
# output mode, one-second processing-time trigger).
(
    filtered_data.writeStream
    .queryName("write_anomalies_table")
    .trigger(processingTime="1 second")
    .outputMode("complete")
    .format("delta")
    .option("checkpointLocation", "/mnt/delta/events/_checkpoints/anomalies_" + version)
    .table("anomalies_data_delta_PN_job")
)



In [0]:
%sql
-- Inspect the rows currently stored in the anomalies table written by the scoring stream.
select * from anomalies_data_delta_PN_job

user_id,Ips,prediction
PEDRONEVES,18,-1.0


In [0]:
 %sql
 -- Delta table targeted by the anomalies stream (user_id, Ips, prediction).
 -- NOTE(review): in the visible cell order this CREATE comes after the cell that
 -- starts writing to the table — confirm the intended run order.
 CREATE TABLE IF NOT EXISTS anomalies_data_delta_PN_job (
  user_id STRING,
  Ips LONG,
  prediction DOUBLE
)
USING DELTA;