# Data Services for demo run
See more examples of using data services on the link: https://github.com/th2-net/th2-data-services-utils/tree/master/examples/notebooks

Data services core lib: https://github.com/th2-net/th2-data-services

# Summary

----------------------------------------------

Jupyter notebook for demo run analysis.

## [1] General statistic

### [1.1] Number of Test Events within requested time range

### [1.2] Statistics by event type and execution status
The table aggregates events by type and shows, for each type, the total number of events and how many of them failed.



## [2] Recons analysis

### [2.1] Summary table  
The table summarizes the work of the check2-recon th2 component, including the list of executed rules and their statuses.

### [2.2] Distribution by message type
This table presents how many messages were processed per each message type.



## [3] Script analysis

### [3.1] Basic statistics by test cases
Shows how many test cases passed and how many failed.

### [3.2] Detailed test case statistics
This table shows all test runs, their test cases, statuses and execution times.

### [3.3] Failed Verifications
The table shows detailed information on failed verifications, including test event ids, failed tags and test case details. 

### [3.4] Plot all data aggregated by supertype into a single chart with filters

### [3.5] Latency density

----------------------------------------------

In [None]:
from pprint import pprint
from IPython.core.display import display, HTML
from datetime import datetime, timedelta
from th2_data_services.events_tree import EventsTree
from th2_data_services.provider.v5.data_source.http import HTTPProvider5DataSource
from th2_data_services.provider.v5.commands import http
from th2_data_services.filter import Filter
from th2_data_services.provider.v5.events_tree import (
    EventsTreeCollectionProvider5, 
    ParentEventsTreeCollectionProvider5
)
from th2_data_services.data import Data
from th2_data_services_utils import utils as Utils
from pandas import DataFrame, Grouper
import pandas as pd
import pickle
import yaml

# Stretch the notebook container to the full width of the browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Raise the pandas display limits: up to 1500 rows, no wrapping of wide
# frames across several blocks, and up to 1000 characters per cell.
pd.set_option('display.max_rows', 1500)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 1000)

def get_super_type(record):
    """Return the "super type" of an event, inferring it when eventType is empty.

    A non-empty (or missing) ``eventType`` is returned unchanged. When it is
    the empty string, the type is derived from the event itself and from its
    parent, which is looked up in the notebook-level ``collection``:

    * a name containing "Recon"  -> "Recon Folder"
    * no parent event id         -> "Test Run"
    * otherwise the parent's super type decides:
      "Test Run" -> "Test Case", "Recon Folder" -> "Recon Rule",
      "Recon Rule" -> "Recon Status", "Recon Status" -> "Recon Event".

    If none of the rules applies, the empty string is returned.

    Args:
        record: event mapping with the keys "eventName", "parentEventId"
            and "eventType".

    Returns:
        The resolved super type (a string, or None when "eventType" is absent).
    """
    child_super_type = {
        "Test Run": "Test Case",
        "Recon Folder": "Recon Rule",
        "Recon Rule": "Recon Status",
        "Recon Status": "Recon Event",
    }

    super_type = record.get("eventType")
    if super_type != "":
        return super_type

    # eventName can be missing; treat it as an empty name so the substring
    # test below does not raise a TypeError.
    name = record.get("eventName") or ""
    if "Recon" in name:
        return "Recon Folder"

    parent_id = record.get("parentEventId")
    if not parent_id:
        return "Test Run"

    parent_event = collection.get_event(parent_id)
    if not parent_event:
        return super_type

    # Unknown parent types leave the super type empty, as before.
    return child_super_type.get(get_super_type(parent_event), super_type)

def _timestamp_to_datetime(timestamp):
    """Convert an {"epochSecond": ..., "nano": ...} mapping to a datetime.

    A missing or null mapping is treated as epoch 0. The result is a naive
    datetime in local time, as produced by ``datetime.fromtimestamp``.
    """
    timestamp = timestamp or {}
    moment = datetime.fromtimestamp(timestamp.get("epochSecond", 0))
    return moment + timedelta(microseconds=timestamp.get("nano", 0) / 1000)


def extract_basic(record):
    """Base transform function: reduce a raw event to the fields used below.

    Args:
        record: raw event mapping (required).

    Returns:
        A dict with the keys "super_type", "start_time", "end_time",
        "status", "eventName", "eventId", "parentEventId", "body" and
        "attachedMessageIds".
    """
    new_object = {
        "super_type": get_super_type(record),
        # A timestamp key holding null is handled the same as a missing one.
        "start_time": _timestamp_to_datetime(record.get("startTimestamp")),
        "end_time": _timestamp_to_datetime(record.get("endTimestamp")),
        "status": "SUCCESSFUL" if record.get("successful") else "FAILED",
        "eventName": record.get("eventName"),
        "eventId": record.get("eventId"),
        "parentEventId": record.get("parentEventId"),
        "body": record.get("body"),
        "attachedMessageIds": record.get("attachedMessageIds"),
    }
    # Dump events without a name so that they can be inspected.
    if new_object["eventName"] is None:
        pprint(record)
    return new_object

## Start and finish test time setup

Get start and finish test time automatically

In [None]:
# Boundaries of the test run are read from two pickle files in the working
# directory. NOTE: unpickling can execute arbitrary code, so only load files
# that come from a trusted source.
with open('start_datetime.pickle', 'rb') as start_file:
    START_TIME = pickle.load(start_file)
with open('finish_datetime.pickle', 'rb') as finish_file:
    END_TIME = pickle.load(finish_file)
# Uncomment to set the time range manually instead:
#START_TIME = datetime(2022, 3, 30, 15, 58, 40, 174859)
#END_TIME = datetime(2022, 3, 30, 16, 3, 18, 56148)

Get DataSource URL from config file

In [None]:
# Read the data-source settings from the shared YAML config.
# safe_load builds only plain YAML types, which is enough for the single
# value looked up here and avoids constructing arbitrary Python objects.
with open('../../configs/ds_config.yaml', encoding='utf-8') as f:
    data = yaml.safe_load(f)

# URL of the data provider used to create the data source.
URL = data['provider_data_source']

Create DataSource and EventsTree objects

In [None]:
# HTTP data source for the provider at the configured URL.
data_source = HTTPProvider5DataSource(URL)

# Command that requests the events between START_TIME and END_TIME.
get_events_command = http.GetEvents(
    start_timestamp=START_TIME,
    end_timestamp=END_TIME,
    attached_messages=True,
    cache=True,
)
events: Data = data_source.command(get_events_command)

# Tree collection over the fetched events; the cells below use it for
# parent, ancestor and full-path lookups.
collection = EventsTreeCollectionProvider5(events, data_source=data_source)

# [1] General statistic

## [1.1] Number of Events in the tree

In [None]:
# Size of the events tree collection; shown as the cell output.
len(collection)

## [1.2] Statistic by all event types
The table aggregates events by type and shows, for each type, the total number of events and how many of them failed.


In [None]:
# Reduce every event with extract_basic, then aggregate the records by their
# "super_type" field (total_row=True presumably appends a totals row; see the
# th2-data-services-utils documentation).
Utils.aggregate_by_groups(events.map(extract_basic), "super_type", total_row=True)

# [2] Recons analysis

## [2.1] Summary table
The table summarizes the work of the check2-recon th2 component, including the list of executed rules and their statuses.

In [None]:
def transform_output(record):
    """Build one summary row for an event from the names of its ancestors.

    The names of the first three events on the path returned by
    ``collection.get_full_path`` become the "Recon Root", "Recon Rule" and
    "Rule Status" columns. Every row counts as one event.
    """
    path = collection.get_full_path(record.get("eventId"))
    root_name, rule_name, status_name = (
        path[level].get("eventName") for level in range(3)
    )
    return {
        "Recon Root": root_name,
        "Recon Rule": rule_name,
        "Rule Status": status_name,
        "Number of Events": 1,
    }

# Keep only the recon events and turn each of them into a summary row.
data: Data = (
    events
    .map(extract_basic)
    .filter(lambda record: record.get("super_type") == "Recon Event")
    .map(transform_output)
)

# pandas part: sum the per-event counters for every root/rule/status
# combination, then let the utils helper append the total rows.
group_columns = ['Recon Root', "Recon Rule", 'Rule Status']
df = DataFrame(data).groupby(group_columns).agg({"Number of Events": "sum"})
df = Utils.append_total_rows(df, {"Number of Events": "sum"})
df

## [2.2] Distribution by message type
This table presents how many messages were processed per each message type.

In [None]:
def is_recon_ancestor(record):
    """Tell whether the event has an ancestor whose super type is "Recon Folder".

    Events whose "parentEventId" is None are rejected without consulting the
    events tree.
    """
    if record.get("parentEventId") is None:
        return False

    recon_folder = collection.find_ancestor(
        record.get("eventId"),
        lambda event: get_super_type(event) == "Recon Folder",
    )
    return bool(recon_folder)

def is_match_or_match_failed(record):
    """Tell whether the event sits under a failed "No match" or any "Matched passed" ancestor.

    The "Matched passed" lookup is only performed when the "No match" check
    does not already accept the event.
    """
    event_id = record.get("eventId")

    def ancestor_named(event_name):
        # Ancestor of the event with exactly this name, as found by the tree.
        return collection.find_ancestor(
            event_id, lambda event: event.get("eventName") == event_name
        )

    no_match = ancestor_named("No match")
    if no_match and not no_match.get("successful"):
        return True
    return bool(ancestor_named("Matched passed"))

def exctract_block(record):
    """Return one {"MsgType": ...} row per message attached to the event.

    The attached messages are fetched by id through ``data_source``. A
    message without a body or metadata yields a row whose "MsgType" is None
    instead of raising.

    Args:
        record: event mapping with an "attachedMessageIds" entry.

    Returns:
        A list of single-key dicts, or None when the event has no attached
        message ids.
    """
    message_ids = record.get("attachedMessageIds")
    if not message_ids:
        return None

    messages = data_source.command(http.GetMessagesById(message_ids))
    return [
        # "body" / "metadata" may be present but null, hence the `or {}`.
        {"MsgType": ((message.get("body") or {}).get("metadata") or {}).get("messageType")}
        for message in messages
    ]

# Events below a recon folder that belong to a matched / failed "No match"
# branch, expanded into rows describing their attached messages.
data = (
    events
    .map(extract_basic)
    .filter(is_recon_ancestor)
    .filter(is_match_or_match_failed)
    .map(exctract_block)
)

# Count the rows per message type.
df = DataFrame(data)
df.groupby("MsgType").size().reset_index(name="count")

# [3] Script analysis

## [3.1] Basic statistics by test cases
Shows how many test cases passed and how many failed.

In [None]:
def transform_output(record):
    """Map a record to a countable row: one test case together with its status."""
    return {"Test Case": 1, "Status": record.get("status")}

# One countable row per event whose super type is "Test Case".
data = (
    events
    .map(extract_basic)
    .filter(lambda record: record.get("super_type") == "Test Case")
    .map(transform_output)
)

# Number of test cases per status and the share of each status in percent.
df = DataFrame(data=data).groupby(["Status"]).sum()
total_test_cases = df["Test Case"].sum()
df["Percent"] = df["Test Case"] / total_test_cases * 100
df

## [3.2] Detailed test case statistics
This table shows all test runs, their test cases, statuses and execution times.

In [None]:
def ancestor_is_test_case(record):
    """Tell whether the event has an ancestor whose super type is "Test Case".

    Events with an empty or missing "parentEventId" are rejected without
    consulting the events tree.
    """
    has_parent = bool(record.get("parentEventId"))
    return has_parent and bool(
        collection.find_ancestor(
            record.get("eventId"),
            lambda event: get_super_type(event) == "Test Case",
        )
    )

def transform_output(record):
    """Build a timing row for the test case that an event belongs to.

    The first two events on the event's full path are reported as the test
    run and the test case. "Start Time" is the start timestamp of that test
    case event; "End Time" is the timestamp of the first message attached to
    ``record``.

    Args:
        record: event mapping with "eventId" and "attachedMessageIds".

    Returns:
        A dict with "Test Run", "Test Case", "Status", "Start Time" and
        "End Time", or None when the event has no attached message, the
        message cannot be fetched, or it carries no body or timestamp.
    """
    # Function-scope import: the notebook imports only datetime and timedelta.
    from datetime import timezone

    fullpath = collection.get_full_path(record.get("eventId"))
    test_run, test_case = fullpath[0], fullpath[1]

    # A timestamp key holding null is handled the same as a missing one.
    start_stamp = test_case.get("startTimestamp") or {}
    start_time = datetime.fromtimestamp(start_stamp.get("epochSecond", 0))
    start_time += timedelta(microseconds=start_stamp.get("nano", 0) / 1000)

    message_ids = record.get("attachedMessageIds")
    if not message_ids:
        return None

    message = data_source.command(http.GetMessageById(message_ids[0]))
    if not message:
        return None

    body = message.get("body", {})
    if not body:
        return None

    raw_end_time = (body.get("metadata") or {}).get("timestamp")
    if not raw_end_time:
        return None

    # The message timestamp is written with a trailing "Z" (UTC), while
    # start_time is a naive local time from datetime.fromtimestamp. Convert
    # to the local zone of this machine instead of adding a fixed 3 hours,
    # so the two values stay comparable in any timezone.
    end_time = datetime.strptime(raw_end_time, "%Y-%m-%dT%H:%M:%S.%fZ")
    end_time = end_time.replace(tzinfo=timezone.utc).astimezone().replace(tzinfo=None)

    return {
        "Test Run": test_run.get("eventName"),
        "Test Case": test_case.get("eventName"),
        "Status": "SUCCESSFUL" if test_case.get("successful") else "FAILED",
        'Start Time': start_time,
        'End Time': end_time,
    }

# Verification / message events that live under a test case, converted to
# timing rows of the owning test case.
data = (
    events
    .map(extract_basic)
    .filter(ancestor_is_test_case)
    .filter(lambda record: record.get("super_type") in ["Verification", "message"])
    .map(transform_output)
)

# Earliest start and latest end per test case, plus the resulting duration.
df = (
    DataFrame(data=data)
    .groupby(["Test Run", "Test Case", "Status"])
    .agg({"Start Time": "min", "End Time": "max"})
    .reset_index()
)
df["duration"] = df["End Time"] - df["Start Time"]
df.sort_values(by=["Start Time"])

## [3.3] Failed Verifications
The table shows detailed information on failed verifications, including test event ids, failed tags and test case details. 

In [None]:
def is_test_case_ancestor(record):
    """Tell whether the event has an ancestor whose super type is "Test Case".

    Events whose "parentEventId" is None are rejected without consulting the
    events tree.
    """
    if record.get("parentEventId") is None:
        return False

    test_case = collection.find_ancestor(
        record.get("eventId"),
        lambda event: get_super_type(event) == "Test Case",
    )
    return True if test_case else False

def extract_failed_tags(record):
    """Describe a verification event and collect its OrderCapacity/AccountType tags.

    Every item of the event body is searched with ``Utils.search_fields`` and
    the results of all items are merged. Previously only the result for the
    last body item was kept, and an empty body left ``tags`` undefined.

    Args:
        record: event mapping with "eventId", "eventName" and "body".

    Returns:
        A dict with "Event Id", "Event Name", "Test Run", "Test Case" and
        "tags". The ancestor names are None when no such ancestor is found.
    """
    event_id = record.get("eventId")

    def ancestor_name(super_type):
        # Name of the closest ancestor with the given super type, if any.
        ancestor = collection.find_ancestor(
            event_id, lambda event: get_super_type(event) == super_type
        )
        return ancestor.get("eventName") if ancestor else None

    new_obj = {
        "Event Id": event_id,
        "Event Name": record.get("eventName"),
        "Test Run": ancestor_name("Test Run"),
        "Test Case": ancestor_name("Test Case"),
    }

    # The code that consumes these rows iterates tags.items() and then each
    # payload, so the per-item results are merged as tag -> list of values.
    tags = {}
    for content in record.get("body") or []:
        found = Utils.search_fields(content, "OrderCapacity", "AccountType")
        for tag, payload in found.items():
            tags.setdefault(tag, []).extend(payload)

    new_obj.update({"tags": tags})
    return new_obj

# Failed verification events that live under a test case, with their tags.
data = (
    events
    .map(extract_basic)
    .filter(is_test_case_ancestor)
    .filter(lambda record: record.get("super_type") == "Verification")
    .filter(lambda record: record.get("status") == "FAILED")
    .map(extract_failed_tags)
)

# Flatten the result: one row per failed value of every tag.
transform_data = []
for verification in data:
    shared_columns = {
        "Event Name": verification.get("Event Name"),
        "Test Run": verification.get("Test Run"),
        "Test Case": verification.get("Test Case"),
        "Event_Id": verification.get("Event Id"),
    }
    transform_data.extend(
        {
            **shared_columns,
            "failed_tag": tag,
            "failed_actual": value.get("actual"),
            "failed_expected": value.get("expected"),
            "failed_operation": value.get("operation"),
        }
        for tag, payload in verification["tags"].items()
        for value in payload
        if value.get("status") == "FAILED"
    )

# A pandas DataFrame gives a readable table view of the rows.
failed_verifications = DataFrame(data=transform_data)
failed_verifications

## [3.4] Plot all data aggregated by supertype into a single chart with filters

In [None]:
def transform_output(record):
    """Keep only the fields needed for the chart: time, super type and status."""
    source_field_by_column = (
        ("time", "start_time"),
        ("super_type", "super_type"),
        ("status", "status"),
    )
    return {column: record.get(field) for column, field in source_field_by_column}

# Recon and verification events reduced to time / super type / status.
data = (
    events
    .map(extract_basic)
    .filter(lambda record: record.get("super_type") in ["Recon Event", "Verification"])
    .map(transform_output)
)

# Aggregate into 10-second intervals pivoted by super type and plot the result.
df = Utils.aggregate_groups_by_intervals(data, "time", "super_type", intervals="10s", pivot="super_type")
Utils.create_tick_diagram(df)  # The plot may not be shown if you have not restarted the notebook.

## [3.5] Latency density
Finds pairs of NewOrderSingle and ExecutionReport messages that share a ClOrdID, calculates the latency between them and shows its distribution on a plot.

In [None]:
def is_new_single_order_or_execution_report(record):
    """Tell whether the message type is NewOrderSingle or ExecutionReport.

    Messages without a body are rejected.
    """
    body = record.get("body")
    if not body:
        return False
    message_type = body.get("metadata", {}).get("messageType")
    return message_type in ("NewOrderSingle", "ExecutionReport")

def clear_unnecessery_fields(record):
    """Reduce a message to the few fields needed for the latency calculation.

    Returns a dict with "clOrdID", "OrdStatus", "MessageType", "sessionAlias"
    and "time", or None when the message has no body.
    """
    body = record.get("body")
    if not body:
        return None

    fields = body.get("fields", {})
    metadata = body.get("metadata", {})
    return {
        "clOrdID": fields.get("ClOrdID", {}).get("simpleValue"),
        "OrdStatus": fields.get("OrdStatus", {}).get("simpleValue"),
        "MessageType": metadata.get("messageType"),
        "sessionAlias": metadata.get("id", {}).get("connectionId", {}).get("sessionAlias"),
        "time": metadata.get("timestamp"),
    }

# Collect the streams of all messages attached to the events: the part of a
# message id before the first ":" is used as the stream name.
streams = set()
for record in collection.get_all_events_iter():
    # attachedMessageIds may be missing or null; treat that as "no messages".
    for message_id in record.get("attachedMessageIds") or []:
        streams.add(message_id.split(":")[0])

messages = data_source.command(
    http.GetMessages(
        start_timestamp=START_TIME,
        end_timestamp=END_TIME,
        stream=list(streams)
    )
)

data = messages\
        .filter(is_new_single_order_or_execution_report)\
        .map(clear_unnecessery_fields)

TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
# Latencies are placed on a datetime axis starting at 1900-01-01, the date
# strptime uses when the format contains no date part. Adding the timedelta
# directly also works for latencies with zero microseconds, negative values
# and values of a day or more, where parsing str(timedelta) used to fail.
LATENCY_AXIS_ORIGIN = datetime(1900, 1, 1)

roundtrips = {}  # ClOrdID -> timestamp of the first NewOrderSingle seen
latency = []

for record in data:
    msg_type = record.get("MessageType")
    clOrdID = record.get("clOrdID")

    if msg_type == "NewOrderSingle":
        # Only the first NewOrderSingle per ClOrdID starts the round trip.
        roundtrips.setdefault(clOrdID, record.get("time"))
    elif msg_type == "ExecutionReport":
        # Only reports with OrdStatus '0' for a known order close the round trip.
        if record.get("OrdStatus") == '0' and clOrdID in roundtrips:
            received_at = datetime.strptime(record.get("time"), TIMESTAMP_FORMAT)
            sent_at = datetime.strptime(roundtrips[clOrdID], TIMESTAMP_FORMAT)
            current_latency = received_at - sent_at
            latency.append({"latency": 1, "time": LATENCY_AXIS_ORIGIN + current_latency})

# Count the latencies per 10 ms bucket and label the buckets as seconds.microseconds.
df = DataFrame(data=latency).set_index("time").groupby(Grouper(freq="10ms")).sum()
df.index = df.index.strftime("%S.%f")

Utils.create_tick_diagram(df)  # The plot may not be shown if you have not restarted the notebook.