# Notebook Basic 
It is a notebook which shows common examples:
- how use common aggregation
- create common plots

In [1]:
import time
from datetime import timedelta
from pprint import pprint
from datetime import datetime

import pandas as pd
import th2_data_services_utils.utils as Utils
from th2_data_services.data_source import DataSource
from th2_data_services.data import Data
from th2_data_services.events_tree import EventsTree
from IPython.core.display import display, HTML
from pandas import DataFrame
from plotly import graph_objects, express

# This settings for increase display jupyter notebook and dataframe table.
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.options.display.max_rows = 550
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 1000)

In [2]:
# For understand which event type on based name we get from stream.
def get_super_type(record: dict, tree):
    name = record.get("eventName")
    parent_id = record.get("parentEventId")
    super_type = record.get("eventType")
    if super_type == "":
        if "Recon" in name:
            super_type = "Recon Folder"
        else:
            if not parent_id:
                super_type = "Test Run"
            else:
                parent_event = tree.get(parent_id)
                if parent_event:
                    parent_super_type = get_super_type(parent_event, tree)
                    if parent_super_type == "Test Run":
                        super_type = "Test Case"
                    elif parent_super_type == "Recon Folder":
                        super_type = "Recon Rule"
                    elif parent_super_type == "Recon Rule":
                        super_type = "Recon Status"
                    elif parent_super_type == "Recon Status":
                        super_type = "Recon Event"

    return super_type

# Base extract (transform function)
# record is required argument.
def extract_basic(record: dict):
    new_object = {}
    time = datetime.fromtimestamp(record.get("startTimestamp", {}).get("epochSecond", 0))
    time += timedelta(microseconds=record.get("startTimestamp", {}).get("nano", 0))
    new_object.update(
        {
            "super_type": get_super_type(record, tree),
            "time": time,
            "status": "SUCCESSFUL" if record.get("successful") else "FAILED"
        }
    )
    return new_object

## Create Data Source object
The DataSource object lets you retrieve data in the easiest way.

NOTE: You can change the URL via eponymous property of this object.

In [3]:
START_TIME = datetime(year=2021, month=6, day=20, hour=10, minute=44, second=41, microsecond=692724)
END_TIME = datetime(year=2021, month=6, day=20, hour=10, minute=45, second=49, microsecond=28579)

DEMO_HOST = "10.64.66.66"  # th2-kube-demo  Host port where rpt-data-provider is located.
DEMO_PORT = "30999"  # Node port of rpt-data-provider.
data_source = DataSource(F"http://{DEMO_HOST}:{DEMO_PORT}")

events: Data = data_source.get_events_from_data_provider(
    startTimestamp=START_TIME,
    endTimestamp=END_TIME,
    metadataOnly=False
)

## This example demonstrates events retrieving


In [4]:
# We build events tree for further assistance.
events_tree = EventsTree(events)
tree = events_tree.events

# Here we get events which doesn't exist in data source interval.
events_tree.recover_unknown_events(data_source)

## [1] Aggregation

### [1.1] Simple aggregation example
Aggregate events by super type

In [5]:
data: Data = events.map(extract_basic)

Utils.aggregate_by_groups(data, "super_type", total_row=True)

Unnamed: 0_level_0,count
super_type,Unnamed: 1_level_1
Checkpoint,16.0
Checkpoint for session,160.0
Outgoing message,16.0
Recon Event,26.0
Send message,88.0
Service event,2.0
Test Case,6.0
Test Run,1.0
Verification,71.0
checkMessages,20.0


### [1.2] Aggregation by intervals
Aggregate events within 5 minutes interval

In [6]:
Utils.aggregate_by_intervals(data, "time", resolution="m", every=5)

Unnamed: 0,time,count
0,2021-06-20 13:46:00,209
1,2021-06-20 13:51:00,84
2,2021-06-20 13:56:00,134
3,2021-06-20 14:01:00,26


### [1.3] Aggregation by interval with another fields
Aggregate events by super type within 5 minutes interval

In [7]:
Utils.aggregate_groups_by_intervals(data, "time", "super_type", intervals="5min", pivot="super_type").fillna(0)

super_type,Checkpoint,Checkpoint for session,Outgoing message,Recon Event,Send message,Service event,Test Case,Test Run,Verification,checkMessages,checkSequence,checkSequenceRule,message,placeOrderFIX,preFiltering
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-06-20 13:45:00,10.0,100.0,11.0,0.0,25.0,0.0,2.0,0.0,28.0,11.0,11.0,11.0,0.0,10.0,12.0
2021-06-20 13:50:00,1.0,10.0,1.0,1.0,53.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0
2021-06-20 13:55:00,2.0,20.0,1.0,19.0,6.0,0.0,2.0,1.0,41.0,8.0,8.0,8.0,1.0,1.0,8.0
2021-06-20 14:00:00,3.0,30.0,3.0,6.0,4.0,0.0,2.0,0.0,2.0,1.0,1.0,1.0,11.0,3.0,0.0


### [1.4] Aggregation by several groups
Aggregate events separately by CumQty and OrdType

In [8]:
def get_needed_fields(record):
    output = []
    fields = Utils.search_fields(record, "CumQty", "OrdType")
    output.append(
        {
            "CumQty": fields.get("CumQty")[0].get("actual"),
            "OrdType": fields.get("OrdType")[0].get("actual")
        }
    )
    return output

verifications = events.filter(lambda rec: rec.get("eventType") == "Verification")
fields = verifications.map(get_needed_fields)

Utils.aggregate_several_group(fields)

Unnamed: 0,CumQty,count
0,0,20.0
1,10,20.0
2,30,10.0
3,40,21.0
4,Total,71.0

Unnamed: 0,OrdType,count
0,2,71.0
1,Total,71.0


## [2] Plottig

### [2.1] Line chart
Plot all data aggregated by supertype into a single chart

In [None]:
df = Utils.aggregate_groups_by_intervals(data, "time", "super_type", intervals="5min", pivot="super_type").fillna(0)

Utils.create_tick_diagram(df)  # The plot may not be shown if you have not restarted the notebook.

![alt text](intervals_super_type.png "123")

### [2.2] Pie chart

In [None]:
def extract_basic(record):
    new_object = {
            "super_type": get_super_type(record, tree),
            "status": "SUCCESSFUL" if record.get("successful") else "FAILED",
            "body": record.get("body"),
            "parentEventId": record.get("parentEventId"),
            "eventId": record.get("eventId"),
            "eventName": record.get("eventName"),
            "body": record.get("body")
        }
    return new_object

def transform_output(record):
    new_obj = {
        "Test Case": record.get("eventName"),
        "status": record.get("status"),
        'time': record.get("time"),
    }
    return new_obj

data = events\
    .map(extract_basic)\
    .filter(lambda record: record.get("super_type") == "Test Case")\
    .map(transform_output)

df = DataFrame(data=data)
total = len(df.index)
total_failed = len(df.query('status == "FAILED"').index)
total_passed = total - total_failed

labels_tc_pie = ["FAILED", "SUCCESSFUL"]
values_tc_pie = [total_failed, total_passed]
df_grouped = df.loc[:, :"status"]

counts_per_test = df_grouped.value_counts().reset_index(level=[0,1])
counts_per_test.columns = ["Test Case", "status", "count"]

tc_pie = graph_objects.Figure(data=[graph_objects.Pie(labels=labels_tc_pie, values=values_tc_pie, hole=0.6, marker={"colors": ["red", "green"]})])
tc_pie.show()

![alt text](pie_chart.png "123")

### [2.3] Bar chart

In [None]:
tc_bars = express.bar(counts_per_test, x="Test Case", y="count", color="status", color_discrete_map={"FAILED": "red", "SUCCESSFUL": "green"})
tc_bars.show()

![alt text](bar_chart.png "123")

## [3] Helpful utils

## [3.1] Search fields
Search needed fields in a record.

In [9]:
verification_events = next(events.filter(lambda record: record.get("eventType") == "Verification").sift(limit=1))

pprint(verification_events)

{'attachedMessageIds': [],
 'batchId': '53c1abd6-b2f9-4378-b061-434a3315c4a2',
 'body': [{'fields': {'AccountType': {'actual': '1',
                                      'expected': '1',
                                      'key': False,
                                      'operation': 'EQUAL',
                                      'status': 'PASSED',
                                      'type': 'field'},
                      'ClOrdID': {'actual': '9601585',
                                  'expected': '9601585',
                                  'key': True,
                                  'operation': 'EQUAL',
                                  'status': 'PASSED',
                                  'type': 'field'},
                      'CumQty': {'actual': '0',
                                 'expected': '0',
                                 'key': False,
                                 'operation': 'EQUAL',
                                 'status': 'PASSED',
             

In [10]:
pprint(Utils.search_fields(verification_events, "OrdType", "PartyID", "TestField")) # OrdType and PartyID are existing. TestField isn't existing.

defaultdict(<class 'list'>,
            {'OrdType': [{'actual': '2',
                          'expected': '2',
                          'key': False,
                          'operation': 'EQUAL',
                          'status': 'PASSED',
                          'type': 'field'}],
             'PartyID': [{'actual': 'DEMO-CONN1',
                          'expected': 'DEMO-CONN1',
                          'key': False,
                          'operation': 'EQUAL',
                          'status': 'PASSED',
                          'type': 'field'},
                         {'actual': '0',
                          'expected': '0',
                          'key': False,
                          'operation': 'EQUAL',
                          'status': 'PASSED',
                          'type': 'field'},
                         {'actual': '0',
                          'expected': '0',
                          'key': False,
                          'operation': 'EQU

### [3.2] Delete string by pattern

In [11]:
line = "Case[TC_1.6]: Trader DEMO-CONN1 vs trader DEMO-CONN2 for instrument INSTR6"
Utils.delete_string_by_pattern(line, "DEMO-CONN")

'Case[TC_1.6]: Trader 1 vs trader 2 for instrument INSTR6'