In [1]:
from __future__ import annotations

import pandas as pd

from src.data_processors_lib.simple_processors import SimpleGroup, SimpleGroupParams
from src.eventstream.schema import RawDataSchema
from src.graph.p_graph import PGraph, EventsNode, Eventstream, MergeNode

# Sample click log: one (event_name, event_timestamp, user_id) record per row.
source_df = pd.DataFrame(
    [
        ("pageview", "2021-10-26 12:00", "1"),
        ("cart_btn_click", "2021-10-26 12:02", "1"),
        ("pageview", "2021-10-26 12:03", "1"),
        ("trash_event", "2021-10-26 12:03", "1"),
        ("exit_btn_click", "2021-10-26 12:04", "2"),
        ("plus_icon_click", "2021-10-26 12:05", "1"),
    ],
    columns=["event_name", "event_timestamp", "user_id"],
)

# Wrap the frame in an Eventstream, mapping each schema field to its column.
source = Eventstream(
    raw_data=source_df,
    raw_data_schema=RawDataSchema(
        event_name="event_name", event_timestamp="event_timestamp", user_id="user_id"
    ),
)

# Grouping node: rows matching either cart-related click are handled by a
# SimpleGroup configured with the name "add_to_cart".
cart_events = EventsNode(
    SimpleGroup(
        SimpleGroupParams(
            event_name="add_to_cart",
            filter=lambda df, schema: df[schema.event_name].isin(
                ["cart_btn_click", "plus_icon_click"]
            ),
        )
    )
)

# Grouping node: rows named "exit_btn_click" are handled by a SimpleGroup
# configured with the name "logout".
logout_events = EventsNode(
    SimpleGroup(
        SimpleGroupParams(
            event_name="logout",
            filter=lambda df, schema: df[schema.event_name] == "exit_btn_click",
        )
    )
)

merge = MergeNode()

# Wire the graph: both grouping nodes hang off the root, and the merge node
# takes the two grouping nodes as its parents.
graph = PGraph(source)
graph.add_node(node=cart_events, parents=[graph.root])
graph.add_node(node=logout_events, parents=[graph.root])
graph.add_node(node=merge, parents=[cart_events, logout_events])

# Evaluate the graph at the merge node and show the result as a DataFrame.
result = graph.combine(merge)
result_df = result.to_dataframe()

result_df


Unnamed: 0,event_id,event_type,event_index,event_name,event_timestamp,user_id
0,11fb499d-f89f-4233-acc0-d7df5d9542fa,raw,0,pageview,2021-10-26 12:00:00,1
1,eed73be5-924c-47bd-b72e-333e11d93b1b,group_alias,1,add_to_cart,2021-10-26 12:02:00,1
3,48da70e7-c67e-4a3b-b5e5-bd9a1b87d291,raw,3,pageview,2021-10-26 12:03:00,1
4,0ce58445-2385-4db3-827f-5b28ab6bb1c8,raw,4,trash_event,2021-10-26 12:03:00,1
5,448ba406-3d89-46a2-b318-b91d0bba7f75,group_alias,5,logout,2021-10-26 12:04:00,2
7,35d81db1-ee01-4e75-b2df-928bb3e3dc4c,group_alias,7,add_to_cart,2021-10-26 12:05:00,1


In [2]:
# Render the JSON schema of the params object behind the `cart_events` processor.
# In the recorded output the schema lists only `event_name` and `event_type`;
# the `filter` callable supplied at construction does not appear in it.
cart_events.processor.params.schema_json()

  cart_events.processor.params.schema_json()


'{"title": "SimpleGroupParams", "type": "object", "properties": {"event_name": {"title": "Event Name", "type": "string"}, "event_type": {"title": "Event Type", "default": "group_alias", "type": "string"}}, "required": ["event_name"]}'

In [3]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# It reads as a draft of a custom DataProcessor with its own params model;
# either restore it as live code or drop the cell before sharing the notebook.
# from __future__ import annotations
#
# from src.data_processor.data_processor import DataProcessor, ParamsModel
# from typing import Optional
#
# class CustomParams(ParamsModel):
#     name: str
#     last_name: Optional[str]
#     tags: list[str]
#     numbers: list[int] | str
#
#
# class CustomProcessor(DataProcessor):
#     params: CustomParams
#
#     def __init__(self, params: CustomParams) -> None:
#         super().__init__(params=params)
#
#     def apply(self, eventstream: Eventstream) -> Eventstream:
#         return eventstream
#
#     def schema(self) -> dict[str, dict | str]:
#         schema = {
#             "params": self.params.schema(),
#             "values": self.params.dict()
#         }
#         return schema

In [4]:
# List the attributes of the object held in PGraph's private `__ngraph` attribute.
# `_PGraph__ngraph` is the name-mangled spelling required to reach it from
# outside the class; this is a debugging peek, not a public API.
dir(graph._PGraph__ngraph)

['__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_adj',
 '_node',
 '_pred',
 '_succ',
 'add_edge',
 'add_edges_from',
 'add_node',
 'add_nodes_from',
 'add_weighted_edges_from',
 'adj',
 'adjacency',
 'adjlist_inner_dict_factory',
 'adjlist_outer_dict_factory',
 'clear',
 'clear_edges',
 'copy',
 'degree',
 'edge_attr_dict_factory',
 'edge_subgraph',
 'edges',
 'get_edge_data',
 'graph',
 'graph_attr_dict_factory',
 'has_edge',
 'has_node',
 'has_predecessor',
 'has_successor',
 'in_degree',
 'in_edges',
 'is_directed',
 'is_multigraph',
 'name',
 'nbunch_iter',
 'neighbors',
 'node_attr_dict_factory',
 'node_di

In [5]:
# Show each node of the underlying graph together with its attribute dict
# (in the recorded output every attribute dict is empty).
graph._PGraph__ngraph.nodes.data()

NodeDataView({{'name': 'SourceNode', 'pk': '759e088b-ea54-400d-9e5a-495d01e24a83'}: {}, {'name': 'EventsNode', 'pk': 'e68718d9-9322-49b2-8dfa-180eb7ba59fa'}: {}, {'name': 'EventsNode', 'pk': '3e54c45d-e684-438a-9423-e9e1de3c1be2'}: {}, {'name': 'MergeNode', 'pk': '832cdfb1-f5b5-4ca6-a378-d970b7bc278d'}: {}})

In [6]:
import networkx as nx
from itertools import chain


def node_link_data(
        G,
        attrs=None,
        *,
        source="source",
        target="target",
        name="id",
        key="key",
        link="links",
):
    """Serialize graph ``G`` into a node-link style dictionary.

    The signature and deprecation handling mirror networkx's
    ``node_link_data``, with two differences in the produced dictionary:
    the ``"multigraph"`` and ``"graph"`` entries are left out, and each node
    is identified by the value of ``n.export()`` rather than by the node
    object itself.

    Parameters
    ----------
    G : graph
        Must provide ``is_multigraph()``, ``is_directed()``, ``nodes[n]``,
        iteration over nodes and ``edges(...)``. Every node must provide an
        ``export()`` method.
    attrs : dict, optional
        Deprecated way to pass the key names below. When given, it overrides
        the keyword arguments and a ``DeprecationWarning`` is emitted. Keys
        missing from ``attrs`` fall back to the same defaults as the keywords.
    source, target : str
        Keys under which each link stores its two endpoints.
    name : str
        Key under which each node entry stores ``n.export()``.
    key : str
        Key under which each link stores the edge key (multigraphs only).
    link : str
        Key of the returned dictionary that holds the list of links.

    Returns
    -------
    dict
        ``{"directed": bool, "nodes": [...], <link>: [...]}``.

    Raises
    ------
    networkx.NetworkXError
        If ``source``, ``target`` and ``key`` are not pairwise distinct.
    """
    if attrs is not None:
        import warnings

        msg = (
            "\n\nThe `attrs` keyword argument of node_link_data is deprecated\n"
            "and will be removed in networkx 3.2. It is replaced with explicit\n"
            "keyword arguments: `source`, `target`, `name`, `key` and `link`.\n"
            "To make this warning go away, and ensure usage is forward\n"
            "compatible, replace `attrs` with the keywords. "
            "For example:\n\n"
            "   >>> node_link_data(G, attrs={'target': 'foo', 'name': 'bar'})\n\n"
            "should instead be written as\n\n"
            "   >>> node_link_data(G, target='foo', name='bar')\n\n"
            "in networkx 3.2.\n"
            "The default values of the keywords will not change.\n"
        )
        warnings.warn(msg, DeprecationWarning, stacklevel=2)

        source = attrs.get("source", "source")
        target = attrs.get("target", "target")
        # Fall back to "id", the keyword default, so that passing `attrs`
        # without a "name" entry yields the same node key as passing no
        # `attrs` at all (the fallback used to be "name").
        name = attrs.get("name", "id")
        key = attrs.get("key", "key")
        link = attrs.get("link", "links")

    multigraph = G.is_multigraph()

    # The edge key is only meaningful for multigraphs; using None otherwise
    # keeps it out of the uniqueness check below.
    key = None if not multigraph else key
    if len({source, target, key}) < 3:
        raise nx.NetworkXError("Attribute names are not unique.")

    data = {
        "directed": G.is_directed(),
        "nodes": [
            # Stored node attributes first, then the exported node under
            # `name`; on a key clash the exported value wins.
            dict(chain(G.nodes[n].items(), [(name, n.export())]))
            for n in G
        ],
    }

    # NOTE(review): links carry the raw node objects as endpoints while the
    # node entries carry `n.export()`, so links cannot be matched to nodes by
    # value — confirm whether endpoints should be exported as well.
    if multigraph:
        data[link] = [
            dict(chain(d.items(), [(source, u), (target, v), (key, k)]))
            for u, v, k, d in G.edges(keys=True, data=True)
        ]
    else:
        data[link] = [
            dict(chain(d.items(), [(source, u), (target, v)]))
            for u, v, d in G.edges(data=True)
        ]
    return data


# Serialize the pipeline graph (reached through PGraph's name-mangled private
# attribute) with the node_link_data helper defined in this cell.
node_link_data(graph._PGraph__ngraph)

{'directed': True,
 'nodes': [{'id': {'name': 'SourceNode',
    'pk': '759e088b-ea54-400d-9e5a-495d01e24a83'}},
  {'id': {'name': 'EventsNode',
    'pk': 'e68718d9-9322-49b2-8dfa-180eb7ba59fa',
    'processor': "{'schema': {'title': 'SimpleGroupParams', 'type': 'object', 'properties': {'event_name': {'title': 'Event Name', 'type': 'string'}, 'event_type': {'title': 'Event Type', 'default': 'group_alias', 'type': 'string'}}, 'required': ['event_name']}, 'values': {'event_name': 'add_to_cart', 'filter': <function <lambda> at 0x7f18004bc3a0>, 'event_type': 'group_alias'}}"}},
  {'id': {'name': 'EventsNode',
    'pk': '3e54c45d-e684-438a-9423-e9e1de3c1be2',
    'processor': "{'schema': {'title': 'SimpleGroupParams', 'type': 'object', 'properties': {'event_name': {'title': 'Event Name', 'type': 'string'}, 'event_type': {'title': 'Event Type', 'default': 'group_alias', 'type': 'string'}}, 'required': ['event_name']}, 'values': {'event_name': 'logout', 'filter': <function <lambda> at 0x7f17ad

In [7]:
# NOTE(review): this repeats the previous cell's call verbatim — candidate for
# removal unless the second run is meant to demonstrate something.
node_link_data(graph._PGraph__ngraph)

{'directed': True,
 'nodes': [{'id': {'name': 'SourceNode',
    'pk': '759e088b-ea54-400d-9e5a-495d01e24a83'}},
  {'id': {'name': 'EventsNode',
    'pk': 'e68718d9-9322-49b2-8dfa-180eb7ba59fa',
    'processor': "{'schema': {'title': 'SimpleGroupParams', 'type': 'object', 'properties': {'event_name': {'title': 'Event Name', 'type': 'string'}, 'event_type': {'title': 'Event Type', 'default': 'group_alias', 'type': 'string'}}, 'required': ['event_name']}, 'values': {'event_name': 'add_to_cart', 'filter': <function <lambda> at 0x7f18004bc3a0>, 'event_type': 'group_alias'}}"}},
  {'id': {'name': 'EventsNode',
    'pk': '3e54c45d-e684-438a-9423-e9e1de3c1be2',
    'processor': "{'schema': {'title': 'SimpleGroupParams', 'type': 'object', 'properties': {'event_name': {'title': 'Event Name', 'type': 'string'}, 'event_type': {'title': 'Event Type', 'default': 'group_alias', 'type': 'string'}}, 'required': ['event_name']}, 'values': {'event_name': 'logout', 'filter': <function <lambda> at 0x7f17ad

In [8]:
# Reach the underlying graph through PGraph's name-mangled private attribute.
g = graph._PGraph__ngraph

# Materialise the nodes in iteration order and keep the second one for a
# closer look in the following cell.
nodes = list(g)
d = nodes[1]

# Attribute items stored on the graph for that node.
g.nodes[d].items()

dict_items([])

In [9]:
# Inspect the `processor` attribute of the node selected as `d` in the previous cell.
d.processor

{'schema': {'title': 'SimpleGroupParams', 'type': 'object', 'properties': {'event_name': {'title': 'Event Name', 'type': 'string'}, 'event_type': {'title': 'Event Type', 'default': 'group_alias', 'type': 'string'}}, 'required': ['event_name']}, 'values': {'event_name': 'add_to_cart', 'filter': <function <lambda> at 0x7f18004bc3a0>, 'event_type': 'group_alias'}}