In [6]:
import sys

# Put the parent directory at the front of the module search path. The `src`
# package imported by later cells is presumably located there (TODO confirm
# against the repository layout).
PARENT_DIR = '..'
sys.path.insert(0, PARENT_DIR)

In [7]:
import pandas as pd

# A gzip-compressed CSV hosted on Google Drive can be loaded with the two
# lines below; they are kept disabled here for reference.
# download_url = 'https://drive.google.com/uc?id=1tY-4xg6m_dv6IaVPIcd4oC1bK1lW5Tr9&export=download&confirm=t'
# df0 = pd.read_csv(download_url, compression='gzip')

# Stand-in used by this notebook: an empty frame that only carries the three
# raw column names.
RAW_COLUMNS = ['event', 'timestamp', 'user_id']
df0 = pd.DataFrame(data=[], columns=RAW_COLUMNS)
df0

Unnamed: 0,event,timestamp,user_id


In [41]:
from src.eventstream.schema import RawDataSchema, EventstreamSchema
from src.eventstream.eventstream import Eventstream
from src.graph.p_graph import PGraph, EventsNode
from src.data_processors_lib import CollapseLoops, CollapseLoopsParams
from src.data_processors_lib import DeleteUsersByPathLength, DeleteUsersByPathLengthParams
from src.data_processors_lib import FilterEvents, FilterEventsParams
from src.data_processors_lib import GroupEvents, GroupEventsParams
from src.data_processors_lib import NewUsersEvents, NewUsersParams
from src.data_processors_lib import SplitSessions, SplitSessionsParams
from src.data_processors_lib import StartEndEvents, StartEndEventsParams
from src.data_processors_lib import TruncatePath, TruncatePathParams
from src.data_processors_lib import TruncatedEvents, TruncatedEventsParams
from src.graph.p_graph import PGraph, EventsNode
import inspect


# Tell the eventstream which raw columns hold the event name, the timestamp
# and the user id.
raw_data_schema = RawDataSchema(event_name='event', event_timestamp='timestamp', user_id='user_id')

# Wrap the (empty) raw frame in an Eventstream using a default-constructed
# EventstreamSchema.
stream = Eventstream(raw_data=df0, raw_data_schema=raw_data_schema, schema=EventstreamSchema())

# Processing graph whose source is the stream above.
graph = PGraph(source_stream=stream)

# Event name that marks a user as having reached the target.
TARGET_EVENT = 'finances/deposit/<payment_name>/success'

def users_with_target_event(df, schema) -> pd.Series:
    """Build a row mask selecting every event of users who have the target event.

    Args:
        df: frame with at least the columns ``event_name`` and ``user_id``.
        schema: not used by this function.

    Returns:
        Boolean Series aligned with ``df``: True for every row whose ``user_id``
        has at least one row with ``event_name == TARGET_EVENT``.
    """
    reached_target = df['event_name'] == TARGET_EVENT
    # Filter only the user_id column instead of boolean-indexing the whole frame.
    target_users = df['user_id'][reached_target].unique()
    return df['user_id'].isin(target_users)

def first_session_filter(df, schema) -> pd.Series:
    """Build a row mask selecting events whose session id ends with ``'_1'``.

    Args:
        df: frame with a string column ``session_id``.
        schema: not used by this function.

    Returns:
        Boolean Series aligned with ``df``. Rows with a missing ``session_id``
        are False (``na=False``), so the mask never contains NaN.
    """
    # An id such as 'u_11' is not matched: its last two characters are '11'.
    return df['session_id'].str.endswith('_1', na=False)

def new_and_not_truncated_users(df, schema) -> pd.Series:
    """Build a row mask for users that are new and not right-truncated.

    A user qualifies when at least one of their rows has
    ``event_name == 'new_user'`` and none has ``event_name == 'truncated_right'``.

    Args:
        df: frame with at least the columns ``event_name`` and ``user_id``.
        schema: not used by this function.

    Returns:
        Boolean Series aligned with ``df``: True for every row of a qualifying user.
    """
    user_ids = df['user_id']
    event_names = df['event_name']
    new_users = user_ids[event_names == 'new_user'].unique()
    truncated_users = user_ids[event_names == 'truncated_right'].unique()
    # "in new_users and not in truncated_users" is the set difference the
    # original computed with numpy, without needing numpy in scope.
    return user_ids.isin(new_users) & ~user_ids.isin(truncated_users)

# One EventsNode per processing step. Params objects created without arguments
# receive no explicit settings.
node0 = EventsNode(CollapseLoops(params=CollapseLoopsParams()))
node1 = EventsNode(StartEndEvents(params=StartEndEventsParams()))
node2 = EventsNode(NewUsersEvents(params=NewUsersParams(new_users_list="all")))
node3 = EventsNode(
    TruncatedEvents(params=TruncatedEventsParams(right_truncated_cutoff=(12, 'D')))
)
node4 = EventsNode(
    FilterEvents(params=FilterEventsParams(func=new_and_not_truncated_users))
)
node5 = EventsNode(
    SplitSessions(params=SplitSessionsParams(session_cutoff=(1, 'h'), session_col='session_id'))
)
node6 = EventsNode(
    FilterEvents(params=FilterEventsParams(func=first_session_filter))
)

# Steps built around TARGET_EVENT, plus one more first-session filter.
node7 = EventsNode(
    FilterEvents(params=FilterEventsParams(func=users_with_target_event))
)
node8 = EventsNode(TruncatePath(params=TruncatePathParams(drop_after=TARGET_EVENT)))
node9 = EventsNode(
    FilterEvents(params=FilterEventsParams(func=first_session_filter))
)

# Graph topology, one (child, parent) pair per edge, added in this order:
#
#   root -> node0
#   root -> node1 -> node2 -> node3 -> node4 -> node5 -> node6 -> node7
#                                               node5 -> node8 -> node9
graph_edges = (
    (node0, graph.root),
    (node1, graph.root),
    (node2, node1),
    (node3, node2),
    (node4, node3),
    (node5, node4),
    (node6, node5),
    (node7, node6),
    (node8, node5),
    (node9, node8),
)
for child, parent in graph_edges:
    graph.add_node(node=child, parents=[parent])


graph_data = graph.export({})

# Overwrite both settings of the node at index 1 of the export (the
# CollapseLoops node in the export printed by this notebook) with values that
# are presumably rejected when the graph is loaded back.
tampered_values = graph_data["nodes"][1]["processor"]["values"]
tampered_values["suffix"] = "invalid_suffix"
tampered_values["timestamp_aggregation_type"] = "invalid_timestamp_aggregation_type"

# Keep whatever exception the import raises so it can be inspected afterwards;
# errrr stays None when nothing is raised.
errrr = None
try:
    graph._set_graph_handler(graph_data)
except Exception as err:
    errrr = err



In [13]:
# Round trip: export the current graph, feed the unmodified export back through
# the private _set_graph_handler, then display a fresh export as the cell output.
exported = graph.export({})

graph._set_graph_handler(exported)

graph.export({})

{'name': 'SourceNode', 'pk': 'eb8879a6-dd75-401f-959f-52988d13ae9c'}
{'name': 'EventsNode', 'pk': '1a2fd3fd-1e33-49fd-892a-8cfc5fa892d9'}
{'name': 'EventsNode', 'pk': 'ac8b8644-8d32-41aa-80ff-f76d18f6e50f'}
{'name': 'EventsNode', 'pk': '5ef34580-c8d1-40e1-89e5-e6ad4ac5f459'}
{'name': 'EventsNode', 'pk': 'd13b0b7e-5fcf-4391-9890-29e49d3bda1d'}
{'name': 'EventsNode', 'pk': '7a18fc9a-e53d-4545-803f-5265851c786c'}
{'name': 'EventsNode', 'pk': '7b674f3a-deea-4a4e-8d21-7148c5fc8329'}
{'name': 'EventsNode', 'pk': 'f228fd08-b961-44bb-83ba-88b481ad7ee9'}
{'name': 'EventsNode', 'pk': 'f2d6a105-20a8-48bd-8ac6-3c7891b20322'}
{'name': 'EventsNode', 'pk': '0ffc6e7c-c866-4da4-9e16-eae7b2b66a36'}
{'name': 'EventsNode', 'pk': '0955f9be-698a-4bf1-b676-8baee0102a1a'}
<class 'src.graph.nodes.SourceNode'>
<class 'src.graph.nodes.EventsNode'>
<class 'src.graph.nodes.EventsNode'>
<class 'src.graph.nodes.EventsNode'>
<class 'src.graph.nodes.EventsNode'>
<class 'src.graph.nodes.EventsNode'>
<class 'src.graph.n

{'directed': True,
 'nodes': [{'name': 'SourceNode',
   'pk': 'eb8879a6-dd75-401f-959f-52988d13ae9c'},
  {'name': 'EventsNode',
   'pk': '1a2fd3fd-1e33-49fd-892a-8cfc5fa892d9',
   'processor': {'values': {'suffix': 'loop',
     'timestamp_aggregation_type': 'max'},
    'name': 'CollapseLoops'}},
  {'name': 'EventsNode',
   'pk': 'ac8b8644-8d32-41aa-80ff-f76d18f6e50f',
   'processor': {'values': {}, 'name': 'StartEndEvents'}},
  {'name': 'EventsNode',
   'pk': '5ef34580-c8d1-40e1-89e5-e6ad4ac5f459',
   'processor': {'values': {'new_users_list': 'all'},
    'name': 'NewUsersEvents'}},
  {'name': 'EventsNode',
   'pk': 'd13b0b7e-5fcf-4391-9890-29e49d3bda1d',
   'processor': {'values': {'left_truncated_cutoff': None,
     'right_truncated_cutoff': (12.0, 'D')},
    'name': 'TruncatedEvents'}},
  {'name': 'EventsNode',
   'pk': '7a18fc9a-e53d-4545-803f-5265851c786c',
   'processor': {'values': {'func': ''}, 'name': 'FilterEvents'}},
  {'name': 'EventsNode',
   'pk': '7b674f3a-deea-4a4e-8d21

In [42]:
# Materialise the nodes yielded by iterating the graph's private _ngraph
# container, without leaving a stray loop variable in the notebook namespace.
extracted_nodes = list(graph._ngraph)

In [24]:
# Display the graph's current export as the cell output.
graph.export({})

{'directed': True,
 'nodes': [{'name': 'SourceNode',
   'pk': 'eb8879a6-dd75-401f-959f-52988d13ae9c'},
  {'name': 'EventsNode',
   'pk': '1a2fd3fd-1e33-49fd-892a-8cfc5fa892d9',
   'processor': {'values': {'suffix': 'loop',
     'timestamp_aggregation_type': 'max'},
    'name': 'CollapseLoops'}},
  {'name': 'EventsNode',
   'pk': 'ac8b8644-8d32-41aa-80ff-f76d18f6e50f',
   'processor': {'values': {}, 'name': 'StartEndEvents'}},
  {'name': 'EventsNode',
   'pk': '5ef34580-c8d1-40e1-89e5-e6ad4ac5f459',
   'processor': {'values': {'new_users_list': 'all'},
    'name': 'NewUsersEvents'}},
  {'name': 'EventsNode',
   'pk': 'd13b0b7e-5fcf-4391-9890-29e49d3bda1d',
   'processor': {'values': {'left_truncated_cutoff': None,
     'right_truncated_cutoff': (12.0, 'D')},
    'name': 'TruncatedEvents'}},
  {'name': 'EventsNode',
   'pk': '7a18fc9a-e53d-4545-803f-5265851c786c',
   'processor': {'values': {'func': ''}, 'name': 'FilterEvents'}},
  {'name': 'EventsNode',
   'pk': '7b674f3a-deea-4a4e-8d21

In [25]:
from __future__ import annotations

import inspect
import types
from dataclasses import dataclass, field
from typing import Any, Callable, Type, Union

from src.constants import DATETIME_UNITS_LIST
from src.exceptions.widget import ParseReteFuncError

def _serialize(value: Callable) -> str:
    try:
        code = inspect.getsource(value)
        return code
    except OSError:
        return ""

def _parse(value: str) -> Callable:  # type: ignore
    try:
        code_obj = compile(value, "<string>", "exec")
    except:
        raise ParseReteFuncError("parsing error. You must implement a python function here")

    new_func_type = None

    for i in code_obj.co_consts:
        try:
            new_func_type = types.FunctionType(i, {})
        except Exception as err:
            continue

    if new_func_type is None:
        raise ParseReteFuncError("parsing error. You must implement a python function here")

    return new_func_type

In [72]:
# NOTE(review): first_session_filter is also defined in an earlier cell; this
# cell redefines it. Confirm whether the repetition is still needed.
def first_session_filter(df, schema) -> pd.Series:
    """Build a row mask selecting events whose session id ends with ``'_1'``.

    Args:
        df: frame with a string column ``session_id``.
        schema: not used by this function.

    Returns:
        Boolean Series aligned with ``df``. Rows with a missing ``session_id``
        are False (``na=False``), so the mask never contains NaN.
    """
    return df['session_id'].str.endswith('_1', na=False)

# Round-trip demo: turn the function into source text and rebuild it.
_parse(_serialize(first_session_filter))


<function first_session_filter(df, schema)>