In [1]:
import sys
# Put the parent directory first on the module search path.
# NOTE(review): presumably so the local package checkout is imported instead of
# an installed copy — confirm; this depends on the notebook's working directory.
sys.path.insert(0, '..')

In [2]:
from retentioneering.eventstream.schema import RawDataSchema, EventstreamSchema
from retentioneering.eventstream.eventstream import Eventstream

import pandas as pd

# Load the raw event log from a gzip-compressed CSV located next to the notebook.
df0 = pd.read_csv('./sample.gz', compression='gzip')

# Tell the eventstream which raw CSV columns hold the event name, the event
# timestamp and the user identifier.
raw_data_schema = RawDataSchema(
    event_name='event', 
    event_timestamp='timestamp', 
    user_id='user_id'
)

# Build the eventstream used as the source of the processing graph below.
# NOTE(review): user_sample_size=500 presumably restricts the stream to a
# sample of 500 users — confirm against the Eventstream documentation, and
# check whether the sampling is seeded (reproducibility on re-run).
stream = Eventstream(
    raw_data=df0,
    raw_data_schema=raw_data_schema,
    schema=EventstreamSchema(),
    user_sample_size=500,
)

  params_schema: dict[str, Any] = cls.schema()


In [3]:
from retentioneering.graph.p_graph import PGraph, EventsNode, MergeNode
from retentioneering.data_processors_lib import CollapseLoops, CollapseLoopsParams
from retentioneering.data_processors_lib import DeleteUsersByPathLength, DeleteUsersByPathLengthParams
from retentioneering.data_processors_lib import FilterEvents, FilterEventsParams
from retentioneering.data_processors_lib import GroupEvents, GroupEventsParams
from retentioneering.data_processors_lib import NewUsersEvents, NewUsersParams
from retentioneering.data_processors_lib import SplitSessions, SplitSessionsParams
from retentioneering.data_processors_lib import StartEndEvents, StartEndEventsParams
from retentioneering.data_processors_lib import TruncatePath, TruncatePathParams
from retentioneering.data_processors_lib import TruncatedEvents, TruncatedEventsParams
from retentioneering.graph.p_graph import PGraph, EventsNode


# Name of the event that marks a user as having reached the target.
TARGET_EVENT = 'finances/deposit/<payment_name>/success'

def users_with_target_event(df: pd.DataFrame, schema) -> pd.Series:
    """Build a row mask keeping every event of users who reached ``TARGET_EVENT``.

    Parameters
    ----------
    df : pd.DataFrame
        Event log. Must contain the columns named by ``schema.event_name`` and
        ``schema.user_id``.
    schema
        Object exposing ``event_name`` and ``user_id`` attributes that hold the
        corresponding column names.

    Returns
    -------
    pd.Series
        Boolean Series aligned with ``df.index``: True for every row whose user
        has at least one row where the event name equals ``TARGET_EVENT``.
    """
    event_col = schema.event_name
    user_col = schema.user_id

    # Single .loc lookup instead of chained indexing (df[mask][col]).
    reached_target = df[event_col] == TARGET_EVENT
    target_users = df.loc[reached_target, user_col].unique()
    return df[user_col].isin(target_users)

def first_session_filter(df: pd.DataFrame, schema) -> pd.Series:
    """Build a row mask selecting events whose ``session_id`` ends with ``'_1'``.

    NOTE(review): this presumably selects each user's first session, assuming
    session ids look like ``<user>_<n>`` — confirm against the processor that
    writes the ``session_id`` column.

    Parameters
    ----------
    df : pd.DataFrame
        Event log. Must contain a string column named ``session_id``.
    schema
        Unused; accepted so the function has the same ``(df, schema)`` signature
        as the other filter callbacks in this notebook.

    Returns
    -------
    pd.Series
        Boolean Series aligned with ``df.index``. Rows with a missing
        ``session_id`` are reported as False.
    """
    # na=False keeps the mask strictly boolean: without it, missing session ids
    # produce NaN entries and the mask can no longer be used for row selection.
    return df['session_id'].str.endswith('_1', na=False)

def new_and_not_truncated_users(df: pd.DataFrame, schema) -> pd.Series:
    """Build a row mask keeping users that are new and not right-truncated.

    A user is kept when they have at least one ``'new_user'`` event and no
    ``'truncated_right'`` event anywhere in ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Event log. Must contain the columns named by ``schema.event_name`` and
        ``schema.user_id``.
    schema
        Object exposing ``event_name`` and ``user_id`` attributes that hold the
        corresponding column names.

    Returns
    -------
    pd.Series
        Boolean Series aligned with ``df.index``: True for every row that
        belongs to a kept user.
    """
    events = df[schema.event_name]
    users = df[schema.user_id]

    truncated_users = users[events == 'truncated_right'].unique()
    new_users = users[events == 'new_user'].unique()

    # "in new AND NOT in truncated" is the set difference new \ truncated,
    # expressed as mask algebra: no sorting of the ids is required, so ids of
    # mixed or unorderable types work too, and no extra import is needed.
    return users.isin(new_users) & ~users.isin(truncated_users)


def create_graph():
    """Assemble the preprocessing graph on top of the notebook-level ``stream``.

    Topology (each arrow is one ``add_node`` call, parent -> child):

        root -> CollapseLoops
        root -> StartEndEvents -> NewUsersEvents -> TruncatedEvents
             -> FilterEvents(new_and_not_truncated_users) -> SplitSessions
        SplitSessions -> FilterEvents(first_session_filter)
                      -> FilterEvents(users_with_target_event)
        SplitSessions -> TruncatePath(drop_after=TARGET_EVENT)
                      -> FilterEvents(first_session_filter)

    Returns
    -------
    PGraph
        The populated graph.
    """
    pgraph = PGraph(source_stream=stream)
    root = pgraph.root

    # Build every node up front, in a fixed order, before wiring anything.
    collapse_loops = EventsNode(CollapseLoops(params=CollapseLoopsParams()))
    start_end = EventsNode(StartEndEvents(params=StartEndEventsParams()))
    new_users = EventsNode(NewUsersEvents(params=NewUsersParams(new_users_list="all")))
    truncated = EventsNode(
        TruncatedEvents(params=TruncatedEventsParams(right_truncated_cutoff=(12, 'D')))
    )
    new_complete_only = EventsNode(
        FilterEvents(params=FilterEventsParams(func=new_and_not_truncated_users))
    )
    sessions = EventsNode(
        SplitSessions(
            params=SplitSessionsParams(session_cutoff=(1, 'h'), session_col='session_id')
        )
    )
    first_session = EventsNode(
        FilterEvents(params=FilterEventsParams(func=first_session_filter))
    )
    with_target = EventsNode(
        FilterEvents(params=FilterEventsParams(func=users_with_target_event))
    )
    cut_at_target = EventsNode(
        TruncatePath(params=TruncatePathParams(drop_after=TARGET_EVENT))
    )
    first_session_after_cut = EventsNode(
        FilterEvents(params=FilterEventsParams(func=first_session_filter))
    )

    # (child, parent) pairs, listed in the order they are attached.
    # NOTE(review): collapse_loops hangs off the root with no children, and the
    # two branches ending in with_target / first_session_after_cut are never
    # joined by a MergeNode — confirm this is intended.
    wiring = [
        (collapse_loops, root),
        (start_end, root),
        (new_users, start_end),
        (truncated, new_users),
        (new_complete_only, truncated),
        (sessions, new_complete_only),
        (first_session, sessions),
        (with_target, first_session),
        (cut_at_target, sessions),
        (first_session_after_cut, cut_at_target),
    ]
    for child, parent in wiring:
        pgraph.add_node(node=child, parents=[parent])

    return pgraph




In [6]:
# Build the processing graph with create_graph() and call its display() method.
graph = create_graph()
graph.display()

In [14]:
# Call transition_graph() on the graph's combined result.
# NOTE(review): create_graph() itself never sets combine_result; it is
# presumably produced by a step not shown in this notebook (execution counts
# jump from 6 to 14) — confirm this cell works after Restart & Run All.
graph.combine_result.transition_graph()

<src.transition_graph.transition_graph.TransitionGraph at 0x29a2683a0>