In [1]:
import numpy as np
import pandas as pd

In [2]:
!pip install retentioneering

In [3]:
from retentioneering import datasets
from retentioneering.eventstream import Eventstream
stream = datasets.load_simple_shop()

## Basic example

In [4]:
stream.sequences()

Unnamed: 0_level_0,user_id,user_id_share,count,count_share,sequence_type,user_id_sample
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
path_end,3 751,1.0,3 751,0.09,other,[921396444]
path_start,3 751,1.0,3 751,0.09,other,[302653277]
catalog,3 611,0.96,14 518,0.36,other,[539857357]
main,2 385,0.64,5 635,0.14,other,[104008042]
cart,1 924,0.51,2 842,0.07,other,[314468142]
product2,1 430,0.38,2 172,0.05,other,[205492887]
delivery_choice,1 356,0.36,1 686,0.04,other,[720173080]
product1,1 122,0.3,1 515,0.04,other,[704020228]
payment_choice,958,0.26,1 107,0.03,other,[653107020]
delivery_courier,748,0.2,834,0.02,other,[219678075]


<retentioneering.tooling.sequences.sequences.Sequences at 0x17ffcdc70>

## Tuning the arguments

In [5]:
stream\
    .split_sessions(timeout=(30, 'm'))\
    .sequences(
        ngram_range=(2, 3),
        weight_col='session_id',
        metrics=['count', 'count_share', 'paths_share'],
        threshold=['count', 1200],
        sorting=['count_share', False],
        heatmap_cols=['session_id_share'],
        sample_size=3
    )

Unnamed: 0_level_0,count,count_share,session_id_share,sequence_type,session_id_sample
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
catalog -> catalog,4 857,0.06,0.4,loop,"['736786060_1', '696093962_2', '226908074_1']"
main -> catalog,4 064,0.05,0.51,other,"['962262431_2', '206756025_2', '685067898_1']"
session_start -> main,3 768,0.04,0.58,other,"['392174907_3', '934489861_3', '536170195_3']"
path_start -> session_start,3 751,0.04,0.58,other,"['680256022_1', '110374635_1', '55520661_1']"
session_end -> path_end,3 751,0.04,0.58,other,"['256366749_2', '353256438_1', '540528050_2']"
catalog -> session_end,2 852,0.03,0.44,other,"['336287644_2', '623854494_2', '981878337_2']"
path_start -> session_start -> catalog,2 686,0.03,0.42,other,"['802760330_1', '236467681_1', '934489861_1']"
session_start -> catalog,2 686,0.03,0.42,other,"['161089315_1', '110335585_1', '686865201_1']"
session_start -> main -> catalog,2 619,0.03,0.41,other,"['835353025_1', '737390690_3', '716009752_2']"
catalog -> product2,2 172,0.03,0.27,other,"['742899356_7', '709334840_1', '171887017_1']"


<retentioneering.tooling.sequences.sequences.Sequences at 0x17f959400>

## Comparing groups

In [6]:
np.random.seed(111)
users = set(stream.to_dataframe()['user_id'])
group1 = set(np.random.choice(list(users), size=len(users)//2))
group2 = users - group1

In [7]:
stream.sequences(
    groups=[group1, group2],
    group_names=['A', 'B'],
    metrics=['paths_share', 'count_share'],
    threshold=[('user_id_share', 'delta_abs'), 0],
    sorting=[('count_share', 'delta'), False]
)

  vec_data.loc[groups[0], "group"] = group_names[0]
  vec_data.loc[groups[1], "group"] = group_names[1]


Unnamed: 0_level_0,user_id_share,user_id_share,user_id_share,user_id_share,count_share,count_share,count_share,count_share,sequence_type,user_id_sample,user_id_sample
Unnamed: 0_level_1,A,B,delta_abs,delta,A,B,delta_abs,delta,Unnamed: 9_level_1,A,B
Sequence,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
delivery_choice,0.35,0.37,0.02,0.06,0.04,0.04,0.0,0.04,other,[799913972],[333582245]
delivery_pickup,0.12,0.13,0.0,0.02,0.01,0.01,0.0,0.03,other,[805683601],[475256234]
cart,0.51,0.52,0.01,0.02,0.07,0.07,0.0,0.03,other,[274110594],[371912382]
payment_choice,0.25,0.26,0.01,0.03,0.03,0.03,0.0,0.02,other,[258837206],[358346667]
payment_done,0.17,0.18,0.01,0.06,0.02,0.02,0.0,0.02,other,[900739025],[962374537]
catalog,0.96,0.97,0.01,0.01,0.36,0.37,0.01,0.01,other,[444244255],[139276360]
product2,0.38,0.38,0.01,0.02,0.05,0.05,0.0,0.01,other,[573609710],[407735919]
path_end,1.0,1.0,0.0,0.0,0.09,0.09,0.0,0.0,other,[794982519],[372695387]
path_start,1.0,1.0,0.0,0.0,0.09,0.09,0.0,0.0,other,[410014458],[613540564]
delivery_courier,0.2,0.2,0.01,0.03,0.02,0.02,-0.0,-0.01,other,[110523244],[902698857]


<retentioneering.tooling.sequences.sequences.Sequences at 0x17fe0c610>