In [44]:
import pandas as pd
import numpy as np

import argparse
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sqlalchemy import engine

from class_lib.feature_generator import *
from class_lib.segments_preparer import *
from class_lib.graph_lib import *
from class_lib.featureprocessor import *
from class_lib.feature_minorizer import *

import os
import sqlalchemy
from dotenv import load_dotenv


from config import *
from batch_config import *

from segmentation_functools.functools_beh import *
from segmentation_functools.functools_ltv import *
from segmentation_functools.functools import *

import datetime

In [45]:
# Load variables from the local .env file into the process environment;
# the EVENTS_DB_* settings read in the next cell are expected to come from it.
load_dotenv('.env')

True

In [46]:
# Local import: only this cell needs it.
from urllib.parse import quote

# Connection settings for the events database, taken from the environment.
host = os.getenv('EVENTS_DB_HOST')
db = os.getenv('EVENTS_DB_NAME')
user = os.getenv('EVENTS_DB_USER')
password = os.getenv('EVENTS_DB_PASSWORD')
port = os.getenv('EVENTS_DB_PORT')

# Percent-encode the credentials before placing them in the URL: characters such
# as '@', '/', ':' or '+' in a user name or password would otherwise be parsed as
# URL delimiters. `safe=''` encodes every reserved character.
connection_str = 'postgresql://{0}:{1}@{2}:{3}/{4}'.format(
    quote(str(user), safe=''),
    quote(str(password), safe=''),
    host,
    port,
    db,
)

# NOTE: this rebinds `engine`, shadowing the `sqlalchemy.engine` module imported at
# the top of the notebook. `stream_results` is passed as an execution option so it
# applies to the queries issued through this engine.
engine = sqlalchemy.create_engine(connection_str, execution_options={"stream_results":True})



In [4]:
# Sessions of account 11 whose session_start falls in 2022-10-01 (inclusive) .. 2023-01-01 (exclusive).
# NOTE(review): `sessions` is reassigned by later loading cells (account 411 query, pickle load),
# so on a top-to-bottom run this result is not the one used downstream.
sessions_q = '''
select * from data.customer_profile_sessions cps
where cps.account_id = 11 and cps.session_start >= '2022-10-01' and cps.session_start < '2023-01-01'
'''
sessions = pd.read_sql(sessions_q,engine)

In [None]:
# Actions of account 11 with event_time in 2022-10-01 (inclusive) .. 2023-01-01 (exclusive).
# NOTE(review): `actions` is reassigned by later loading cells.
events_q = '''
select * from data.customer_profile_actions cps
where cps.account_id = 11 and cps.event_time >= '2022-10-01' and cps.event_time < '2023-01-01'
'''
actions = pd.read_sql(events_q,engine)

In [None]:
# Visits of account 11 with event_time in 2022-10-01 (inclusive) .. 2023-01-01 (exclusive).
# NOTE(review): `visits` is reassigned by later loading cells.
visits_q = '''
select * from data.customer_profile_visits cps
where cps.account_id = 11 and cps.event_time >= '2022-10-01' and cps.event_time < '2023-01-01'
'''
visits = pd.read_sql(visits_q,engine)

In [7]:
# Account 411: only sessions that contain at least one add-to-basket
# (add_to_basket_count > 0), with no date restriction.
sessions_q = '''
select * from data.customer_profile_sessions cps
where cps.account_id = 411 and add_to_basket_count > 0
'''
sessions = pd.read_sql(sessions_q,engine)

# All actions of account 411 (narrowed to the selected sessions below).
events_q = '''
select * from data.customer_profile_actions cps
where cps.account_id = 411
'''
actions = pd.read_sql(events_q,engine)

# All visits of account 411 (narrowed to the selected sessions below).
visits_q = '''
select * from data.customer_profile_visits cps
where cps.account_id = 411 
'''
visits = pd.read_sql(visits_q,engine)

# Keep only the actions / visits that belong to the sessions loaded above.
actions = actions[actions.session_id.isin(sessions.id.unique())]
visits = visits[visits.session_id.isin(sessions.id.unique())]

In [4]:
# Load previously cached frames from local pickle files; this replaces whatever the
# SQL cells above put into `sessions`, `actions` and `visits`.
# NOTE(review): which query produced these pickles is not recorded here -- confirm.
# Unpickling executes code from the file, so only load pickles you created yourself.
sessions = pd.read_pickle('sessions_q.pkl')
actions = pd.read_pickle('actions_q.pkl')
visits = pd.read_pickle('visits_q.pkl')

In [8]:
# Latest session start in the loaded data (quick check of the covered period).
sessions.session_start.max()

Timestamp('2023-02-07 12:42:44.815000')

In [9]:
# Give every visit the event name 'page_view' so visits can be stacked with actions
# (mutates `visits` in place).
visits['name'] = 'page_view'

In [10]:
# Column layout of `actions`; used to select the same columns from `visits`.
events_columns = actions.columns

In [11]:
# Stack actions and visits into one event log with the columns of `actions`.
# Row labels of both inputs are kept as-is (no ignore_index), so labels may repeat.
events = pd.concat([actions,visits[events_columns]])

In [12]:
# Attach guest_id to each session through the events that reference it.
# Left join: sessions without events are kept (guest_id / session_id missing);
# a session whose events carry several guest_ids yields one row per guest_id.
sessions = sessions.merge(
    events[['guest_id','session_id']].drop_duplicates(),
    left_on='id',
    right_on='session_id',
    how='left'
)

In [13]:
# Keep only sessions that lasted at least one second.
sessions = sessions[(sessions['session_end'] - sessions['session_start']) >= np.timedelta64(1, 's')]

In [14]:
# Keep only events that belong to the sessions that survived the filter above.
events = events[events.session_id.isin(sessions.id)]

In [15]:
# Chronological order; the per-guest event numbering computed later depends on this order.
events = events.sort_values(by=['event_time'])

In [None]:
def slice_before_atb(df):
    """Return the rows of one group that precede its first add-to-basket event.

    Parameters
    ----------
    df : pandas.DataFrame
        Events of a single group (this notebook applies it per ``guest_id``),
        already in the order in which they should be read. Must contain a
        ``name`` column.

    Returns
    -------
    pandas.DataFrame
        The rows positioned before the first row whose ``name`` equals
        ``'add_to_basket'``; ``df`` itself when no such row exists.
    """
    is_atb = (df['name'] == 'add_to_basket').to_numpy()
    if not is_atb.any():
        return df
    # Slice by position instead of comparing a `number` column: that column is
    # only created in a later cell, so depending on it raised KeyError when the
    # notebook ran top to bottom. For a per-guest group in event order, the
    # position is the same value as the per-guest running number.
    return df.iloc[:is_atb.argmax()]

# Apply the slice per guest: each guest keeps only the events before the first add_to_basket.
events_sliced = events.groupby('guest_id').apply(slice_before_atb)

In [16]:
# 0-based running number of each event within its guest, in the current row order.
events['number'] = events.groupby('guest_id').cumcount()
def get_atb(x):
    """Return the ``number`` of the first add-to-basket row of ``x``.

    When ``x`` has no row named ``'add_to_basket'`` the result is
    ``len(x) + 1``, a sentinel used by the caller as an upper bound that
    keeps every row.
    """
    atb_rows = x.loc[x['name'] == 'add_to_basket']
    if atb_rows.empty:
        return len(x) + 1
    return atb_rows.iloc[0].number
# Per guest, keep the events numbered before that guest's first add_to_basket.
# For guests without one, get_atb returns len(group) + 1, so every row is kept.
events_sliced = events.groupby('guest_id').apply(lambda x : x[x['number'] < get_atb(x)])

In [None]:
# Alternative, split construction of the "events before the first add_to_basket" slice.
atb_guests = events[events['name'] == 'add_to_basket'].guest_id.unique()
events['number'] = events.groupby('guest_id').cumcount()
# Guests with an add_to_basket: keep only the events numbered before their first one.
events_sliced_atb = events[events.guest_id.isin(atb_guests)].groupby('guest_id')\
    .apply(lambda x : x[x['number'] < x[x['name'] == 'add_to_basket'].iloc[0].number])
# Guests without any add_to_basket keep all of their events. Stored under its own
# name: assigning this subset to `events_sliced` replaced the complete slice with
# the non-ATB guests only, while the combined frame below was never stored.
events_sliced_no_atb = events[~events.guest_id.isin(atb_guests)]

# Shown for inspection only; `events_sliced` itself is left untouched by this cell.
pd.concat([events_sliced_atb,events_sliced_no_atb])

In [17]:
# Inspect the sliced events; the frame is indexed by guest_id plus the original row label.
events_sliced

Unnamed: 0_level_0,Unnamed: 1_level_0,account_id,name,event_id,session_id,guest_id,tracking_id,event_time,event_type,event_data,referer,url,channel,created,updated,updated_on_conflict,number
guest_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
vf-1667934834066-tCzfXk56YVy962Yurn3TX,117745,411,page_view,MamQj4WKagitYkOXK8ViY,54135559,vf-1667934834066-tCzfXk56YVy962Yurn3TX,VF-845E1IXO,2022-11-26 11:28:23.774,default,"{'url': 'https://stiliauskodas.lt/', 'utm': {}}",,https://stiliauskodas.lt/,Direct,2022-11-26 11:55:14.703420,2022-12-27 17:25:16.704980,True,0
vf-1667934834066-tCzfXk56YVy962Yurn3TX,1016891,411,document_mouse_enter,AIo_8YmfSZ--W7BVpMxR8,54135559,vf-1667934834066-tCzfXk56YVy962Yurn3TX,VF-845E1IXO,2022-11-26 11:28:26.908,default,{},,https://stiliauskodas.lt/,Direct,2022-11-26 11:55:14.703420,,True,1
vf-1667934834066-tCzfXk56YVy962Yurn3TX,386784,411,link_click,QbOijn3X1MuMqCqom52gH,54135559,vf-1667934834066-tCzfXk56YVy962Yurn3TX,VF-845E1IXO,2022-11-26 11:28:28.615,default,"{'id': '', 'innerText': 'PAESE paakių MASKUOKL...",,https://stiliauskodas.lt/,Direct,2022-11-26 11:55:14.703420,,True,2
vf-1667934834066-tCzfXk56YVy962Yurn3TX,386785,411,document_mouse_out,plXzzYiErBZKyserxGg54,54135559,vf-1667934834066-tCzfXk56YVy962Yurn3TX,VF-845E1IXO,2022-11-26 11:28:28.615,default,{},,https://stiliauskodas.lt/,Direct,2022-11-26 11:55:14.703420,,True,3
vf-1667934834066-tCzfXk56YVy962Yurn3TX,117746,411,page_view,Bx8bSSHUB_4Uy-hJz2woO,54135559,vf-1667934834066-tCzfXk56YVy962Yurn3TX,VF-845E1IXO,2022-11-26 11:28:28.615,default,{'url': 'https://stiliauskodas.lt/maskuokliai/...,https://stiliauskodas.lt/,https://stiliauskodas.lt/maskuokliai/585-2882-...,Direct,2022-11-26 11:55:14.703420,2022-12-27 17:25:16.704980,True,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
vf-1675773763040-UgfeAwLcAVtezmA0lIr19,976568,411,link_click,U80UNwKWVagep_Hk21eBR,59007194,vf-1675773763040-UgfeAwLcAVtezmA0lIr19,VF-845E1IXO,2023-02-07 12:43:15.257,default,"{'id': '', 'url': 'https://stiliauskodas.lt/kr...",https://l.facebook.com/,https://stiliauskodas.lt/?utm_source=fb&utm_me...,Social,2023-02-07 12:47:47.747676,,False,4
vf-1675773763040-UgfeAwLcAVtezmA0lIr19,976571,411,document_mouse_enter,dGlmkvhZ-PCOYHqZOwVgR,59007194,vf-1675773763040-UgfeAwLcAVtezmA0lIr19,VF-845E1IXO,2023-02-07 12:43:18.983,default,{},https://stiliauskodas.lt/?utm_source=fb&utm_me...,https://stiliauskodas.lt/kremines-pudros/132-k...,Direct,2023-02-07 12:47:47.747676,,False,5
vf-1675773763040-UgfeAwLcAVtezmA0lIr19,976597,411,scroll_50,cMCdoVWdjOSYiW2tkTKwh,59007194,vf-1675773763040-UgfeAwLcAVtezmA0lIr19,VF-845E1IXO,2023-02-07 12:44:01.425,default,{},https://stiliauskodas.lt/?utm_source=fb&utm_me...,https://stiliauskodas.lt/kremines-pudros/132-k...,Direct,2023-02-07 12:47:47.747676,,False,6
vf-1675773763040-UgfeAwLcAVtezmA0lIr19,976639,411,scroll_75,_q9YIbD0YJPxsUUE5cLei,59007194,vf-1675773763040-UgfeAwLcAVtezmA0lIr19,VF-845E1IXO,2023-02-07 12:44:16.825,default,{},https://stiliauskodas.lt/?utm_source=fb&utm_me...,https://stiliauskodas.lt/kremines-pudros/132-k...,Direct,2023-02-07 12:47:47.747676,,False,7


In [18]:
# Number of distinct guests among the retained sessions.
sessions.guest_id.nunique()

2645

In [19]:
# Positive class: guests with at least one add_to_basket event.
target_1 = list(events[events['name'] == 'add_to_basket'].guest_id.unique())
# Negative class: every other guest present in `events` (set difference, so order is arbitrary).
target_0 = list(set(events.guest_id.unique()) - set(target_1))

In [20]:
# Binary label per guest: 1.0 for guests in target_1, 0.0 for guests in target_0,
# indexed by guest_id (values and index are built in the same order).
target = pd.Series(
    np.concatenate([np.ones(len(target_1)),np.zeros(len(target_0))]),
    index = target_1+ target_0
)

In [21]:
# Per-guest aggregated features over the events before the first add_to_basket.
# 'events_before_atb' appears as the prefix of the resulting feature columns.
events_processor = FeatureProcessorEvents('events_before_atb')
# The groupby-apply slice left guest_id both as an index level and as a column:
# drop the column here and restore it from the index via reset_index() below.
# NOTE(review): this cell reassigns `events_sliced`, so it is not safe to re-run on its own.
events_sliced = events_sliced.drop(columns=['guest_id'])
# NOTE(review): time_func presumably adds the `interval_between` column aggregated in
# the next cell -- confirm in class_lib.featureprocessor.
events_sliced = events_processor.time_func(events_sliced.reset_index(), 'guest_id','event_time')
events_float_features = []
events_cat_features = ['name', 'channel', 'referer']
events_features = events_processor.prepare_aggregated_features(events_sliced,
                                            'guest_id',
                                            events_float_features,
                                            events_cat_features,
                                            []
                                                )

In [22]:
# Aggregate `interval_between` per guest with the processor's NaN-aware variant.
# NOTE(review): the meaning of 'ignore' is defined by prepare_aggregated_features_nan -- confirm there.
NAN_AGG_TYPE_AEB = 'ignore'
events_features_nan = events_processor.prepare_aggregated_features_nan(
    events_sliced,
    'guest_id',
    ['interval_between'],
    interval_nan_type = NAN_AGG_TYPE_AEB
)       

In [23]:
# Sessions that contain at least one of the pre-add_to_basket events.
first_sessions = sessions[sessions.id.isin(events_sliced.session_id)]

In [24]:
# Session-level feature processor; 'sessions_before_atb' is the prefix of its feature columns.
session_processor = FeatureProcessorSessions('sessions_before_atb')
# NOTE(review): presumably adds the `session_day_part` column listed among the
# categorical session features -- confirm in class_lib.featureprocessor.
first_sessions = session_processor.make_session_day_part(first_sessions)

In [25]:
# Per-guest interval computation over session start/end times. The pandas warning
# printed below originates from a groupby(...).apply(...) inside time_func.
first_sessions = session_processor.time_func(
                                        first_sessions, 
                                        'guest_id',
                                        ['session_start','session_end']
                                        )

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  return df.groupby(groupby_id).apply(self.get_intervals, field).reset_index(drop=True)


In [26]:
# Session columns aggregated per guest as numeric features ...
sessions_float_features = ['session_length','actions_count','page_views_count','attention_score']
# ... and as categorical features. `channel_session`, `time_to_link_click` and
# `attention_score` are created by the cells that follow, before the lists are used.
sessions_cat_features = ['browser_family', 'os_family', 'device_family',
                        'device_brand', 'device_model', 'channel_session', 'device_type',
                        'session_day_part','time_to_link_click']

In [27]:
# First link_click row of each session, in the current row order of events_sliced.
link_clicks = events_sliced[events_sliced['name'] =='link_click'].groupby('session_id').head(1)
# Bring that click's event_time onto the session rows (left join keeps sessions without a click).
first_sessions = first_sessions.merge(
    link_clicks[['session_id','event_time']],
    left_on='id',
    right_on='session_id',
    how='left'
)
# Seconds from session start to the first link click; negative values are clamped to 0.
first_sessions['time_to_link_click'] = (first_sessions['event_time'] - first_sessions['session_start']).dt.total_seconds()
first_sessions.loc[first_sessions['time_to_link_click'] < 0, 'time_to_link_click'] = 0
# Bin into quartiles and store the interval labels as strings; sessions without a
# link click end up with the string 'nan'.
# NOTE(review): qcut raises if quartile edges coincide (e.g. many zeros) -- consider duplicates='drop'.
first_sessions['time_to_link_click'] =  pd.qcut(first_sessions.time_to_link_click,q=4).astype('str')

In [28]:
# Per-session counts of mouse-enter and mouse-out events.
document_mouse_enter = events_sliced[events_sliced['name'] == 'document_mouse_enter'].groupby('session_id').size()
document_mouse_out = events_sliced[events_sliced['name'] == 'document_mouse_out'].groupby('session_id').size()
# Outer join so sessions with only one of the two event types are kept (missing count -> 0).
dme_dmo = document_mouse_enter.rename('document_mouse_enter').to_frame().join(document_mouse_out.rename('document_mouse_out'),how='outer').fillna(0)
# attention_score = (count(mouse_out) - count(mouse_enter)) * max(count(mouse_out), count(mouse_enter))
dme_dmo['attention_score'] = (dme_dmo['document_mouse_out'] - dme_dmo['document_mouse_enter']) * dme_dmo.max(axis=1)
# Attach the score to the sessions by session id (sessions without either event get NaN).
first_sessions = first_sessions.merge(
    dme_dmo['attention_score'],
    left_on=['id'],
    right_index=True,
    how='left'
)

In [29]:
# Per-session counts of page views and of all other events, indexed by session_id.
page_views_count = events_sliced[events_sliced['name'] =='page_view'].groupby('session_id').size()
actions_count = events_sliced[events_sliced['name'] != 'page_view'].groupby('session_id').size()
# Earliest add_to_basket time per session. An aggregation returns a Series indexed
# by session_id, which is what an index join onto sessions keyed by their id needs;
# `.groupby(...).head(1)['event_time']` kept the event row labels instead, so such
# a join matched session ids against unrelated row labels.
first_atb_event_time = events[events['name'] == 'add_to_basket'].groupby('session_id')['event_time'].min()

In [30]:
# Drop the original count columns; they are replaced below by counts computed
# from the pre-add_to_basket events only.
first_sessions = first_sessions.drop(columns=["actions_count",'page_views_count'])

In [31]:
# Index by session id so the per-session Series below can be joined on the index.
first_sessions = first_sessions.set_index('id')

In [32]:
# Index joins (left): each Series is expected to be indexed by session id.
first_sessions = first_sessions.join(
    page_views_count.rename('page_views_count'),
    how='left'
)
first_sessions = first_sessions.join(
    actions_count.rename('actions_count'),
    how='left'
)
first_sessions = first_sessions.join(
    first_atb_event_time.rename('first_atb_event_time'),
    how='left'
)

In [33]:

# For sessions that contain an add_to_basket, replace session_length with the time
# from session start to the first add_to_basket.
# NOTE(review): the assigned value is a Timedelta difference; confirm that
# `session_length` uses the same type/unit (other durations in this notebook are
# converted with `.dt.total_seconds()`).
# NOTE(review): the assignment aligns on the index and needs unique session ids -- verify.
atb_sessions = first_sessions.loc[~first_sessions['first_atb_event_time'].isna()]
first_sessions.loc[~first_sessions['first_atb_event_time'].isna(),'session_length'] =\
atb_sessions['first_atb_event_time'] - atb_sessions['session_start']

In [34]:
# Rename the session-level channel to `channel_session`, the name used in the
# categorical session feature list.
first_sessions = first_sessions.rename(columns={'channel' : 'channel_session'})

In [35]:
# sessions_features = session_processor.prepare_aggregated_features(
#         first_sessions,
#         'guest_id',
#         [],
#         ['time_to_link_click']
#     )


In [36]:
# Aggregate the session columns per guest into numeric and categorical features
# (the output shown below has one row per guest_id).
sessions_features = session_processor.prepare_aggregated_features(
        first_sessions,
        'guest_id',
        sessions_float_features,
        sessions_cat_features
    )

In [37]:
# Preview of the per-guest session features.
sessions_features.head()

Unnamed: 0,guest_id,sessions_before_atb_num_records,sessions_before_atb_browser_family_mf,sessions_before_atb_browser_family_weight_mf,sessions_before_atb_browser_family_first,sessions_before_atb_browser_family_last,sessions_before_atb_os_family_mf,sessions_before_atb_os_family_weight_mf,sessions_before_atb_os_family_first,sessions_before_atb_os_family_last,...,sessions_before_atb_actions_count_amin,sessions_before_atb_actions_count_amax,sessions_before_atb_page_views_count_mean,sessions_before_atb_page_views_count_sum,sessions_before_atb_page_views_count_amin,sessions_before_atb_page_views_count_amax,sessions_before_atb_attention_score_mean,sessions_before_atb_attention_score_sum,sessions_before_atb_attention_score_amin,sessions_before_atb_attention_score_amax
0,vf-1675108088917-S2jw_eNCGrI6CWy2MHRzt,2,Mobile Safari,1.0,Mobile Safari,Mobile Safari,iOS,1.0,iOS,iOS,...,4.0,4.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
1,vf-1673085473310-iyLaPpxYgbg30dpnybAmY,1,Chrome Mobile iOS,1.0,Chrome Mobile iOS,Chrome Mobile iOS,iOS,1.0,iOS,iOS,...,17.0,17.0,4.0,4.0,4.0,4.0,-4.0,-4.0,-4.0,-4.0
2,vf-1673023561058-jS7wB7uWtQpnev8FQ7Y2t,1,MiuiBrowser,1.0,MiuiBrowser,MiuiBrowser,Android,1.0,Android,Android,...,15.0,15.0,3.0,3.0,3.0,3.0,-3.0,-3.0,-3.0,-3.0
3,vf-1673412270423-IRZy3BsnUkzvOV6Ej0mpG,1,Google,1.0,Google,Google,iOS,1.0,iOS,iOS,...,21.0,21.0,7.0,7.0,7.0,7.0,-7.0,-7.0,-7.0,-7.0
4,vf-1668495484935-F_nb3MLt-x9veXbxFIbZ6,1,Chrome,1.0,Chrome,Chrome,Windows,1.0,Windows,Windows,...,9.0,9.0,4.0,4.0,4.0,4.0,-2.0,-2.0,-2.0,-2.0


In [38]:
# Preview of the per-guest categorical event features.
events_features.head()

Unnamed: 0,guest_id,events_before_atb_num_records,events_before_atb_name_mf,events_before_atb_name_weight_mf,events_before_atb_name_first,events_before_atb_name_last,events_before_atb_channel_mf,events_before_atb_channel_weight_mf,events_before_atb_channel_first,events_before_atb_channel_last,events_before_atb_referer_mf,events_before_atb_referer_weight_mf,events_before_atb_referer_first,events_before_atb_referer_last
0,vf-1672833385967-5PAloUB47gz5TEAtJElJI,200,link_click,0.265,page_view,button_click,Direct,0.97,Organic Search,Direct,https://stiliauskodas.lt/,0.64,https://www.google.lt,https://stiliauskodas.lt/paieska?controller=se...
1,vf-1674027833615-uIHlSlIgOeL7tck6JFpIJ,200,document_mouse_out,0.27,page_view,button_click,Direct,0.825,Email,Direct,https://stiliauskodas.lt/gamintojas/3-paese?ut...,0.19,,https://stiliauskodas.lt/18-maskuokliai
2,vf-1672177985570-5EijFIWVVQ2BxqG9AA98i,197,document_mouse_enter,0.35533,page_view,document_mouse_enter,Direct,0.918782,Organic Search,Direct,,0.807107,https://www.google.com/,
3,vf-1667976978298-ygRVvQwrySuRS2ytKU0so,169,document_mouse_out,0.360947,page_view,document_mouse_out,Direct,1.0,Direct,Direct,https://stiliauskodas.lt/8-lupoms,0.260355,https://stiliauskodas.lt/10-veidui,https://stiliauskodas.lt/19-blakstienu-tusai
4,vf-1670491509273-sxXZaWfvZb2_eYZvn7ig-,163,document_mouse_enter,0.319018,page_view,document_mouse_enter,Direct,0.852761,Organic Search,Direct,https://stiliauskodas.lt/paieska?controller=se...,0.208589,https://www.google.com/,https://stiliauskodas.lt/paieska?controller=se...


In [39]:
# Preview of the per-guest `interval_between` aggregates.
events_features_nan.head()

Unnamed: 0,guest_id,events_before_atb_interval_between_mean,events_before_atb_interval_between_sum,events_before_atb_interval_between_amin,events_before_atb_interval_between_amax
0,vf-1667934834066-tCzfXk56YVy962Yurn3TX,1.6488,8.244,0.0,3.403
1,vf-1667934959782-Yg78p3P5JiTHvwzTm1gsY,3.082091,33.903,0.0,12.339
2,vf-1667937566303-D6t4yHm2NosK1ZfJ0koEh,5.437378,201.183,0.0,102.384
3,vf-1667943026765-xy7m7NV3JFAz8Yi8-4T59,5.461029,185.675,0.0,43.978
4,vf-1667967934069-7BuSQjw1s9Qcwn5q8FGs-,4.218417,50.621,0.0,18.279


In [40]:
# session_features_nan.head()

NameError: name 'session_features_nan' is not defined

In [41]:
# Combine the per-guest feature tables on guest_id. `merge` defaults to an inner
# join, so only guests present in every table remain. The session interval
# features stay disabled: `session_features_nan` is only built in a later cell.
merged = (
    sessions_features
    .merge(events_features, left_on='guest_id', right_on='guest_id')
    .merge(events_features_nan, left_on='guest_id', right_on='guest_id')
    # .merge(session_features_nan, left_on='guest_id', right_on='guest_id')
)

In [42]:
# Persist the merged feature table for account 411 (path is relative to the working directory).
merged.to_pickle('features_merged_411.pkl')

In [43]:
# Full list of feature columns in the merged table.
merged.columns.tolist()

['guest_id',
 'sessions_before_atb_num_records',
 'sessions_before_atb_browser_family_mf',
 'sessions_before_atb_browser_family_weight_mf',
 'sessions_before_atb_browser_family_first',
 'sessions_before_atb_browser_family_last',
 'sessions_before_atb_os_family_mf',
 'sessions_before_atb_os_family_weight_mf',
 'sessions_before_atb_os_family_first',
 'sessions_before_atb_os_family_last',
 'sessions_before_atb_device_family_mf',
 'sessions_before_atb_device_family_weight_mf',
 'sessions_before_atb_device_family_first',
 'sessions_before_atb_device_family_last',
 'sessions_before_atb_device_brand_mf',
 'sessions_before_atb_device_brand_weight_mf',
 'sessions_before_atb_device_brand_first',
 'sessions_before_atb_device_brand_last',
 'sessions_before_atb_device_model_mf',
 'sessions_before_atb_device_model_weight_mf',
 'sessions_before_atb_device_model_first',
 'sessions_before_atb_device_model_last',
 'sessions_before_atb_channel_session_mf',
 'sessions_before_atb_channel_session_weight_mf'

In [None]:
# Categorical feature names recorded on the session processor.
session_processor.cat_feature_list

In [None]:
# Per-guest aggregates of the session `interval_between` values.
# NOTE(review): this is defined after the merge cell above, where its use is commented
# out; reuses NAN_AGG_TYPE_AEB from the events cell.
session_features_nan = session_processor.prepare_aggregated_features_nan(
    first_sessions,
    'guest_id',
    ['interval_between'],
    interval_nan_type = NAN_AGG_TYPE_AEB
)            

In [None]:
# Feature-name registry grouped by source (events / sessions) and kind (float / cat),
# taken from the lists kept on the two processors.
features_dict = {
    'events_float' : events_processor.float_feature_list,
    'events_cat' : events_processor.cat_feature_list,
    'sessions_float' : session_processor.float_feature_list,
    'sessions_cat' : session_processor.cat_feature_list
}

In [None]:
# NOTE(review): presumably quantile-bins `time_to_link_click` per session id; that
# column was already converted to string interval labels above -- confirm this is intended.
first_sessions_processed = session_processor.make_quntilies(first_sessions.reset_index(), 
                                                                'id', 
                                                                ['time_to_link_click']
                                                                )

In [None]:
# ( count(document_mouse_out) - count(document_mouse_enter) ) * MAX[count(document_mouse_out),count(document_mouse_enter)]



In [None]:
# `attention_score` is already merged into `first_sessions` by an earlier cell.
# Merging it again would create suffixed duplicates (`attention_score_x` /
# `attention_score_y`) and remove the plain `attention_score` column that the
# numeric session feature list refers to, so only merge when it is absent.
if 'attention_score' not in first_sessions.columns:
    first_sessions = first_sessions.merge(
        dme_dmo['attention_score'],
        left_on=['id'],
        right_index=True,
        how='left'
    )

In [None]:
def make_profile(df,group_id,column):
    """Share of each ``column`` value within every ``group_id`` group.

    Returns a frame with one row per group and one column per distinct value;
    each row holds the value's relative frequency in that group, with 0 for
    values that never occur in the group.
    """
    counts = df.groupby([group_id])[column].value_counts().unstack()
    row_totals = counts.sum(axis=1)
    shares = counts.div(row_totals, axis=0)
    return shares.fillna(0)

In [None]:
# Per-guest distribution of session channels (share of each channel_session value).
CAP = make_profile(first_sessions,'guest_id','channel_session')

In [None]:
# NOTE(review): self-assignment with no effect; looks like a leftover from an edit.
features_dict['sessions_float'] = features_dict['sessions_float']

In [None]:
# Inspect the feature registry.
features_dict

In [None]:
from datetime import date, datetime
from pandas import DataFrame

from rfm.GeneralizedRFM.calculation_classes import RecencyScore, FrequencyScore, BaseScore
from rfm.GeneralizedRFM.exceptions import EmptyData
import pandas as pd

class GenerilizedRFM():
    """Recency/frequency scoring for a "current" and a "delta" reference point.

    Orchestrates ``BaseScore``, ``RecencyScore`` and ``FrequencyScore`` from
    ``rfm.GeneralizedRFM.calculation_classes``. The four datetimes are stored
    and forwarded to those classes unchanged; their exact meaning is defined
    there.

    Parameters
    ----------
    today_datetime, today_datetime_delta : datetime
        Reference datetimes for the current and the delta calculation.
    range_datetime, range_datetime_delta : datetime
        Range datetimes for the current and the delta calculation.
    groupby_id : str, optional
        Name of the entity column to score. Defaults to ``'customer_profile_id'``.
    datetime_column : str, optional
        Name of the timestamp column. Defaults to ``'purchased_at'``.
    """

    def __init__(self,today_datetime : datetime, today_datetime_delta : datetime, range_datetime : datetime, range_datetime_delta: datetime,
                 groupby_id: str = 'customer_profile_id', datetime_column: str = 'purchased_at'):
        self.today_datetime = today_datetime
        self.today_datetime_delta = today_datetime_delta
        self.range_datetime = range_datetime
        self.range_datetime_delta = range_datetime_delta
        # Column names are constructor arguments (with the former hard-coded values
        # as defaults) so callers need not patch the attributes after construction.
        self.groupby_id = groupby_id
        self.datetime_column = datetime_column

    def _make_transitions(self, df: DataFrame, df_delta: DataFrame):
        """Count score changes between the current and the delta scoring.

        Only index labels present in both frames are considered. Rows whose
        recency and frequency scores are both unchanged are dropped; the rest
        is counted per distinct combination of scores and segment ids.

        Returns
        -------
        DataFrame
            One row per combination, with the six score/segment columns and a
            ``counts`` column.
        """
        united_items = list(set(df.index) & set(df_delta.index))
        united_df = pd.concat(
            [
                df[df.index.isin(united_items)],
                df_delta[df_delta.index.isin(united_items)]
            ],
            axis=1
        )
        columns = ['recency_score','recency_score_delta','frequency_score','frequency_score_delta', 'segment_id','segment_delta_id']
        united_df = united_df[columns]

        # A transition is any change in recency score or in frequency score.
        recency_changed = united_df['recency_score'] != united_df['recency_score_delta']
        frequency_changed = united_df['frequency_score'] != united_df['frequency_score_delta']
        united_df = united_df[recency_changed | frequency_changed]

        transitions = united_df.groupby(columns).size().reset_index(name='counts')
        return transitions

    def get_rated_transitions(self, data, T,t):
        """Score ``data`` for both reference points and summarise the transitions.

        Parameters
        ----------
        data
            Input records; passed to ``BaseScore.split_datasets``.
        T, t
            Forwarded to ``FrequencyScore`` as its ``T`` and ``t`` arguments.

        Returns
        -------
        tuple
            ``(rf_df, rc_borders, transitions)``: the current scores with a
            ``segment_id`` column, the first element returned by
            ``RecencyScore.get_borders()``, and the transition counts.

        Raises
        ------
        EmptyData
            When the current or the delta dataset produced by the split is empty.
        """
        params = [self.today_datetime, self.today_datetime_delta, self.groupby_id,self.datetime_column,self.range_datetime,self.range_datetime_delta]
        bs = BaseScore(*params)

        current_df, current_zero_df , current_df_delta, current_zero_df_delta = bs.split_datasets(data)
        if current_df.shape[0] == 0 or current_df_delta.shape[0] == 0:
            raise EmptyData(current_df.shape[0], current_df_delta.shape[0])

        current_zero_df = bs.set_zero(current_zero_df)
        current_zero_df_delta = bs.set_zero(current_zero_df_delta,delta=True)

        rc = RecencyScore(*params)
        rc.set_data(current_df, current_df_delta)
        rc_df,rc_df_delta = rc.calculate()
        rc_borders, _ = rc.get_borders()

        fc = FrequencyScore(*params, T = T, t = t)
        fc.set_data(current_df, current_df_delta)
        fc_df,fc_df_delta = fc.calculate()

        rf_df = pd.concat([rc_df,fc_df],axis=1)
        rf_delta_df = pd.concat([rc_df_delta,fc_df_delta],axis=1)

        # Append the "zero" rows only for entities that did not receive a score above.
        current_zero_df = current_zero_df[~current_zero_df.index.isin(rf_df.index)]
        current_zero_df_delta = current_zero_df_delta[~current_zero_df_delta.index.isin(rf_delta_df.index)]

        rf_df = pd.concat([rf_df, current_zero_df])
        rf_delta_df = pd.concat([rf_delta_df, current_zero_df_delta])

        # Segment id packs both scores into one number: recency * 10 + frequency.
        rf_df['segment_id'] = rf_df['recency_score'] * 10 + rf_df['frequency_score']
        rf_delta_df['segment_delta_id'] = rf_delta_df['recency_score_delta'] * 10 + rf_delta_df['frequency_score_delta']

        transitions = self._make_transitions(rf_df,rf_delta_df)
        return rf_df, rc_borders, transitions

In [None]:
# Local module alias: the preceding cell runs `from datetime import date, datetime`,
# which rebinds the name `datetime` to the class, so `datetime.datetime.now()` and
# `datetime.timedelta` would raise AttributeError here.
import datetime as dt

T = 30  # whole period, in days
time_delay_days = 15  # shift between the current and the "delta" reference dates, in days
t = 5  # ? time_delay_days or t
today = dt.datetime.now()
today_str = str(today.date())  # ISO date, e.g. '2022-04-04'

today_delta = today - dt.timedelta(days=time_delay_days)  # `time_delay_days` before today
today_delta_str = str(today_delta.date())

# Despite the name, this is T days (not a year) before today.
one_year_ago = today - dt.timedelta(days=T)

# Start of the period for the delta reference date: T + time_delay_days before today.
one_year_ago_delta = today - dt.timedelta(days=T + time_delay_days)
one_year_ago_delta_str = str(one_year_ago_delta.date())

In [None]:
# Score the pre-add_to_basket sessions with the generalized RFM helper.
grfm = GenerilizedRFM(
    today,
    today_delta,
    one_year_ago,
    one_year_ago_delta
)
# Point the helper at the session data: timestamps come from session_start and
# entities are guests, replacing the class defaults.
grfm.datetime_column = 'session_start'
grfm.groupby_id = 'guest_id'
# mark: per-guest scores; recency_borders: recency borders; trans: transition counts.
mark, recency_borders, trans = grfm.get_rated_transitions(
    first_sessions,
    T,
    t
)

In [None]:
# Prefixed names for the columns of `mark` from 'recency' through 'frequency_score' (label slice, inclusive).
[f"before_atb_RFM_{col}" for col in mark.loc[:,'recency' : 'frequency_score'].columns]

In [None]:
# Register the RFM features: the score columns as categorical ...
features_dict['rfm_cat'] = ['before_atb_RFM_frequency_score','before_atb_RFM_recency_score']

# ... and the raw recency / frequency / days values as numeric.
features_dict['rfm_float'] = ['before_atb_RFM_recency','before_atb_RFM_frequency', 'before_atb_RFM_days']


In [None]:
# Keep only the first 33 numeric session feature names.
# NOTE(review): 33 is an unexplained magic number tied to the current feature list -- confirm or derive it.
features_dict['sessions_float'] = features_dict['sessions_float'][:33]

In [None]:
# Feature names for the channel-profile columns of `CAP`. The closing quote sits
# directly after `{col}`: a stray space before it was appended to every name.
features_dict['profiles'] = [f'profiles_before_atb_CIP_{col}' for col in CAP.columns]

In [None]:
# Final check of the feature registry before it is written to disk.
features_dict

In [None]:
import json

# Write the feature registry to features_before_atb.json (overwrites an existing file).
with open('features_before_atb.json','w') as f:
    json.dump(features_dict,f,indent=5)
