In [1]:
import pandas as pd
import datetime as dt

In [2]:
# Load the pickled dataset.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
obj = pd.read_pickle('data/transaction_and_features_2.pkl')
# Reduce purchase timestamps to calendar dates, then keep only rows flagged as purchases.
obj = obj.assign(purch_date=pd.to_datetime(obj['purch_date']).dt.date)
obj = obj.loc[obj['is_purchase'] == 1]

# Split by date: rows up to and including 2021-11-04 go to train, later rows to test.
train = obj.loc[obj['purch_date'] <= dt.date(2021, 11, 4)]
test = obj.loc[obj['purch_date'] > dt.date(2021, 11, 4)]

In [3]:
def postprocessing(candidates, target, months, lastnext=''):
    """Deduplicate, rank and persist rule-based candidates for one target.

    Parameters
    ----------
    candidates : pandas.DataFrame
        Expected to contain the columns ``id``, ``item_next`` and ``score``.
    target : str
        New name for the ``item_next`` column; also part of the output file name.
    months : int or str
        Label embedded in the output file name.
    lastnext : str, optional
        Extra infix for the output file name (e.g. ``'lastnext_'``).

    Returns
    -------
    None
        The result is written to
        ``apriori_output/target_{target}_{lastnext}{months}.par``.
    """
    candidates = candidates.rename(columns={'id': 'user_id', 'item_next': target})
    # Rows without a proposed target carry no recommendation.
    candidates = candidates.dropna(subset=[target])
    # Sort scores in DESCENDING order within each user so that
    # drop_duplicates(keep='first') retains the highest score of every
    # (user, target) pair. An ascending sort would keep the lowest score.
    candidates = candidates.sort_values(by=['user_id', 'score'], ascending=[True, False])
    candidates = candidates.drop_duplicates(subset=['user_id', target], keep='first')
    # Rank 1 is the best score per user; ties receive pandas' default average rank.
    candidates = candidates.assign(
        rnk=candidates.groupby('user_id')['score'].rank(ascending=False)
    )
    candidates.to_parquet(f'apriori_output/target_{target}_{lastnext}{months}.par')

## 2 Months test

In [4]:
# Read the four rule tables used in the 2-month section.
rule1_2m, rule2_2m, rule3_2m, rule4_2m = map(pd.read_csv, [
    'apriori_input/rules1_lift.csv',
    'apriori_input/rules2_lift.csv',
    'apriori_input/rules3_lift.csv',
    'apriori_input/rules4_lift.csv',
])

### Таргет - item_id по id_check_unic

In [5]:
# For every test purchase, attach the rules (lift > 3) whose antecedent equals the
# purchased item, then keep the best lift per (id, purch_date, proposed item).
target_item_id = (
    test[['id', 'purch_date', 'item']]
    .merge(
        rule1_2m.loc[rule1_2m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='item', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'item', 'consequents', 'lift']]
    .rename(columns={'consequents': 'item_next', 'lift': 'score'})
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)

# Re-attach to every distinct (id, purch_date) pair so pairs without a matching rule stay present.
target_item_id = (
    test[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_item_id, how='left', on=['id', 'purch_date'])
)

In [6]:
# Write the item_id candidates with the file label months=2.
target, months = 'item_id', 2
postprocessing(target_item_id, target, months)

### Таргет - item_id по id_check_unic_lastnext

In [7]:
# Same construction as the plain item_id candidates, but driven by the rules3 table.
target_item_id_lastnext = (
    test[['id', 'id_check_unique', 'purch_date', 'channel', 'item']]
    .merge(
        rule3_2m.loc[rule3_2m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='item', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'item', 'consequents', 'lift']]
    .rename(columns={'consequents': 'item_next', 'lift': 'score'})
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)
# Re-attach to every distinct (id, purch_date) pair of the test frame.
target_item_id_lastnext = (
    test[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_item_id_lastnext, how='left', on=['id', 'purch_date'])
)

In [8]:
# Write the item_id candidates with the 'lastnext_' infix and the file label months=2.
target, months = 'item_id', 2
postprocessing(target_item_id_lastnext, target, months, lastnext='lastnext_')

### Таргет - category_id по id_check_unic

In [9]:
# Match each test purchase's category against rule antecedents (lift > 3) and keep
# the best lift per (id, purch_date, proposed category).
target_category_id = (
    test[['id', 'id_check_unique', 'purch_date', 'channel', 'category']]
    .merge(
        rule2_2m.loc[rule2_2m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='category', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'category', 'consequents', 'lift']]
    # The category column reuses the generic 'item' / 'item_next' names here.
    .rename(columns={'category': 'item', 'consequents': 'item_next', 'lift': 'score'})
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)
# Re-attach to every distinct (id, purch_date) pair of the test frame.
target_category_id = (
    test[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_category_id, how='left', on=['id', 'purch_date'])
)

In [10]:
# Write the category_id candidates with the file label months=2.
target, months = 'category_id', 2
postprocessing(target_category_id, target, months)

### Таргет - category_id по id_check_unic_lastnext

In [11]:
# Same construction as the plain category_id candidates, but driven by the rules4 table.
target_category_id_lastnext = (
    test[['id', 'id_check_unique', 'purch_date', 'channel', 'category']]
    .merge(
        rule4_2m.loc[rule4_2m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='category', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'category', 'consequents', 'lift']]
    # The category column reuses the generic 'item' / 'item_next' names here.
    .rename(columns={'category': 'item', 'consequents': 'item_next', 'lift': 'score'})
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)
# Re-attach to every distinct (id, purch_date) pair of the test frame.
target_category_id_lastnext = (
    test[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_category_id_lastnext, how='left', on=['id', 'purch_date'])
)

In [12]:
# Write the category_id candidates with the 'lastnext_' infix and the file label months=2.
target, months = 'category_id', 2
postprocessing(target_category_id_lastnext, target, months, lastnext='lastnext_')

## 6 Months test

In [13]:
# Window length in months; a month is approximated as four weeks in the cut-off below.
test_months = 6
max_date = obj['purch_date'].max()
# Keep every row dated on or after the cut-off (latest purchase date minus the window).
test_6m = obj.loc[
    obj['purch_date'] >= max_date - pd.Timedelta(weeks=test_months * 4)
]

In [14]:
# Read the four rule tables used in the 6-month section.
rule1_6m, rule2_6m, rule3_6m, rule4_6m = map(pd.read_csv, [
    'apriori_input/rules1_lift_6m.csv',
    'apriori_input/rules2_lift_6m.csv',
    'apriori_input/rules3_lift_6m.csv',
    'apriori_input/rules4_lift_6m.csv',
])

### Таргет - item_id по id_check_unic

In [15]:
# Build item_id candidates for the 6-month window.
# The 6-month rules are applied to `test_6m` (the 6-month window frame) rather than
# to `test`, which is the split after 2021-11-04 used by the 2-month section.
# NOTE(review): confirm that `test_6m` is the intended evaluation frame here.
target_item_id = (
    test_6m[['id', 'purch_date', 'item']]
    .merge(
        # Only rules with lift above 3 are considered.
        rule1_6m.loc[rule1_6m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='item', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'item', 'consequents', 'lift']]
    .rename(columns={'consequents': 'item_next', 'lift': 'score'})
    # Best lift per (id, purch_date, proposed item); rows without a rule drop out here.
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)

# Re-attach to every distinct (id, purch_date) pair of the same 6-month frame.
target_item_id = (
    test_6m[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_item_id, how='left', on=['id', 'purch_date'])
)

In [16]:
# Write the item_id candidates with the file label months=6.
target, months = 'item_id', 6
postprocessing(target_item_id, target, months)

### Таргет - item_id по id_check_unic_lastnext

In [17]:
# Build item_id candidates from the 6-month rules3 table.
# The 6-month rules are applied to `test_6m` (the 6-month window frame) rather than
# to `test`, which is the split after 2021-11-04 used by the 2-month section.
# NOTE(review): confirm that `test_6m` is the intended evaluation frame here.
target_item_id_lastnext = (
    test_6m[['id', 'id_check_unique', 'purch_date', 'channel', 'item']]
    .merge(
        # Only rules with lift above 3 are considered.
        rule3_6m.loc[rule3_6m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='item', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'item', 'consequents', 'lift']]
    .rename(columns={'consequents': 'item_next', 'lift': 'score'})
    # Best lift per (id, purch_date, proposed item); rows without a rule drop out here.
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)

# Re-attach to every distinct (id, purch_date) pair of the same 6-month frame.
target_item_id_lastnext = (
    test_6m[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_item_id_lastnext, how='left', on=['id', 'purch_date'])
)

In [18]:
# Write the item_id candidates with the 'lastnext_' infix and the file label months=6.
target, months = 'item_id', 6
postprocessing(target_item_id_lastnext, target, months, lastnext='lastnext_')

### Таргет - category_id по id_check_unic

In [19]:
# Build category_id candidates for the 6-month window.
# The 6-month rules are applied to `test_6m` (the 6-month window frame) rather than
# to `test`, which is the split after 2021-11-04 used by the 2-month section.
# NOTE(review): confirm that `test_6m` is the intended evaluation frame here.
target_category_id = (
    test_6m[['id', 'id_check_unique', 'purch_date', 'channel', 'category']]
    .merge(
        # Only rules with lift above 3 are considered.
        rule2_6m.loc[rule2_6m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='category', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'category', 'consequents', 'lift']]
    # The category column reuses the generic 'item' / 'item_next' names here.
    .rename(columns={'category': 'item', 'consequents': 'item_next', 'lift': 'score'})
    # Best lift per (id, purch_date, proposed category); rows without a rule drop out here.
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)

# Re-attach to every distinct (id, purch_date) pair of the same 6-month frame.
target_category_id = (
    test_6m[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_category_id, how='left', on=['id', 'purch_date'])
)

In [20]:
# Write the category_id candidates with the file label months=6.
target, months = 'category_id', 6
postprocessing(target_category_id, target, months)

### Таргет - category_id по id_check_unic_lastnext

In [21]:
# Build category_id candidates from the 6-month rules4 table.
# The 6-month rules are applied to `test_6m` (the 6-month window frame) rather than
# to `test`, which is the split after 2021-11-04 used by the 2-month section.
# NOTE(review): confirm that `test_6m` is the intended evaluation frame here.
target_category_id_lastnext = (
    test_6m[['id', 'id_check_unique', 'purch_date', 'channel', 'category']]
    .merge(
        # Only rules with lift above 3 are considered.
        rule4_6m.loc[rule4_6m['lift'] > 3, ['antecedents', 'consequents', 'lift']],
        how='left', left_on='category', right_on='antecedents',
    )
    .loc[:, ['id', 'purch_date', 'category', 'consequents', 'lift']]
    # The category column reuses the generic 'item' / 'item_next' names here.
    .rename(columns={'category': 'item', 'consequents': 'item_next', 'lift': 'score'})
    # Best lift per (id, purch_date, proposed category); rows without a rule drop out here.
    .groupby(['id', 'purch_date', 'item_next'])['score']
    .max()
    .reset_index()
)

# Re-attach to every distinct (id, purch_date) pair of the same 6-month frame.
target_category_id_lastnext = (
    test_6m[['id', 'purch_date']]
    .drop_duplicates()
    .merge(target_category_id_lastnext, how='left', on=['id', 'purch_date'])
)

In [None]:
# Write the category_id candidates with the 'lastnext_' infix and the file label months=6.
target, months = 'category_id', 6
postprocessing(target_category_id_lastnext, target, months, lastnext='lastnext_')