In [1]:
import pandas as pd, numpy as np
from tqdm.notebook import tqdm
import os, sys, pickle, glob, gc
from collections import Counter
import itertools
pd.set_option('display.max_columns', None)
from otto_utils import *

In [2]:
# Candidate feature columns, in the order they are written to the output files.
# The batch loops keep only the names that are actually present in the frame
# (`[x for x in full_feats if x in df.columns]`), so the order of this list is
# the column order of the saved parquet files.
full_feats = [
    # cart/order estimates (a2s counts multiplied by per-aid ratios)
    'CA_estimation_frCL_trn',
    'CA_estimation_frCL_tst',
    'OR_estimation_frCA_trn',
    'OR_estimation_frCL_trn',
    # a2s_* : per (session, aid) features
    'a2s_actions_num',
    'a2s_actions_rel',
    'a2s_best_action_type',
    'a2s_carts_num',
    'a2s_carts_rel',
    'a2s_clicks_num',
    'a2s_clicks_rel',
    'a2s_last_action_index',
    'a2s_last_cart_index',
    'a2s_last_click_index',
    'a2s_orders_rel',
    # aid_* : per-aid features
    'aid_CA2OR_trn',
    'aid_CA_rank_int_tst_vs_trn',
    'aid_CA_vs_mean_trn',
    'aid_CA_vs_mean_tst',
    'aid_CA_vs_mean_tst_vs_trn',
    'aid_CL2CA_trn',
    'aid_CL2CA_tst',
    'aid_CL2OR_trn',
    'aid_CL_rank_int_trn',
    'aid_CL_rank_int_tst_vs_trn',
    'aid_CL_rank_pct_tst_vs_trn',
    'aid_CL_vs_mean_trn',
    'aid_CL_vs_mean_tst_vs_trn',
    'aid_clicks_favourite_dow_diff_test',
    'aid_multi_clicks_percent_full',
    'aid_multi_orders_percent_train',
    # *_rating_* : per-aid ratings
    'carts_rating_full',
    'carts_rating_train',
    'clicks_rating_full',
    'clicks_rating_train',
    'orders_rating_full',
    # session_* : per-session features
    'session_actions',
    'session_avg_real_items_num',
    'session_avg_real_length',
    'session_carts',
    'session_carts_avg_hour',
    'session_carts_avg_real',
    'session_click_diff_mean',
    'session_clicks',
    'session_full_length',
    'session_items',
    'session_items_carted',
    'session_items_clicked',
    # ts_diff* : session_last_ts minus the aid's last-event timestamps
    'ts_diff',
    'ts_diff_carts',
    'ts_diff_carts_rel',
    'ts_diff_clicks',
    'ts_diff_clicks_rel',
    'ts_diff_orders',
    'ts_diff_orders_rel',
    'ts_diff_rel',
    # {ver}_indmin / {ver}_num : per-matrix min rank and hit count
    'v11m_indmin',
    'v11m_num',
    'v21k_num',
    'v21m_indmin',
    'v31m_indmin',
    'v31m_num',
    'v51ha_indmin',
    # wgt_rel_{ver}_* : aggregated relative matrix weights
    'wgt_rel_v11m_mean',
    'wgt_rel_v11m_sum',
    'wgt_rel_v21k_mean',
    'wgt_rel_v21k_sum',
    'wgt_rel_v21m_mean',
    'wgt_rel_v21m_sum',
    'wgt_rel_v31m_mean',
    'wgt_rel_v31m_sum',
    'wgt_rel_v51ha_mean',
    'wgt_rel_v51ha_sum',
    # wgt_{ver}_* : aggregated matrix weights
    'wgt_v11m_mean',
    'wgt_v11m_sum',
    'wgt_v21k_sum',
    'wgt_v21m_mean',
    'wgt_v21m_sum',
    'wgt_v31m_mean',
    'wgt_v31m_sum',
    'wgt_v51ha_mean',
    'wgt_v51ha_sum',
]

In [4]:
def m_feats(test_df, m_name):
    """Aggregate one `matrices/cm_{m_name}_{MODE}.pqt` matrix per (session, candidate aid).

    Every distinct (session, aid) pair of `test_df` is inner-joined to the matrix
    on `aid`; the matrix's second column supplies the candidate aid. The joined
    rows are then grouped by (session, candidate aid).

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must contain the columns 'session' and 'aid'.
    m_name : str
        Matrix name; the text after its last underscore (e.g. 'v11m') is used
        as the version tag in the output column names.

    Returns
    -------
    pandas.DataFrame
        Columns: 'session', 'aid' (the candidate aid), `wgt_{ver}_sum`,
        `wgt_{ver}_mean`, `wgt_rel_{ver}_sum`, `wgt_rel_{ver}_mean`,
        `{ver}_indmin` (min of 'n') and `{ver}_num` (count of 'n').

    Notes
    -----
    Reads the module-level `MODE` to build the file name. The matrix file is
    assumed to hold exactly five columns, which are renamed positionally.
    """
    version = m_name.rsplit("_", 1)[-1]

    matrix = pd.read_parquet(f"matrices/cm_{m_name}_{MODE}.pqt")
    matrix.columns = ['aid','aid_y','wgt','wgt_rel','n']

    session_aids = test_df[['session','aid']].drop_duplicates()
    joined = session_aids.merge(matrix, on=['aid'], how='inner')

    # Named aggregation yields flat column names directly, in the order given.
    aggregations = {
        f'wgt_{version}_sum': ('wgt', 'sum'),
        f'wgt_{version}_mean': ('wgt', 'mean'),
        f'wgt_rel_{version}_sum': ('wgt_rel', 'sum'),
        f'wgt_rel_{version}_mean': ('wgt_rel', 'mean'),
        f'{version}_indmin': ('n', 'min'),
        f'{version}_num': ('n', 'count'),
    }
    test_m = joined.groupby(['session','aid_y'], as_index=False).agg(**aggregations)
    # The candidate aid becomes the 'aid' key of the returned frame.
    test_m = test_m.rename(columns={'aid_y': 'aid'})

    # Release the matrix before returning.
    del matrix
    gc.collect()
    return test_m

In [5]:
def add_fact_feats(embs, sfx, df):
    """Add embedding distance/angle features between each row's aid and its `aid_last`.

    Parameters
    ----------
    embs : str or path-like
        Path of a file readable by `np.load`; the loaded 2-D array is indexed by
        row number, i.e. row ``i`` is used as the embedding of aid ``i``.
    sfx : str
        Suffix for the two output column names.
    df : pandas.DataFrame
        Must contain the columns 'aid' and 'aid_last'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame (result of left merges, so it has a fresh RangeIndex) with
        the original columns plus:

        * ``emb_diff_{sfx}``  - sum over dimensions of the squared difference
          between the two embeddings (squared Euclidean distance);
        * ``emb_angle_{sfx}`` - dot product divided by the product of the two
          norms (cosine of the angle between the embeddings).

        Rows whose 'aid' or 'aid_last' has no embedding row get NaN in both.

    Notes
    -----
    The accumulators are kept in local Series rather than in scratch columns
    named 'abs_1', 'abs_2' and 'scalar', so input columns with those names are
    no longer overwritten and deleted.
    """
    with open(embs, 'rb') as f:
        emb_df = pd.DataFrame(np.load(f)).reset_index()
    N = emb_df.shape[1]-1

    cur_cols = [f'emb_{i}' for i in range(N)]
    last_cols = [f'emb_{i}_last' for i in range(N)]

    # Attach the embedding of the row's own aid ...
    emb_df.columns = ['aid'] + cur_cols
    df = df.merge(emb_df,on=['aid'],how='left')

    # ... and the embedding of the session's last aid.
    emb_df.columns = ['aid_last'] + last_cols
    df = df.merge(emb_df,on=['aid_last'],how='left')

    # Integer-zero Series as the starting value reproduces the dtype promotion
    # of the original `df[col] = 0` accumulators (int64 + float32 -> float64).
    zeros = pd.Series(0, index=df.index)
    sq_dist = zeros
    norm_sq_cur = zeros
    norm_sq_last = zeros
    dot = zeros
    for cur_col, last_col in zip(cur_cols, last_cols):
        cur = df[cur_col]
        last = df[last_col]
        sq_dist = sq_dist + (cur - last)**2
        norm_sq_cur = norm_sq_cur + cur**2
        norm_sq_last = norm_sq_last + last**2
        dot = dot + cur*last

    # Drop all per-dimension columns in one call instead of 2*N `del` statements.
    df = df.drop(columns=cur_cols + last_cols)

    df[f'emb_diff_{sfx}'] = sq_dist
    # A zero-norm embedding gives a 0/0 or x/0 division here (NaN/inf), as before.
    df[f'emb_angle_{sfx}'] = dot/(np.sqrt(norm_sq_cur)*np.sqrt(norm_sq_last))

    return df

### Prepare dataframes for carts/orders models training

In [6]:
# Run configuration for the carts/orders feature build.
# MODE selects the input dataset in the next cell:
#   0 -> "otto-validation", 1 -> "otto-chunk-data-inparquet-format".
# It is also embedded in the names of the matrix/feature files that are read and written.
MODE = 0
# When True (and MODE == 0) the batch loop keeps only sessions that have a
# 'carts' or 'orders' row in test_labels.parquet before building features.
REDUCE = True

In [7]:
# Integer codes used to encode the 'type' column of the test chunks.
type_labels = dict(clicks=0, carts=1, orders=2)

# Input directory (under input/) for the selected MODE.
# For any other MODE value `dataset` is left unassigned, as before.
if MODE == 0:
    dataset = "otto-validation"
elif MODE == 1:
    dataset = "otto-chunk-data-inparquet-format"

def load_test():
    """Read every file under ``input/{dataset}/test_parquet/`` into one DataFrame.

    For each chunk, 'ts' is divided by 1000 and cast to int32 (presumably
    milliseconds -> seconds; confirm against the data source), and 'type' is
    mapped through the module-level `type_labels` and cast to int8.

    The chunk files are read in sorted path order: `glob.glob` returns matches
    in arbitrary order, so without sorting the row order of the result could
    differ between machines or runs.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no chunk file matches the pattern.
    """
    pattern = f'input/{dataset}/test_parquet/*'
    chunk_files = sorted(glob.glob(pattern))
    if not chunk_files:
        # pd.concat([]) would raise a ValueError too, but without the path.
        raise ValueError(f"No test chunks found for pattern {pattern!r}")

    dfs = []
    for chunk_file in chunk_files:
        chunk = pd.read_parquet(chunk_file)
        chunk['ts'] = (chunk['ts']/1000).astype('int32')
        chunk['type'] = chunk['type'].map(type_labels).astype('int8')
        dfs.append(chunk)
    return pd.concat(dfs).reset_index(drop=True)

# Load the full test set once and report its size.
test_df = load_test()
N_sessions = test_df['session'].nunique()
print('Test data has shape', test_df.shape)
print(f'{N_sessions} unique sessions')

# Untouched copy of the full test set: the batch loop rebinds `test_df`
# to one batch at a time and slices every batch from `test_df_`.
test_df_ = test_df.copy()

Test data has shape (7683577, 4)
1801251 unique sessions


In [8]:
# Build the candidate feature table in 4 batches (sessions split by session % 4)
# and write each batch to feats/feats_{MODE}_batch_{batch}_small.pqt.
#
# Everything that does not depend on the batch (the validation labels) is read and
# reshaped once, up front, instead of once per batch.

# Sessions that have at least one 'carts' or 'orders' label row; used to shrink the
# batches when REDUCE is on.
if MODE==0 and REDUCE:
    test_labels = pd.read_parquet(
        'input/otto-validation/test_labels.parquet',
        columns = ['session','type']
    )
    sessions_to_train = set(
        test_labels[
            test_labels['type'].isin(['carts','orders'])
        ]['session']
    )

# Ground-truth (session, aid) pairs per action type, one frame per type.
if MODE!=1:
    test_labels = pd.read_parquet(f'input/{dataset}/test_labels.parquet')
    test_labels_explode = test_labels.explode('ground_truth')

    clicks_labels = test_labels_explode[test_labels_explode['type']=='clicks'].copy()
    del clicks_labels['type']
    clicks_labels.columns = ['session','aid']
    clicks_labels['clicks_gt'] = 1

    carts_labels = test_labels_explode[test_labels_explode['type']=='carts'].copy()
    del carts_labels['type']
    carts_labels.columns = ['session','aid']
    carts_labels['carts_gt'] = 1

    orders_labels = test_labels_explode[test_labels_explode['type']=='orders'].copy()
    del orders_labels['type']
    orders_labels.columns = ['session','aid']
    orders_labels['orders_gt'] = 1

for batch in range(4):
    
    print(f"\n\nbatch = {batch}")
    print('='*40)
    test_df = test_df_[test_df_['session']%4==batch].copy()
    print(f"len(test_df) = {len(test_df)}")
    gc_clear()
    
    if MODE==0 and REDUCE:
        test_df = test_df[
            test_df['session'].isin(sessions_to_train)
        ].reset_index(drop=True).copy()
        print("After sessions reduction:")
        print(f"len(test_df) = {len(test_df)}")
        
    # Last aid of every session (last row per session in test_df order).
    df_last = test_df[['session','aid']].drop_duplicates(['session'], keep='last')
    df_last.columns = ['session','aid_last']
    
    # Start from the (session, aid) pairs seen in the batch; the outer merges below
    # add the candidate aids produced by each matrix.
    df = test_df[['session','aid']].drop_duplicates()
    print(f'{len(df)/1e6:.2f}M')
    
    for m_name in [
        "30_30_012_012_0_v11m",
        "30_30_012_012_0_v21k",
        "30_30_012_012_0_v21m",
        "30_30_012_012_3_v31m",
        "30_30_012_12_0_v51ha"  
    ]:
        print(f"add features from matrix {m_name}")
        m_df = m_feats(test_df,m_name)
        df = df.merge(m_df, on=['session','aid'],how='outer')
        print(f'{len(df)/1e6:.2f}M')

    del m_df
    gc_clear()
    
    print("add aids features...")
    feats_df = pd.read_parquet(
        f'feats/FE_aids_{MODE}.pqt',
        columns = [
            'aid',
            'aid_CA2OR_trn',
            'aid_CA_rank_int_tst_vs_trn',
            'aid_CA_vs_mean_trn',
            'aid_CA_vs_mean_tst',
            'aid_CA_vs_mean_tst_vs_trn',
            'aid_CL2CA_trn',
            'aid_CL2CA_tst',
            'aid_CL2OR_trn',
            'aid_CL_rank_int_trn',
            'aid_CL_rank_int_tst_vs_trn',
            'aid_CL_rank_pct_tst_vs_trn',
            'aid_CL_vs_mean_trn',
            'aid_CL_vs_mean_tst_vs_trn',
            'aid_multi_clicks_percent_full',
            'aid_multi_orders_percent_train',
            'aid_clicks_favourite_dow_test',
            'carts_rating_full',
            'carts_rating_train',
            'clicks_rating_full',
            'clicks_rating_train',
            'orders_rating_full'
        ]
    )
    df = df.merge(feats_df,on=['aid'],how='left')
    del feats_df
    gc_clear()
    
    print("add sessions features...")
    feats_df = pd.read_parquet(
        f"feats/FE_sessions_{MODE}.pqt",
        columns = [
            'session',
            'session_actions',
            'session_avg_real_items_num',
            'session_avg_real_length',
            'session_carts',
            'session_carts_avg_hour',
            'session_carts_avg_real',
            'session_click_diff_mean',
            'session_clicks',
            'session_orders',
            'session_full_length',
            'session_items',
            'session_items_carted',
            'session_items_clicked',
            'session_last_ts'
        ]
    )
    df = df.merge(feats_df,on=['session'],how='left')
    del feats_df
    gc_clear()
    
    print("add aids2sessions features...")
    feats_df = pd.read_parquet(
        f"feats/FE_aids2sessions_{MODE}.pqt",
        columns = [
            'session','aid',
            'a2s_actions_num',
            'a2s_best_action_type',
            'a2s_carts_num',
            'a2s_clicks_num',
            'a2s_orders_num',
            'a2s_last_action_index',
            'a2s_last_cart_index',
            'a2s_last_click_index',
            'a2s_last_action_ts',
            'a2s_last_click_ts',
            'a2s_last_cart_ts',
            'a2s_last_order_ts'
        ]
    )
    df = df.merge(feats_df,on=['session','aid'],how='left')
    del feats_df
    gc_clear()
    
    print("add additional features...")
    # Cross-matrix summaries; candidates missing from a matrix get 0 hits and
    # -1 as the fill value for rank and weights.
    df['matrices_num'] = 0
    df['matrices_numsum'] = 0
    df['matrices_wgt_rel_mean'] = 0
    for ver in ['v11m','v21k','v21m','v31m','v51ha']:
        df[f'{ver}_num'] = df[f'{ver}_num'].fillna(0).astype('int16')
        df[f'{ver}_indmin'] = df[f'{ver}_indmin'].fillna(-1).astype('int8')
        df[f'wgt_{ver}_sum'] = df[f'wgt_{ver}_sum'].fillna(-1)
        df[f'wgt_{ver}_mean'] = df[f'wgt_{ver}_mean'].fillna(-1)
        df[f'wgt_rel_{ver}_sum'] = df[f'wgt_rel_{ver}_sum'].fillna(-1)
        df[f'wgt_rel_{ver}_mean'] = df[f'wgt_rel_{ver}_mean'].fillna(-1)
        
        df['matrices_num'] += (df[f'{ver}_num']>0).astype('int8')
        df['matrices_numsum'] += df[f'{ver}_num']
        # NOTE(review): despite the column name this accumulates wgt_{ver}_mean (not
        # wgt_rel_{ver}_mean), and it includes the -1 fill values. Left unchanged
        # because altering it changes the written feature - confirm the intent.
        df['matrices_wgt_rel_mean'] += df[f'wgt_{ver}_mean']
    
    # Share of the session's actions of each type that involve this aid.
    for x in ['actions','clicks','carts','orders']:
        df[f'a2s_{x}_num'] = df[f'a2s_{x}_num'].fillna(0).astype('int16')
        df[f'a2s_{x}_rel'] = df[f'a2s_{x}_num'] / df[f'session_{x}']
        
    for col in ['a2s_last_click_ts','a2s_last_cart_ts','a2s_last_order_ts','a2s_last_action_ts']:
        df[col] = df[col].fillna(-1).astype('int')
        
    # Where the aid has no such event the timestamp is the -1 fill, so the
    # difference equals session_last_ts + 1.
    df['ts_diff'] = df['session_last_ts'] - df['a2s_last_action_ts']
    df['ts_diff_clicks'] = df['session_last_ts'] - df['a2s_last_click_ts']
    df['ts_diff_carts'] = df['session_last_ts'] - df['a2s_last_cart_ts']
    df['ts_diff_orders'] = df['session_last_ts'] - df['a2s_last_order_ts']
    
    df['ts_diff_rel'] = df['ts_diff'] / (df['session_full_length']+1)
    df['ts_diff_clicks_rel'] = df['ts_diff_clicks'] / (df['session_full_length']+1)
    df['ts_diff_carts_rel'] = df['ts_diff_carts'] / (df['session_full_length']+1)
    df['ts_diff_orders_rel'] = df['ts_diff_orders'] / (df['session_full_length']+1)
    
    # Day index (0..6) of the session's last timestamp within a 7-day cycle.
    df['split_dow'] = (df['session_last_ts']%(7*24*60*60))//(24*60*60)
    
    for x in['clicks']:
        for y in ['test']:
            # Circular day-of-week distance (0..3); 14 marks a missing value.
            df[f'aid_{x}_favourite_dow_diff_{y}'] = np.abs(df['split_dow'] - df[f'aid_{x}_favourite_dow_{y}'])
            df.loc[
                df[f'aid_{x}_favourite_dow_diff_{y}']>3,
                f'aid_{x}_favourite_dow_diff_{y}'
            ] = 7-df[f'aid_{x}_favourite_dow_diff_{y}']
            df[f'aid_{x}_favourite_dow_diff_{y}'] = df[f'aid_{x}_favourite_dow_diff_{y}'].fillna(14).astype('int8')
            del df[f'aid_{x}_favourite_dow_{y}']

    gc_clear()
    
    df['a2s_last_click_index'] = df['a2s_last_click_index'].fillna(999).astype('int16')
    df['a2s_last_cart_index'] = df['a2s_last_cart_index'].fillna(999).astype('int16')
    df['a2s_last_action_index'] = df['a2s_last_action_index'].fillna(999).astype('int16')
    df['a2s_best_action_type'] = df['a2s_best_action_type'].fillna(-1).astype('int8')
    
    for x in ['trn']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']
        df[f'OR_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2OR_{x}']
        df[f'OR_estimation_frCA_{x}'] = df[f'a2s_carts_num'] * df[f'aid_CA2OR_{x}']
    for x in ['tst']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']
        
    # Assign the filled column back instead of calling fillna(inplace=True) on the
    # column selection: that chained in-place form is deprecated and has no effect
    # on `df` under pandas Copy-on-Write.
    for col in ['a2s_clicks_rel','a2s_carts_rel','a2s_orders_rel']:
        df[col] = df[col].fillna(-1)
    
    del df['session_last_ts'], df['a2s_last_click_ts']
    del df['a2s_last_cart_ts'], df['a2s_last_order_ts'],  df['a2s_last_action_ts']
    
    if MODE!=1:
        print("add labels...")
        print(len(clicks_labels),len(carts_labels),len(orders_labels))

        df = df.merge(
            clicks_labels, on=['session','aid'], how='left'
        ).merge(
            carts_labels, on=['session','aid'], how='left'
        ).merge(
            orders_labels, on=['session','aid'], how='left'
        )

        df['clicks_gt'] = df['clicks_gt'].fillna(0).astype('int8')
        df['carts_gt'] = df['carts_gt'].fillna(0).astype('int8')
        df['orders_gt'] = df['orders_gt'].fillna(0).astype('int8')
        
        # 1 when at least one candidate of the session is a ground-truth hit.
        df['clicks_session'] = df.groupby(['session'])['clicks_gt'].transform('max').astype('int8')
        df['carts_session'] = df.groupby(['session'])['carts_gt'].transform('max').astype('int8')
        df['orders_session'] = df.groupby(['session'])['orders_gt'].transform('max').astype('int8')
    
    ###################################################################################
    
    # Embedding features between each candidate aid and the session's last aid.
    df = df.merge(df_last, on=['session'], how='left')
    
    embs_feats = []
    embs = {
        'matrices/emb_32_1_sh1_pub.npy':'sh1_1_pub',
        'matrices/emb_32_1_sh2_pub.npy':'sh2_1_pub',
        'matrices/w2v.npy':'w2v',
        'matrices/w2v_100.npy':'w2v_100',
    }
    for emb, sfx in embs.items():
        print(f"add embs feats from {emb}")
        df = add_fact_feats(emb, sfx, df)
        embs_feats.extend([f'emb_diff_{sfx}', f'emb_angle_{sfx}'])
        
    del df['aid_last']
    gc_clear()
    
    ###################################################################################
        
    gt_feats = [
        'clicks_gt','carts_gt','orders_gt',
        'clicks_session','carts_session','orders_session'
    ] if MODE==0 else []
    
    # Output column order: keys, cross-matrix summaries, the available
    # `full_feats` (in list order), embedding features, then labels.
    final_feats = [
        'session','aid',
        'matrices_num','matrices_numsum','matrices_wgt_rel_mean',
    ] + [
        x for x in full_feats if x in df.columns
    ] + embs_feats + gt_feats 
    
    df[final_feats].to_parquet(f"feats/feats_{MODE}_batch_{batch}_small.pqt",index=False)
    
    
# Display the last batch's feature table as the cell output.
df[final_feats]



batch = 0
len(test_df) = 19129
After sessions reduction:
len(test_df) = 7234
0.00M
add features from matrix 30_30_012_012_0_v11m
0.09M
add features from matrix 30_30_012_012_0_v21k
0.12M
add features from matrix 30_30_012_012_0_v21m
0.13M
add features from matrix 30_30_012_012_3_v31m
0.14M
add features from matrix 30_30_012_12_0_v51ha
0.17M
add aids features...
add sessions features...
add aids2sessions features...
add additional features...
add labels...
1755534 580817 314021
add embs feats from matrices/emb_32_1_sh1_pub.npy
add embs feats from matrices/emb_32_1_sh2_pub.npy
add embs feats from matrices/w2v.npy
add embs feats from matrices/w2v_100.npy


batch = 1
len(test_df) = 18509
After sessions reduction:
len(test_df) = 6357
0.00M
add features from matrix 30_30_012_012_0_v11m
0.09M
add features from matrix 30_30_012_012_0_v21k
0.11M
add features from matrix 30_30_012_012_0_v21m
0.12M
add features from matrix 30_30_012_012_3_v31m
0.13M
add features from matrix 30_30_012_12_0_v51ha

Unnamed: 0,session,aid,matrices_num,matrices_numsum,matrices_wgt_rel_mean,CA_estimation_frCL_trn,CA_estimation_frCL_tst,OR_estimation_frCA_trn,OR_estimation_frCL_trn,a2s_actions_num,a2s_actions_rel,a2s_best_action_type,a2s_carts_num,a2s_carts_rel,a2s_clicks_num,a2s_clicks_rel,a2s_last_action_index,a2s_last_cart_index,a2s_last_click_index,a2s_orders_rel,aid_CA2OR_trn,aid_CA_rank_int_tst_vs_trn,aid_CA_vs_mean_trn,aid_CA_vs_mean_tst,aid_CA_vs_mean_tst_vs_trn,aid_CL2CA_trn,aid_CL2CA_tst,aid_CL2OR_trn,aid_CL_rank_int_trn,aid_CL_rank_int_tst_vs_trn,aid_CL_rank_pct_tst_vs_trn,aid_CL_vs_mean_trn,aid_CL_vs_mean_tst_vs_trn,aid_clicks_favourite_dow_diff_test,aid_multi_clicks_percent_full,aid_multi_orders_percent_train,carts_rating_full,carts_rating_train,clicks_rating_full,clicks_rating_train,orders_rating_full,session_actions,session_avg_real_items_num,session_avg_real_length,session_carts,session_carts_avg_hour,session_carts_avg_real,session_click_diff_mean,session_clicks,session_full_length,session_items,session_items_carted,session_items_clicked,ts_diff,ts_diff_carts,ts_diff_carts_rel,ts_diff_clicks,ts_diff_clicks_rel,ts_diff_orders,ts_diff_orders_rel,ts_diff_rel,v11m_indmin,v11m_num,v21k_num,v21m_indmin,v31m_indmin,v31m_num,v51ha_indmin,wgt_rel_v11m_mean,wgt_rel_v11m_sum,wgt_rel_v21k_mean,wgt_rel_v21k_sum,wgt_rel_v21m_mean,wgt_rel_v21m_sum,wgt_rel_v31m_mean,wgt_rel_v31m_sum,wgt_rel_v51ha_mean,wgt_rel_v51ha_sum,wgt_v11m_mean,wgt_v11m_sum,wgt_v21k_sum,wgt_v21m_mean,wgt_v21m_sum,wgt_v31m_mean,wgt_v31m_sum,wgt_v51ha_mean,wgt_v51ha_sum,emb_diff_sh1_1_pub,emb_angle_sh1_1_pub,emb_diff_sh2_1_pub,emb_angle_sh2_1_pub,emb_diff_w2v,emb_angle_w2v,emb_diff_w2v_100,emb_angle_w2v_100,clicks_gt,carts_gt,orders_gt,clicks_session,carts_session,orders_session
0,12631203,718253,5,21,750.959000,0.229286,0.193548,0.0,0.083377,2,0.2,0,0,-1.0,2,0.2,7,999,7,-1.0,0.363636,1.656316,37.247484,13.782599,0.370028,0.114643,0.096774,0.041688,5320.5,1.251668,2.611778,34.783420,0.429900,0,0.294235,0.062500,4811,4759,4713,4657,5622,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,314,1661652558,7.590921e+05,314,1.434445e-01,1661652558,7.590921e+05,1.434445e-01,7,4,4,3,6,5,2,0.012561,0.050243,0.013678,0.054714,0.016419,0.065678,0.011646,0.058230,0.020306,0.081223,115.0,460.0,610.0,217.500000,870.0,250.209000,1251.044922,15.750000,63.0,3.823660,0.942258,1.055732,0.997084,4.014927,0.893792,2.389182,0.901647,0,0,0,1,1,0
1,12631203,89176,5,23,861.871712,0.183070,0.197531,0.0,0.044047,2,0.2,0,0,-1.0,2,0.2,6,999,6,-1.0,0.240602,2.653804,45.035594,12.251199,0.272034,0.091535,0.098765,0.022023,2771.5,2.900956,6.053244,52.673589,0.247257,0,0.286910,0.078125,3480,3436,2621,2534,7779,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,303,1661652558,7.590921e+05,303,1.384194e-01,1661652558,7.590921e+05,1.384194e-01,4,5,5,4,4,5,6,0.013744,0.068721,0.012575,0.062873,0.011450,0.057250,0.013452,0.067262,0.012661,0.037983,138.4,692.0,843.0,208.200000,1041.0,329.338379,1646.691895,17.333333,52.0,2.318382,0.959185,0.838671,0.992499,2.647871,0.929087,1.604418,0.934928,0,0,0,1,1,0
2,12631203,1294924,5,30,3097.045207,0.101312,0.073922,0.0,0.028227,1,0.1,0,0,-1.0,1,0.1,5,999,5,-1.0,0.278614,1.058252,337.259033,165.391187,0.490398,0.101312,0.073922,0.028227,62.0,0.887097,1.851049,356.389575,0.659146,3,0.343796,0.073874,95,95,39,42,212,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,258,1661652558,7.590921e+05,258,1.178620e-01,1661652558,7.590921e+05,1.178620e-01,0,6,6,0,0,6,1,0.032603,0.195619,0.030048,0.180288,0.029449,0.176692,0.030172,0.181032,0.026286,0.157715,494.0,2964.0,3737.0,813.833333,4883.0,1114.878540,6689.270996,51.500000,309.0,1.317461,0.976910,0.214786,0.994124,1.005159,0.972240,0.604042,0.974436,0,0,0,1,1,0
3,12631203,1144273,5,10,234.882016,0.427723,0.131579,0.0,0.166337,2,0.2,0,0,-1.0,2,0.2,3,999,3,-1.0,0.388889,1.404456,18.285128,7.656999,0.418756,0.213861,0.065789,0.083168,31680.0,0.277352,0.578732,9.153531,1.335000,0,0.251291,0.023810,12646,12752,27296,29245,13987,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,167,1661652558,7.590921e+05,167,7.629054e-02,1661652558,7.590921e+05,7.629054e-02,25,1,2,22,25,2,18,0.006180,0.006180,0.005659,0.011318,0.005588,0.016764,0.006578,0.013156,0.007607,0.015213,43.0,43.0,82.0,60.333333,181.0,83.548683,167.097366,7.000000,14.0,4.579827,0.920444,2.622347,0.982801,4.576094,0.877238,2.098565,0.920268,0,0,0,1,1,0
4,12631203,457356,5,20,1825.157369,0.110229,0.072464,0.0,0.038300,1,0.1,0,0,-1.0,1,0.1,2,999,2,-1.0,0.347458,0.849418,39.956391,22.970998,0.574902,0.110229,0.072464,0.038300,4512.5,0.464709,0.969680,38.807348,0.857655,2,0.292164,0.036585,4376,4489,3759,3948,5444,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,150,1661652558,7.590921e+05,150,6.852444e-02,1661652558,7.590921e+05,6.852444e-02,6,4,4,8,6,5,9,0.013907,0.055628,0.012734,0.050935,0.012952,0.051807,0.012383,0.061916,0.013534,0.040601,304.0,1216.0,1522.0,503.750000,2015.0,595.574036,2977.870117,41.333333,124.0,0.439188,0.994256,0.910414,0.993780,2.540632,0.937298,0.708694,0.971985,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164781,12889203,1206294,1,1,-2.000000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.095238,1.472530,7.110883,3.062800,0.430720,0.087137,0.032258,0.008299,33432.0,0.341709,0.713022,8.736638,1.141048,2,0.251838,0.250000,43969,44094,32828,34803,118697,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,1661719635,1661719635,1.661720e+09,1661719635,1.661720e+09,1661719635,1.661720e+09,1.661720e+09,-1,0,0,-1,-1,0,27,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,0.007663,0.007663,-1.0,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.000000,2.000000,2.0,3.201070,0.949791,2.299770,0.990085,3.636458,0.916236,4.173400,0.875910,0,0,0,1,1,0
164782,12889203,1348825,1,1,-2.000000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.434211,1.208116,12.867313,6.125600,0.476059,0.052162,0.045977,0.022649,7946.5,0.921538,1.922915,26.409298,0.529686,1,0.242876,0.030303,21046,21098,7793,7867,18562,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,1661719635,1661719635,1.661720e+09,1661719635,1.661720e+09,1661719635,1.661720e+09,1.661720e+09,-1,0,0,-1,-1,0,26,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,0.007663,0.007663,-1.0,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.000000,2.000000,2.0,1.120570,0.983306,7.362111,0.977567,5.580354,0.872763,4.574525,0.868176,0,0,0,1,1,0
164783,12889203,1681537,1,1,-2.000000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.350318,5.150766,159.486952,32.159398,0.201643,0.141145,0.099526,0.049446,666.0,3.058559,6.382103,120.971622,0.280450,1,0.405955,0.090909,373,349,424,396,538,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,1661719635,1661719635,1.661720e+09,1661719635,1.661720e+09,1661719635,1.661720e+09,1.661720e+09,-1,0,0,-1,-1,0,29,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,0.007663,0.007663,-1.0,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.000000,2.000000,2.0,18.633125,0.704841,7.998113,0.860093,15.121169,0.629439,12.705106,0.599713,0,0,0,1,1,0
164784,12894803,151698,1,1,10.000000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.299145,0.754809,39.617778,24.502398,0.618470,0.144266,0.108108,0.043157,6819.0,0.499780,1.042860,29.400055,0.809413,0,0.358757,0.157143,4042,4101,4522,4698,5397,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,1661721433,1661721433,1.661721e+09,1661721433,1.661721e+09,1661721433,1.661721e+09,1.661721e+09,-1,0,0,-1,-1,0,21,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,0.005740,0.005740,-1.0,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.000000,14.000000,14.0,0.506654,0.997621,0.427626,0.995039,4.692613,0.888214,2.743048,0.908874,0,0,0,1,1,0


### Prepare dataframes for clicks models training

In [9]:
# Run configuration for the clicks feature build.
# MODE selects the input dataset in the next cell:
#   0 -> "otto-validation", 1 -> "otto-chunk-data-inparquet-format".
MODE = 0
# False: the session-reduction step of the batch loop is skipped, so every
# session of a batch is kept.
REDUCE = False

In [10]:
# Integer codes used to encode the 'type' column of the test chunks.
type_labels = dict(clicks=0, carts=1, orders=2)

# Input directory (under input/) for the selected MODE.
# For any other MODE value `dataset` is left unassigned, as before.
if MODE == 0:
    dataset = "otto-validation"
elif MODE == 1:
    dataset = "otto-chunk-data-inparquet-format"

def load_test():
    """Read every file under ``input/{dataset}/test_parquet/`` into one DataFrame.

    For each chunk, 'ts' is divided by 1000 and cast to int32 (presumably
    milliseconds -> seconds; confirm against the data source), and 'type' is
    mapped through the module-level `type_labels` and cast to int8.

    The chunk files are read in sorted path order: `glob.glob` returns matches
    in arbitrary order, so without sorting the row order of the result could
    differ between machines or runs.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no chunk file matches the pattern.
    """
    pattern = f'input/{dataset}/test_parquet/*'
    chunk_files = sorted(glob.glob(pattern))
    if not chunk_files:
        # pd.concat([]) would raise a ValueError too, but without the path.
        raise ValueError(f"No test chunks found for pattern {pattern!r}")

    dfs = []
    for chunk_file in chunk_files:
        chunk = pd.read_parquet(chunk_file)
        chunk['ts'] = (chunk['ts']/1000).astype('int32')
        chunk['type'] = chunk['type'].map(type_labels).astype('int8')
        dfs.append(chunk)
    return pd.concat(dfs).reset_index(drop=True)

# Load the full test set once and report its size.
test_df = load_test()
N_sessions = test_df['session'].nunique()
print('Test data has shape', test_df.shape)
print(f'{N_sessions} unique sessions')

# Untouched copy of the full test set: the batch loop rebinds `test_df`
# to one batch at a time and slices every batch from `test_df_`.
test_df_ = test_df.copy()

Test data has shape (7683577, 4)
1801251 unique sessions


In [11]:
for batch in range(4):
    
    print(f"\n\nbatch = {batch}")
    print('='*40)
    test_df = test_df_[test_df_['session']%4==batch].copy()
    print(f"len(test_df) = {len(test_df)}")
    gc_clear()
    
    if ((MODE==0) and (REDUCE==True)):
        test_labels = pd.read_parquet(
            'input/otto-validation/test_labels.parquet',
            columns = ['session','type']
        )
        sessions_to_train = set(
            test_labels[
                test_labels['type'].isin(['carts','orders'])
            ]['session']
        )
        test_df = test_df[
            test_df['session'].isin(sessions_to_train)
        ].reset_index(drop=True).copy()
        print("After sessions reduction:")
        print(f"len(test_df) = {len(test_df)}")
        
    df_last = test_df[['session','aid']].drop_duplicates(['session'], keep='last')
    df_last.columns = ['session','aid_last']
    
    df = test_df[['session','aid']].drop_duplicates()
    print(f'{len(df)/1e6:.2f}M')
    
    for m_name in [
        "30_30_012_012_0_v11m",
        "30_30_012_012_0_v21k",
        "30_30_012_012_0_v21m",
        "30_30_012_012_3_v31m",
        "30_30_012_12_0_v51ha"  
    ]:
        print(f"add features from matrix {m_name}")
        m_df = m_feats(test_df,m_name)
        df = df.merge(m_df, on=['session','aid'],how='outer')
        print(f'{len(df)/1e6:.2f}M')

    del m_df
    gc_clear()
    
    print("add aids features...")
    feats_df = pd.read_parquet(
        f'feats/FE_aids_{MODE}.pqt',
        columns = [
            'aid',
            'aid_CA2OR_trn',
            'aid_CA_rank_int_tst_vs_trn',
            'aid_CA_vs_mean_trn',
            'aid_CA_vs_mean_tst',
            'aid_CA_vs_mean_tst_vs_trn',
            'aid_CL2CA_trn',
            'aid_CL2CA_tst',
            'aid_CL2OR_trn',
            'aid_CL_rank_int_trn',
            'aid_CL_rank_int_tst_vs_trn',
            'aid_CL_rank_pct_tst_vs_trn',
            'aid_CL_vs_mean_trn',
            'aid_CL_vs_mean_tst_vs_trn',
            'aid_multi_clicks_percent_full',
            'aid_multi_orders_percent_train',
            'aid_clicks_favourite_dow_test',
            'carts_rating_full',
            'carts_rating_train',
            'clicks_rating_full',
            'clicks_rating_train',
            'orders_rating_full'
        ]
    )
    df = df.merge(feats_df,on=['aid'],how='left')
    del feats_df
    gc_clear()
    
    print("add sessions features...")
    feats_df = pd.read_parquet(
        f"feats/FE_sessions_{MODE}.pqt",
        columns = [
            'session',
            'session_actions',
            'session_avg_real_items_num',
            'session_avg_real_length',
            'session_carts',
            'session_carts_avg_hour',
            'session_carts_avg_real',
            'session_click_diff_mean',
            'session_clicks',
            'session_orders',
            'session_full_length',
            'session_items',
            'session_items_carted',
            'session_items_clicked',
            'session_last_ts'
        ]
    )
    df = df.merge(feats_df,on=['session'],how='left')
    del feats_df
    gc_clear()
    
    print("add aids2sessions features...")
    feats_df = pd.read_parquet(
        f"feats/FE_aids2sessions_{MODE}.pqt",
        columns = [
            'session','aid',
            'a2s_actions_num',
            'a2s_best_action_type',
            'a2s_carts_num',
            'a2s_clicks_num',
            'a2s_orders_num',
            'a2s_last_action_index',
            'a2s_last_cart_index',
            'a2s_last_click_index',
            'a2s_last_action_ts',
            'a2s_last_click_ts',
            'a2s_last_cart_ts',
            'a2s_last_order_ts'
        ]
    )
    df = df.merge(feats_df,on=['session','aid'],how='left')
    del feats_df
    gc_clear()
    
    print("add additional features...")
    df['matrices_num'] = 0
    df['matrices_numsum'] = 0
    df['matrices_wgt_rel_mean'] = 0
    for ver in ['v11m','v21k','v21m','v31m','v51ha']:
        df[f'{ver}_num'] = df[f'{ver}_num'].fillna(0).astype('int16')
        df[f'{ver}_indmin'] = df[f'{ver}_indmin'].fillna(-1).astype('int8')
        df[f'wgt_{ver}_sum'] = df[f'wgt_{ver}_sum'].fillna(-1)
        df[f'wgt_{ver}_mean'] = df[f'wgt_{ver}_mean'].fillna(-1)
        df[f'wgt_rel_{ver}_sum'] = df[f'wgt_rel_{ver}_sum'].fillna(-1)
        df[f'wgt_rel_{ver}_mean'] = df[f'wgt_rel_{ver}_mean'].fillna(-1)
        
        df['matrices_num'] += (df[f'{ver}_num']>0).astype('int8')
        df['matrices_numsum'] += df[f'{ver}_num']
        df['matrices_wgt_rel_mean'] += df[f'wgt_{ver}_mean']
    
    for x in ['actions','clicks','carts','orders']:
        df[f'a2s_{x}_num'] = df[f'a2s_{x}_num'].fillna(0).astype('int16')
        df[f'a2s_{x}_rel'] = df[f'a2s_{x}_num'] / df[f'session_{x}']
        
    for col in ['a2s_last_click_ts','a2s_last_cart_ts','a2s_last_order_ts','a2s_last_action_ts']:
        df[col] = df[col].fillna(-1).astype('int')
        
    df['ts_diff'] = df['session_last_ts'] - df['a2s_last_action_ts']
    df['ts_diff_clicks'] = df['session_last_ts'] - df['a2s_last_click_ts']
    df['ts_diff_carts'] = df['session_last_ts'] - df['a2s_last_cart_ts']
    df['ts_diff_orders'] = df['session_last_ts'] - df['a2s_last_order_ts']
    
    df['ts_diff_rel'] = df['ts_diff'] / (df['session_full_length']+1)
    df['ts_diff_clicks_rel'] = df['ts_diff_clicks'] / (df['session_full_length']+1)
    df['ts_diff_carts_rel'] = df['ts_diff_carts'] / (df['session_full_length']+1)
    df['ts_diff_orders_rel'] = df['ts_diff_orders'] / (df['session_full_length']+1)
    
    df['split_dow'] = (df['session_last_ts']%(7*24*60*60))//(24*60*60)
    
    for x in['clicks']:
        for y in ['test']:
            df[f'aid_{x}_favourite_dow_diff_{y}'] = np.abs(df['split_dow'] - df[f'aid_{x}_favourite_dow_{y}'])
            df.loc[
                df[f'aid_{x}_favourite_dow_diff_{y}']>3,
                f'aid_{x}_favourite_dow_diff_{y}'
            ] = 7-df[f'aid_{x}_favourite_dow_diff_{y}']
            df[f'aid_{x}_favourite_dow_diff_{y}'] = df[f'aid_{x}_favourite_dow_diff_{y}'].fillna(14).astype('int8')
            del df[f'aid_{x}_favourite_dow_{y}']

    gc_clear()
    
    df['a2s_last_click_index'] = df['a2s_last_click_index'].fillna(999).astype('int16')
    df['a2s_last_cart_index'] = df['a2s_last_cart_index'].fillna(999).astype('int16')
    df['a2s_last_action_index'] = df['a2s_last_action_index'].fillna(999).astype('int16')
    df['a2s_best_action_type'] = df['a2s_best_action_type'].fillna(-1).astype('int8')
    
    for x in ['trn']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']
        df[f'OR_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2OR_{x}']
        df[f'OR_estimation_frCA_{x}'] = df[f'a2s_carts_num'] * df[f'aid_CA2OR_{x}']
    for x in ['tst']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']
        
    df['a2s_clicks_rel'].fillna(-1,inplace=True)
    df['a2s_carts_rel'].fillna(-1,inplace=True)
    df['a2s_orders_rel'].fillna(-1,inplace=True)
    
    del df['session_last_ts'], df['a2s_last_click_ts']
    del df['a2s_last_cart_ts'], df['a2s_last_order_ts'],  df['a2s_last_action_ts']
    
    if MODE!=1:
        print("add labels...")
        test_labels = pd.read_parquet(f'input/{dataset}/test_labels.parquet')
        test_labels_explode = test_labels.explode('ground_truth')

        clicks_labels = test_labels_explode[test_labels_explode['type']=='clicks'].copy()
        del clicks_labels['type']
        clicks_labels.columns = ['session','aid']
        clicks_labels['clicks_gt'] = 1

        carts_labels = test_labels_explode[test_labels_explode['type']=='carts'].copy()
        del carts_labels['type']
        carts_labels.columns = ['session','aid']
        carts_labels['carts_gt'] = 1

        orders_labels = test_labels_explode[test_labels_explode['type']=='orders'].copy()
        del orders_labels['type']
        orders_labels.columns = ['session','aid']
        orders_labels['orders_gt'] = 1

        print(len(clicks_labels),len(carts_labels),len(orders_labels))

        df = df.merge(
            clicks_labels, on=['session','aid'], how='left'
        ).merge(
            carts_labels, on=['session','aid'], how='left'
        ).merge(
            orders_labels, on=['session','aid'], how='left'
        )

        df['clicks_gt'] = df['clicks_gt'].fillna(0).astype('int8')
        df['carts_gt'] = df['carts_gt'].fillna(0).astype('int8')
        df['orders_gt'] = df['orders_gt'].fillna(0).astype('int8')
        
        df['clicks_session'] = df.groupby(['session'])['clicks_gt'].transform('max').astype('int8')
        df['carts_session'] = df.groupby(['session'])['carts_gt'].transform('max').astype('int8')
        df['orders_session'] = df.groupby(['session'])['orders_gt'].transform('max').astype('int8')
    
    ###################################################################################
    
    df = df.merge(df_last, on=['session'], how='left')
    
    embs_feats = []
    embs = {
        'matrices/emb_32_1_sh1_pub.npy':'sh1_1_pub',
        'matrices/emb_32_1_sh2_pub.npy':'sh2_1_pub',
        'matrices/w2v.npy':'w2v',
        'matrices/w2v_100.npy':'w2v_100',
    }
    for emb, sfx in embs.items():
        print(f"add embs feats from {emb}")
        df = add_fact_feats(emb, sfx, df)
        embs_feats.extend([f'emb_diff_{sfx}', f'emb_angle_{sfx}'])
        
    del df['aid_last']
    gc_clear()
    
    ###################################################################################
        
    gt_feats = [
        'clicks_gt','carts_gt','orders_gt',
        'clicks_session','carts_session','orders_session'
    ] if MODE==0 else []
    
    final_feats = [
        'session','aid',
        'matrices_num','matrices_numsum','matrices_wgt_rel_mean',
    ] + [
        x for x in full_feats if x in df.columns
    ] + embs_feats + gt_feats 
    
    df[final_feats].to_parquet(f"feats/feats_{MODE}_batch_{batch}.pqt",index=False)
    
    
df[final_feats]



batch = 0
len(test_df) = 19129
0.01M
add features from matrix 30_30_012_012_0_v11m
0.30M
add features from matrix 30_30_012_012_0_v21k
0.37M
add features from matrix 30_30_012_012_0_v21m
0.42M
add features from matrix 30_30_012_012_3_v31m
0.45M
add features from matrix 30_30_012_12_0_v51ha
0.55M
add aids features...
add sessions features...
add aids2sessions features...
add additional features...
add labels...
1755534 580817 314021
add embs feats from matrices/emb_32_1_sh1_pub.npy
add embs feats from matrices/emb_32_1_sh2_pub.npy
add embs feats from matrices/w2v.npy
add embs feats from matrices/w2v_100.npy


batch = 1
len(test_df) = 18509
0.01M
add features from matrix 30_30_012_012_0_v11m
0.30M
add features from matrix 30_30_012_012_0_v21k
0.37M
add features from matrix 30_30_012_012_0_v21m
0.42M
add features from matrix 30_30_012_012_3_v31m
0.45M
add features from matrix 30_30_012_12_0_v51ha
0.54M
add aids features...
add sessions features...
add aids2sessions features...
add addit

Unnamed: 0,session,aid,matrices_num,matrices_numsum,matrices_wgt_rel_mean,CA_estimation_frCL_trn,CA_estimation_frCL_tst,OR_estimation_frCA_trn,OR_estimation_frCL_trn,a2s_actions_num,a2s_actions_rel,a2s_best_action_type,a2s_carts_num,a2s_carts_rel,a2s_clicks_num,a2s_clicks_rel,a2s_last_action_index,a2s_last_cart_index,a2s_last_click_index,a2s_orders_rel,aid_CA2OR_trn,aid_CA_rank_int_tst_vs_trn,aid_CA_vs_mean_trn,aid_CA_vs_mean_tst,aid_CA_vs_mean_tst_vs_trn,aid_CL2CA_trn,aid_CL2CA_tst,aid_CL2OR_trn,aid_CL_rank_int_trn,aid_CL_rank_int_tst_vs_trn,aid_CL_rank_pct_tst_vs_trn,aid_CL_vs_mean_trn,aid_CL_vs_mean_tst_vs_trn,aid_clicks_favourite_dow_diff_test,aid_multi_clicks_percent_full,aid_multi_orders_percent_train,carts_rating_full,carts_rating_train,clicks_rating_full,clicks_rating_train,orders_rating_full,session_actions,session_avg_real_items_num,session_avg_real_length,session_carts,session_carts_avg_hour,session_carts_avg_real,session_click_diff_mean,session_clicks,session_full_length,session_items,session_items_carted,session_items_clicked,ts_diff,ts_diff_carts,ts_diff_carts_rel,ts_diff_clicks,ts_diff_clicks_rel,ts_diff_orders,ts_diff_orders_rel,ts_diff_rel,v11m_indmin,v11m_num,v21k_num,v21m_indmin,v31m_indmin,v31m_num,v51ha_indmin,wgt_rel_v11m_mean,wgt_rel_v11m_sum,wgt_rel_v21k_mean,wgt_rel_v21k_sum,wgt_rel_v21m_mean,wgt_rel_v21m_sum,wgt_rel_v31m_mean,wgt_rel_v31m_sum,wgt_rel_v51ha_mean,wgt_rel_v51ha_sum,wgt_v11m_mean,wgt_v11m_sum,wgt_v21k_sum,wgt_v21m_mean,wgt_v21m_sum,wgt_v31m_mean,wgt_v31m_sum,wgt_v51ha_mean,wgt_v51ha_sum,emb_diff_sh1_1_pub,emb_angle_sh1_1_pub,emb_diff_sh2_1_pub,emb_angle_sh2_1_pub,emb_diff_w2v,emb_angle_w2v,emb_diff_w2v_100,emb_angle_w2v_100,clicks_gt,carts_gt,orders_gt,clicks_session,carts_session,orders_session
0,12629603,722601,0,0,-5.000,0.108000,0.075000,0.0,0.062000,1,1.0,0,0,-1.0,1,1.0,0,999,0,-1.0,0.574074,1.157043e+00,9.142564,4.594200,0.502507,0.108000,0.075000,0.062000,32032.0,6.207699e-01,1.295322,9.062902,0.709658,3,0.388889,0.032258,21875,21568,21733,22083,20504,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,0,1661646288,1.661646e+09,0,0.000000e+00,1661646288,1.661646e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,-1.00,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,1,0,0,1,0,0
1,12630003,1319902,0,0,-5.000,0.058366,0.043011,0.0,0.019455,1,1.0,0,0,-1.0,1,1.0,0,999,0,-1.0,0.333333,2.071845e-01,2.539601,6.125600,2.412032,0.058366,0.043011,0.019455,66635.5,9.993922e-02,0.208537,4.658332,3.210029,3,0.097143,0.000000,113650,133694,62203,77424,113358,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,0,1661647124,1.661647e+09,0,0.000000e+00,1661647124,1.661647e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,-1.00,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0,0,0,1,0,0
2,12630403,105547,0,0,-5.000,-1.000000,0.000000,-0.0,-1.000000,1,1.0,0,0,-1.0,1,1.0,0,999,0,-1.0,-1.000000,2.000000e+06,-1.000000,0.000000,-1.000000,-1.000000,0.000000,-1.000000,2000000.0,2.000000e+06,2.000000,-1.000000,-1.000000,1,0.000000,-1.000000,2000000,2000000,1808206,2000000,2000000,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,0,1661648073,1.661648e+09,0,0.000000e+00,1661648073,1.661648e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,-1.00,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0,0,0,0,0,0
3,12630803,481124,0,0,-5.000,0.101118,0.225000,0.0,0.033085,1,1.0,0,0,-1.0,1,1.0,0,999,0,-1.0,0.327189,1.617590e+00,36.739563,13.782599,0.375143,0.101118,0.225000,0.033085,4484.5,4.434051e+00,9.252256,38.897977,0.165344,3,0.193962,0.140845,4569,4471,5581,5363,5550,1,1.0,0.0,0,-1.0,0.0,-1.000000,1,0,1,0,1,0,1661649139,1.661649e+09,0,0.000000e+00,1661649139,1.661649e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,-1.00,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0,0,0,0,0,0
4,12631203,718253,5,21,750.959,0.229286,0.193548,0.0,0.083377,2,0.2,0,0,-1.0,2,0.2,7,999,7,-1.0,0.363636,1.656316e+00,37.247484,13.782599,0.370028,0.114643,0.096774,0.041688,5320.5,1.251668e+00,2.611778,34.783420,0.429900,0,0.294235,0.062500,4811,4759,4713,4657,5622,10,7.0,2188.0,0,-1.0,0.0,243.111111,10,2188,7,0,7,314,1661652558,7.590921e+05,314,1.434445e-01,1661652558,7.590921e+05,1.434445e-01,7,4,4,3,6,5,2,0.012561,0.050243,0.013678,0.054714,0.016419,0.065678,0.011646,0.05823,0.020306,0.081223,115.0,460.0,610.0,217.5,870.0,250.209,1251.044922,15.75,63.0,3.823660,0.942258,1.055732,0.997084,4.014927,0.893792,2.389182,0.901647,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530551,12899603,1032095,1,1,0.000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.176471,3.548544e+00,120.885015,29.096598,0.240696,0.172422,0.213483,0.030427,1514.0,4.680647e+00,9.766814,75.058958,0.190653,1,0.364303,0.023810,760,731,1140,1096,3106,2,2.0,104.0,0,-1.0,0.0,104.000000,2,104,2,0,2,1661723905,1661723905,1.582594e+07,1661723905,1.582594e+07,1661723905,1.582594e+07,1.582594e+07,-1,0,0,-1,-1,0,29,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,0.003738,0.003738,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,4.00,4.0,15.861943,0.768682,6.270036,0.985916,15.612196,0.675050,11.098768,0.653294,0,0,0,1,0,0
530552,12899603,1436525,1,1,3.000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.219512,6.185343e-01,34.707882,24.502398,0.705961,0.119255,0.123077,0.026178,6213.0,6.669886e-01,1.391763,31.158259,0.670852,3,0.274743,0.000000,4843,4948,5226,5334,13394,2,2.0,104.0,0,-1.0,0.0,104.000000,2,104,2,0,2,1661723905,1661723905,1.582594e+07,1661723905,1.582594e+07,1661723905,1.582594e+07,1.582594e+07,-1,0,0,-1,-1,0,15,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,0.006542,0.006542,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,7.00,7.0,4.519882,0.942144,3.272946,0.983600,12.151273,0.753278,7.154053,0.779200,0,0,0,1,0,0
530553,12899603,1436959,1,1,-2.000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.225352,4.079860e+00,24.041558,4.594200,0.191094,0.274131,0.250000,0.061776,30750.5,2.464952e+00,5.143461,9.389167,0.205499,2,0.303774,0.062500,9713,9393,24885,24146,18046,2,2.0,104.0,0,-1.0,0.0,104.000000,2,104,2,0,2,1661723905,1661723905,1.582594e+07,1661723905,1.582594e+07,1661723905,1.582594e+07,1.582594e+07,-1,0,0,-1,-1,0,22,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,0.007634,0.007634,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,2.00,2.0,16.872768,0.800757,2.249157,0.947789,24.427756,0.528239,17.908743,0.511952,0,0,0,1,0,0
530554,12899603,1499836,1,1,1.000,0.000000,0.000000,0.0,0.000000,0,0.0,-1,0,-1.0,0,0.0,999,999,999,-1.0,0.060000,4.411188e+00,8.465337,1.531400,0.180902,0.033156,0.021739,0.001989,7581.0,2.214813e+00,4.621513,27.333714,0.270592,1,0.197555,0.000000,33526,32471,8388,8158,215678,2,2.0,104.0,0,-1.0,0.0,104.000000,2,104,2,0,2,1661723905,1661723905,1.582594e+07,1661723905,1.582594e+07,1661723905,1.582594e+07,1.582594e+07,-1,0,0,-1,-1,0,23,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.00000,0.004673,0.004673,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000,-1.000000,5.00,5.0,1.737640,0.987138,0.468589,0.989779,5.953720,0.878803,4.244609,0.871851,0,0,0,1,0,0


### Prepare dataframes for prediction

In [12]:
# Pipeline mode, read by the cells below:
#   1 -> features are built from the "otto-chunk-data-inparquet-format" test set
#        and no label columns are attached;
#   0 -> features are built from "otto-validation" and ground-truth label
#        columns are merged in and written out as well.
MODE = 1
# Only consulted together with MODE == 0: when True, each batch is restricted to
# sessions that have a 'carts' or 'orders' row in test_labels.parquet.
REDUCE = False

In [13]:
# Integer codes used for the `type` column of the event log.
type_labels = {'clicks':0, 'carts':1, 'orders':2}

# Pick the input dataset folder for the current MODE. An unsupported MODE fails
# loudly here; with two independent `if`s, `dataset` would silently stay
# undefined (or keep a stale value left in the kernel by an earlier cell).
if MODE==1:
    dataset = "otto-chunk-data-inparquet-format"
elif MODE==0:
    dataset = "otto-validation"
else:
    raise ValueError(f"Unsupported MODE={MODE!r}: expected 0 or 1")

def load_test():
    """Read every parquet chunk of the test event log into one DataFrame.

    Files under ``input/{dataset}/test_parquet/`` are read in sorted path
    order, so the row order of the result is reproducible between runs
    (``glob.glob`` alone returns paths in arbitrary order).

    For each chunk:
      * ``ts`` is divided by 1000 and stored as int32
        (presumably milliseconds -> seconds; confirm against the raw data);
      * ``type`` is mapped through the module-level ``type_labels`` dict and
        stored as int8. A type string missing from ``type_labels`` maps to NaN
        and makes the int8 cast raise.

    Returns:
        pandas.DataFrame: all chunks concatenated, with a fresh 0..n-1 index.

    Raises:
        ValueError: from ``pd.concat`` when no chunk file is found.
    """
    chunk_files = sorted(glob.glob(f'input/{dataset}/test_parquet/*'))

    dfs = []
    for chunk_file in chunk_files:
        chunk = pd.read_parquet(chunk_file)
        # Item assignment (not attribute assignment) so the column is always updated.
        chunk['ts'] = (chunk['ts']/1000).astype('int32')
        chunk['type'] = chunk['type'].map(type_labels).astype('int8')
        dfs.append(chunk)
    return pd.concat(dfs).reset_index(drop=True)

# Load the complete test log once. `test_df_` keeps an untouched copy, because
# `test_df` is re-assigned to a per-batch slice by the batching loop below.
test_df = load_test()
test_df_ = test_df.copy()

N_sessions = len(set(test_df['session']))
print('Test data has shape', test_df.shape)
print(f'{N_sessions} unique sessions')

Test data has shape (6928123, 4)
1671803 unique sessions


In [14]:
for batch in range(4):

    # Banner for this batch's log section
    print(f"\n\nbatch = {batch}")
    print('='*40)

    # Sessions are assigned to batches by `session % 4`
    in_batch = test_df_['session']%4==batch
    test_df = test_df_[in_batch].copy()
    print(f"len(test_df) = {len(test_df)}")
    gc_clear()

    if MODE==0 and REDUCE==True:
        # Validation-only shortcut: keep just the sessions that have a
        # 'carts' or 'orders' row in the label file.
        test_labels = pd.read_parquet(
            'input/otto-validation/test_labels.parquet',
            columns = ['session','type']
        )
        has_cart_or_order = test_labels['type'].isin(['carts','orders'])
        sessions_to_train = set(test_labels[has_cart_or_order]['session'])

        keep_rows = test_df['session'].isin(sessions_to_train)
        test_df = test_df[keep_rows].reset_index(drop=True).copy()
        print("After sessions reduction:")
        print(f"len(test_df) = {len(test_df)}")
        
    # One row per session: the aid of that session's last row in test_df
    # (presumably the most recent event; depends on test_df row order).
    session_aid = test_df[['session','aid']]
    df_last = session_aid.drop_duplicates(['session'], keep='last')
    df_last.columns = ['session','aid_last']

    # The candidate table starts from the (session, aid) pairs already present
    df = session_aid.drop_duplicates()
    print(f'{len(df)/1e6:.2f}M')

    # Outer merges: pairs returned by m_feats that are not yet in `df` are
    # appended as new rows, so the printed row count grows per matrix.
    # (m_feats is defined outside this cell.)
    for m_name in [
        "30_30_012_012_0_v11m",
        "30_30_012_012_0_v21k",
        "30_30_012_012_0_v21m",
        "30_30_012_012_3_v31m",
        "30_30_012_12_0_v51ha"
    ]:
        print(f"add features from matrix {m_name}")
        m_df = m_feats(test_df, m_name)
        df = df.merge(m_df, how='outer', on=['session','aid'])
        print(f'{len(df)/1e6:.2f}M')

    del m_df, session_aid
    gc_clear()
    
    # Precomputed feature tables that are left-joined onto the candidates, in
    # this order. Each entry: (log message, parquet path, join keys, columns to
    # read - join keys included).
    feature_tables = [
        (
            "add aids features...",
            f'feats/FE_aids_{MODE}.pqt',
            ['aid'],
            [
                'aid',
                'aid_CA2OR_trn',
                'aid_CA_rank_int_tst_vs_trn',
                'aid_CA_vs_mean_trn',
                'aid_CA_vs_mean_tst',
                'aid_CA_vs_mean_tst_vs_trn',
                'aid_CL2CA_trn',
                'aid_CL2CA_tst',
                'aid_CL2OR_trn',
                'aid_CL_rank_int_trn',
                'aid_CL_rank_int_tst_vs_trn',
                'aid_CL_rank_pct_tst_vs_trn',
                'aid_CL_vs_mean_trn',
                'aid_CL_vs_mean_tst_vs_trn',
                'aid_multi_clicks_percent_full',
                'aid_multi_orders_percent_train',
                'aid_clicks_favourite_dow_test',
                'carts_rating_full',
                'carts_rating_train',
                'clicks_rating_full',
                'clicks_rating_train',
                'orders_rating_full'
            ],
        ),
        (
            "add sessions features...",
            f"feats/FE_sessions_{MODE}.pqt",
            ['session'],
            [
                'session',
                'session_actions',
                'session_avg_real_items_num',
                'session_avg_real_length',
                'session_carts',
                'session_carts_avg_hour',
                'session_carts_avg_real',
                'session_click_diff_mean',
                'session_clicks',
                'session_orders',
                'session_full_length',
                'session_items',
                'session_items_carted',
                'session_items_clicked',
                'session_last_ts'
            ],
        ),
        (
            "add aids2sessions features...",
            f"feats/FE_aids2sessions_{MODE}.pqt",
            ['session','aid'],
            [
                'session','aid',
                'a2s_actions_num',
                'a2s_best_action_type',
                'a2s_carts_num',
                'a2s_clicks_num',
                'a2s_orders_num',
                'a2s_last_action_index',
                'a2s_last_cart_index',
                'a2s_last_click_index',
                'a2s_last_action_ts',
                'a2s_last_click_ts',
                'a2s_last_cart_ts',
                'a2s_last_order_ts'
            ],
        ),
    ]

    for message, path, keys, columns in feature_tables:
        print(message)
        feats_df = pd.read_parquet(path, columns = columns)
        df = df.merge(feats_df, on=keys, how='left')
        # Release the table before loading the next one
        del feats_df
        gc_clear()
    
    print("add additional features...")

    # Cross-matrix aggregates, accumulated matrix by matrix below
    for agg_col in ['matrices_num', 'matrices_numsum', 'matrices_wgt_rel_mean']:
        df[agg_col] = 0

    for ver in ['v11m','v21k','v21m','v31m','v51ha']:
        num_col = f'{ver}_num'
        mean_col = f'wgt_{ver}_mean'

        # Pairs absent from this matrix came out of the outer merge as NaN:
        # count -> 0, min index -> -1, weights -> -1
        df[num_col] = df[num_col].fillna(0).astype('int16')
        df[f'{ver}_indmin'] = df[f'{ver}_indmin'].fillna(-1).astype('int8')
        for wgt_col in [f'wgt_{ver}_sum', mean_col, f'wgt_rel_{ver}_sum', f'wgt_rel_{ver}_mean']:
            df[wgt_col] = df[wgt_col].fillna(-1)

        df['matrices_num'] += (df[num_col]>0).astype('int8')
        df['matrices_numsum'] += df[num_col]
        # NOTE(review): despite its name this column sums `wgt_{ver}_mean`
        # (with the -1 fills), not `wgt_rel_{ver}_mean`. Left as is so the
        # feature stays identical to the one produced by the earlier cell.
        df['matrices_wgt_rel_mean'] += df[mean_col]

    # Share of the session's events of each kind that involve this aid.
    # A zero session count gives 0/0 = NaN here.
    for x in ['actions','clicks','carts','orders']:
        count_col = f'a2s_{x}_num'
        df[count_col] = df[count_col].fillna(0).astype('int16')
        df[f'a2s_{x}_rel'] = df[count_col] / df[f'session_{x}']

    for col in ['a2s_last_click_ts','a2s_last_cart_ts','a2s_last_order_ts','a2s_last_action_ts']:
        df[col] = df[col].fillna(-1).astype('int')

    # Gap between the session's last timestamp and the aid's last event of each
    # kind. With the -1 fill above, a missing event yields session_last_ts + 1.
    ts_pairs = [
        ('ts_diff', 'ts_diff_rel', 'a2s_last_action_ts'),
        ('ts_diff_clicks', 'ts_diff_clicks_rel', 'a2s_last_click_ts'),
        ('ts_diff_carts', 'ts_diff_carts_rel', 'a2s_last_cart_ts'),
        ('ts_diff_orders', 'ts_diff_orders_rel', 'a2s_last_order_ts'),
    ]
    for diff_col, _, ts_col in ts_pairs:
        df[diff_col] = df['session_last_ts'] - df[ts_col]

    # Same gaps relative to the session length (+1 avoids division by zero)
    for diff_col, rel_col, _ in ts_pairs:
        df[rel_col] = df[diff_col] / (df['session_full_length']+1)

    # Day slot 0..6 of the session's last timestamp within a 7-day cycle
    df['split_dow'] = (df['session_last_ts']%(7*24*60*60))//(24*60*60)
    
    for x in ['clicks']:
        for y in ['test']:
            fav_col = f'aid_{x}_favourite_dow_{y}'
            diff_col = f'aid_{x}_favourite_dow_diff_{y}'

            # Circular distance between the session's day slot and the aid's
            # favourite one: gaps above 3 wrap around the 7-day cycle.
            dow_gap = np.abs(df['split_dow'] - df[fav_col])
            dow_gap = dow_gap.mask(dow_gap>3, 7-dow_gap)

            # 14 marks "no favourite day available" (NaN before the fill)
            df[diff_col] = dow_gap.fillna(14).astype('int8')
            del df[fav_col]

    gc_clear()
    
    # Pairs without a row in the aids2sessions table are NaN after the left
    # merge: use 999 as the "no position" sentinel and -1 as "no action type".
    for col in ['a2s_last_click_index','a2s_last_cart_index','a2s_last_action_index']:
        df[col] = df[col].fillna(999).astype('int16')
    df['a2s_best_action_type'] = df['a2s_best_action_type'].fillna(-1).astype('int8')

    # Estimates: in-session action counts scaled by the aid-level columns
    # (CL2CA, CL2OR, CA2OR) from the aids feature table.
    for x in ['trn']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']
        df[f'OR_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2OR_{x}']
        df[f'OR_estimation_frCA_{x}'] = df[f'a2s_carts_num'] * df[f'aid_CA2OR_{x}']
    for x in ['tst']:
        df[f'CA_estimation_frCL_{x}'] = df[f'a2s_clicks_num'] * df[f'aid_CL2CA_{x}']

    # Assign the filled column back explicitly. `df[col].fillna(-1, inplace=True)`
    # is chained in-place assignment: it warns in recent pandas and leaves `df`
    # unchanged once Copy-on-Write is active.
    for col in ['a2s_clicks_rel','a2s_carts_rel','a2s_orders_rel']:
        df[col] = df[col].fillna(-1)

    # Raw timestamps were only needed to derive the ts_diff* features
    for col in [
        'session_last_ts','a2s_last_click_ts',
        'a2s_last_cart_ts','a2s_last_order_ts','a2s_last_action_ts'
    ]:
        del df[col]
    
    if MODE!=1:
        print("add labels...")
        test_labels = pd.read_parquet(f'input/{dataset}/test_labels.parquet')
        # One row per (session, type, ground-truth aid)
        test_labels_explode = test_labels.explode('ground_truth')

        targets = ['clicks','carts','orders']

        # Per target: (session, aid) pairs that are ground truth, flagged with 1
        label_frames = []
        for target in targets:
            frame = test_labels_explode[test_labels_explode['type']==target].copy()
            del frame['type']
            frame.columns = ['session','aid']
            frame[f'{target}_gt'] = 1
            label_frames.append(frame)
        clicks_labels, carts_labels, orders_labels = label_frames

        print(len(clicks_labels),len(carts_labels),len(orders_labels))

        for frame in label_frames:
            df = df.merge(frame, on=['session','aid'], how='left')

        # Candidates that are not ground truth get 0
        for target in targets:
            df[f'{target}_gt'] = df[f'{target}_gt'].fillna(0).astype('int8')

        # 1 for every row of a session with at least one positive candidate
        for target in targets:
            df[f'{target}_session'] = (
                df.groupby(['session'])[f'{target}_gt'].transform('max').astype('int8')
            )
    
    ###################################################################################
    
    # Attach each session's last aid; add_fact_feats (defined outside this cell)
    # receives the frame with `aid_last` present.
    df = df.merge(df_last, how='left', on=['session'])

    # embedding file -> suffix used in the generated feature names
    embs = {
        'matrices/emb_32_1_sh1_pub.npy':'sh1_1_pub',
        'matrices/emb_32_1_sh2_pub.npy':'sh2_1_pub',
        'matrices/w2v.npy':'w2v',
        'matrices/w2v_100.npy':'w2v_100',
    }
    embs_feats = []
    for emb, sfx in embs.items():
        print(f"add embs feats from {emb}")
        df = add_fact_feats(emb, sfx, df)
        embs_feats += [f'emb_diff_{sfx}', f'emb_angle_{sfx}']

    # The helper column is not part of the final feature set
    del df['aid_last']
    gc_clear()
    
    ###################################################################################
        
    # Ground-truth columns are exported only in validation mode
    if MODE==0:
        gt_feats = [
            'clicks_gt','carts_gt','orders_gt',
            'clicks_session','carts_session','orders_session'
        ]
    else:
        gt_feats = []

    key_and_matrix_feats = [
        'session','aid',
        'matrices_num','matrices_numsum','matrices_wgt_rel_mean',
    ]
    # Keep the `full_feats` order, skipping names this frame does not have
    present_feats = [x for x in full_feats if x in df.columns]
    final_feats = key_and_matrix_feats + present_feats + embs_feats + gt_feats

    # One parquet file per (MODE, batch)
    df[final_feats].to_parquet(f"feats/feats_{MODE}_batch_{batch}.pqt",index=False)
    
    
# Cell result: preview of the feature frame from the last batch processed
# (`df` and `final_feats` keep their values from the final loop iteration).
df[final_feats]



batch = 0
len(test_df) = 16668
0.01M
add features from matrix 30_30_012_012_0_v11m
0.27M
add features from matrix 30_30_012_012_0_v21k
0.33M
add features from matrix 30_30_012_012_0_v21m
0.38M
add features from matrix 30_30_012_012_3_v31m
0.40M
add features from matrix 30_30_012_12_0_v51ha
0.49M
add aids features...
add sessions features...
add aids2sessions features...
add additional features...
add embs feats from matrices/emb_32_1_sh1_pub.npy
add embs feats from matrices/emb_32_1_sh2_pub.npy
add embs feats from matrices/w2v.npy
add embs feats from matrices/w2v_100.npy


batch = 1
len(test_df) = 16792
0.01M
add features from matrix 30_30_012_012_0_v11m
0.27M
add features from matrix 30_30_012_012_0_v21k
0.33M
add features from matrix 30_30_012_012_0_v21m
0.38M
add features from matrix 30_30_012_012_3_v31m
0.40M
add features from matrix 30_30_012_12_0_v51ha
0.50M
add aids features...
add sessions features...
add aids2sessions features...
add additional features...
add embs feats fro

Unnamed: 0,session,aid,matrices_num,matrices_numsum,matrices_wgt_rel_mean,CA_estimation_frCL_trn,CA_estimation_frCL_tst,OR_estimation_frCA_trn,OR_estimation_frCL_trn,a2s_actions_num,a2s_actions_rel,a2s_best_action_type,a2s_carts_num,a2s_carts_rel,a2s_clicks_num,a2s_clicks_rel,a2s_last_action_index,a2s_last_cart_index,a2s_last_click_index,a2s_orders_rel,aid_CA2OR_trn,aid_CA_rank_int_tst_vs_trn,aid_CA_vs_mean_trn,aid_CA_vs_mean_tst,aid_CA_vs_mean_tst_vs_trn,aid_CL2CA_trn,aid_CL2CA_tst,aid_CL2OR_trn,aid_CL_rank_int_trn,aid_CL_rank_int_tst_vs_trn,aid_CL_rank_pct_tst_vs_trn,aid_CL_vs_mean_trn,aid_CL_vs_mean_tst_vs_trn,aid_clicks_favourite_dow_diff_test,aid_multi_clicks_percent_full,aid_multi_orders_percent_train,carts_rating_full,carts_rating_train,clicks_rating_full,clicks_rating_train,orders_rating_full,session_actions,session_avg_real_items_num,session_avg_real_length,session_carts,session_carts_avg_hour,session_carts_avg_real,session_click_diff_mean,session_clicks,session_full_length,session_items,session_items_carted,session_items_clicked,ts_diff,ts_diff_carts,ts_diff_carts_rel,ts_diff_clicks,ts_diff_clicks_rel,ts_diff_orders,ts_diff_orders_rel,ts_diff_rel,v11m_indmin,v11m_num,v21k_num,v21m_indmin,v31m_indmin,v31m_num,v51ha_indmin,wgt_rel_v11m_mean,wgt_rel_v11m_sum,wgt_rel_v21k_mean,wgt_rel_v21k_sum,wgt_rel_v21m_mean,wgt_rel_v21m_sum,wgt_rel_v31m_mean,wgt_rel_v31m_sum,wgt_rel_v51ha_mean,wgt_rel_v51ha_sum,wgt_v11m_mean,wgt_v11m_sum,wgt_v21k_sum,wgt_v21m_mean,wgt_v21m_sum,wgt_v31m_mean,wgt_v31m_sum,wgt_v51ha_mean,wgt_v51ha_sum,emb_diff_sh1_1_pub,emb_angle_sh1_1_pub,emb_diff_sh2_1_pub,emb_angle_sh2_1_pub,emb_diff_w2v,emb_angle_w2v,emb_diff_w2v_100,emb_angle_w2v_100
0,14200003,679945,0,0,-5.0,0.081967,0.142857,0.0,0.028689,1,1.000000,0,0,-1.0,1,1.000000,0,999,0,-1.0,0.350000,1.142124,2.601690,1.576091,0.605795,0.081967,0.142857,0.028689,92211.5,1.288999,3.052856,3.434519,0.324164,0,0.219124,0.000000,141671,143029,94750,93904,123596,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,0,1662205392,1.662205e+09,0,0.000000e+00,1662205392,1.662205e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000
1,14200403,1036074,0,0,-5.0,0.003576,0.001712,0.0,0.000000,1,1.000000,0,0,-1.0,1,1.000000,0,999,0,-1.0,0.000000,2.322868,5.333465,1.576091,0.295510,0.003576,0.001712,0.000000,375.0,0.800000,1.894715,161.366097,0.575616,3,0.124834,-1.000000,67246,66116,662,683,2000000,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,0,1662205521,1.662206e+09,0,0.000000e+00,1662205521,1.662206e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000
2,14200803,203151,0,0,-5.0,0.186508,0.200000,0.0,0.071429,1,1.000000,0,0,-1.0,1,1.000000,0,999,0,-1.0,0.382979,0.435734,6.113972,6.304365,1.031141,0.186508,0.200000,0.071429,89320.5,0.455243,1.078194,3.547126,0.896780,3,0.308824,0.000000,55107,57111,77166,81458,48187,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,0,1662205636,1.662206e+09,0,0.000000e+00,1662205636,1.662206e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000
3,14201203,567027,0,0,-5.0,0.072135,0.059946,0.0,0.009017,1,1.000000,0,0,-1.0,1,1.000000,0,999,0,-1.0,0.125000,0.274703,32.260959,34.674008,1.074798,0.072135,0.059946,0.009017,3169.5,0.226850,0.537269,48.392938,1.206192,3,0.270959,0.032258,5908,6344,2573,2783,25505,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,0,1662205752,1.662206e+09,0,0.000000e+00,1662205752,1.662206e+09,0.000000e+00,-1,0,0,-1,-1,0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000
4,14201603,1305442,0,0,-5.0,0.105442,0.119403,0.0,0.022485,1,0.333333,0,0,-1.0,1,0.333333,2,999,2,-1.0,0.213249,0.355422,206.183950,151.304761,0.733834,0.105442,0.119403,0.022485,207.0,0.700483,1.659019,211.588902,0.604360,1,0.289341,0.144970,268,272,217,226,699,3,3.0,78.0,0,-1.0,0.0,39.0,3,78,3,0,3,78,1662205945,2.104058e+07,78,9.873418e-01,1662205945,2.104058e+07,9.873418e-01,-1,0,0,-1,-1,0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,64.001634,-0.728568,55.461701,-0.949237,24.754786,0.413733,18.882220,0.315596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
509642,14571203,860194,1,1,-1.0,0.000000,0.000000,0.0,0.000000,0,0.000000,-1,0,-1.0,0,0.000000,999,999,999,-1.0,0.357143,2.907371,1.821183,0.000000,0.000000,0.083333,0.000000,0.029762,131350.0,0.532234,1.260541,2.364751,0.807102,2,0.327778,0.000000,169188,165450,111472,113758,164499,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,1662328416,1662328416,1.662328e+09,1662328416,1.662328e+09,1662328416,1.662328e+09,1.662328e+09,-1,0,0,-1,-1,0,18,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.004348,0.004348,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3.0,3.0,1.759890,0.983102,5.770884,0.974085,8.015048,0.829818,9.476064,0.812149
509643,14571203,897742,1,1,-1.0,0.000000,0.000000,0.0,0.000000,0,0.000000,-1,0,-1.0,0,0.000000,999,999,999,-1.0,0.342767,0.341241,41.366875,37.826190,0.914408,0.064023,0.051613,0.021945,1712.5,0.275328,0.652086,69.914986,1.057828,2,0.352356,0.082569,3851,4043,1262,1354,4820,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,1662328416,1662328416,1.662328e+09,1662328416,1.662328e+09,1662328416,1.662328e+09,1.662328e+09,-1,0,0,-1,-1,0,20,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.004348,0.004348,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3.0,3.0,3.193548,0.940016,6.009088,0.978039,13.306757,0.704536,5.833794,0.799018
509644,14571203,1078894,1,1,-2.0,0.000000,0.000000,0.0,0.000000,0,0.000000,-1,0,-1.0,0,0.000000,999,999,999,-1.0,0.264368,0.895459,11.317352,6.304365,0.557053,0.072020,0.088889,0.019040,14727.0,1.070890,2.536289,17.003685,0.420922,0,0.320830,0.086957,25679,25836,12799,12833,35514,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,1662328416,1662328416,1.662328e+09,1662328416,1.662328e+09,1662328416,1.662328e+09,1.662328e+09,-1,0,0,-1,-1,0,29,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.002899,0.002899,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.0,2.0,9.984437,0.795690,7.426640,0.847369,17.409905,0.639194,9.887717,0.676382
509645,14571203,1733376,1,1,-1.0,0.000000,0.000000,0.0,0.000000,0,0.000000,-1,0,-1.0,0,0.000000,999,999,999,-1.0,0.428571,0.113675,0.910592,4.728274,5.192530,0.094595,0.130435,0.040541,264365.5,0.131893,0.312375,1.041616,3.511982,1,0.268041,0.000000,240146,308299,218073,255803,218823,1,1.0,0.0,0,-1.0,0.0,-1.0,1,0,1,0,1,1662328416,1662328416,1.662328e+09,1662328416,1.662328e+09,1662328416,1.662328e+09,1.662328e+09,-1,0,0,-1,-1,0,28,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.004348,0.004348,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3.0,3.0,3.324882,0.950284,13.776386,0.967096,7.485884,0.831934,14.238653,0.775399
