# Combine the rankings given by the xgboost and lightgbm models to arrive at the final top 20 candidates #

In [None]:
# Choose how Google Drive is mounted, move into the project's data folder and
# make the project folder importable.
#   local = True  -> mount through google.colab at /content/drive
#   local = False -> mount with google-drive-ocamlfuse at /my_mnt_dir
# NOTE(review): the flag name reads inverted (True selects the Colab mount) -- confirm intent.
local = False
if local:
  from google.colab import drive
  drive.mount('/content/drive')
  %cd /content/drive/MyDrive/'Kaggle Otto Reccommender'/data
  path_to_module = '/content/drive/MyDrive/Kaggle Otto Reccommender/'
else:
  !mkdir /my_mnt_dir
  !google-drive-ocamlfuse /my_mnt_dir
  %cd /my_mnt_dir/'Kaggle Otto Reccommender'/data
  path_to_module = '/my_mnt_dir/Kaggle Otto Reccommender/'

import sys    
# Add the project folder to the import path (presumably where otto_utils lives -- verify).
sys.path.append(path_to_module)

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from otto_utils import create_sub

In [None]:
# Directory, relative to the current working directory, from which combine_preds
# reads the per-model ranking files (lgb_<type>_df.feather / xgb_<type>_df.feather).
path_to_training_data = './test_training_data'

In [None]:
def combine_preds(pred_type, top_k=20):
  ''' Blend the LightGBM and XGBoost rankings for one prediction type.

  Reads `lgb_<pred_type>_df.feather` and `xgb_<pred_type>_df.feather` from
  `path_to_training_data`, sums the `n` column of both files per
  (session, aid) pair and keeps, for every session, the `top_k` candidates
  with the smallest summed `n`.

  Args:
    pred_type: suffix used in the file names, e.g. 'clicks', 'carts', 'orders'.
    top_k: number of candidates kept per session (defaults to 20).

  Returns:
    DataFrame with columns session, aid, n (summed) and final_rank
    (1 = best), restricted to final_rank <= top_k.

  Raises:
    ValueError: if `top_k` is smaller than 1.

  NOTE(review): `n` presumably holds each model's rank position (lower is
  better) -- confirm against the code that writes the feather files.
  NOTE(review): a candidate present in only one of the two files is summed
  over a single row, so it gets a smaller total than one ranked by both
  models; confirm both files always cover the same (session, aid) pairs.
  '''
  if top_k < 1:
    raise ValueError(f'top_k must be >= 1, got {top_k}')

  lgbm_rank = pd.read_feather(f'{path_to_training_data}/lgb_{pred_type}_df.feather')
  xgb_rank = pd.read_feather(f'{path_to_training_data}/xgb_{pred_type}_df.feather')
  df = pd.concat([lgbm_rank, xgb_rank], ignore_index=True)

  # The stacked frame is all that is needed from here on; drop the inputs to
  # keep peak memory down before the groupby.
  del lgbm_rank, xgb_rank

  # One row per (session, aid) holding the sum of both models' `n`.
  df = df.groupby(['session','aid'], as_index=False).agg({'n' : 'sum'})

  # Smallest summed `n` first within each session, then number the rows 1..k.
  df.sort_values(by=['session','n'], ascending=True, inplace=True)
  df['final_rank'] = df.groupby('session').cumcount() + 1
  df = df.loc[df['final_rank'] <= top_k]

  return df

In [None]:
# Blend both models' rankings for clicks and display the result.
clicks_df = combine_preds('clicks')
clicks_df

Unnamed: 0,session,aid,n,final_rank
9,12899779,59625,2,1
134,12899779,1253524,5,2
79,12899779,737445,6,3
78,12899779,731692,8,4
191,12899779,1790770,9,5
...,...,...,...,...
334360590,14571581,1764910,32,16
334360489,14571581,775327,33,17
334360564,14571581,1550662,37,18
334360427,14571581,285653,39,19


In [None]:
# Blend both models' rankings for carts and display the result.
carts_df = combine_preds('carts')
carts_df

Unnamed: 0,session,aid,n,final_rank
9,12899779,59625,2,1
78,12899779,731692,4,2
191,12899779,1790770,6,3
134,12899779,1253524,8,4
69,12899779,637538,12,5
...,...,...,...,...
334360416,14571581,174670,34,16
334360592,14571581,1780093,34,17
334360504,14571581,978060,36,18
334360533,14571581,1236674,38,19


In [None]:
# Blend both models' rankings for orders and display the result.
orders_df = combine_preds('orders')
orders_df

Unnamed: 0,session,aid,n,final_rank
9,12899779,59625,2,1
78,12899779,731692,4,2
44,12899779,397451,6,3
199,12899779,1854910,10,4
191,12899779,1790770,11,5
...,...,...,...,...
334360489,14571581,775327,32,16
334360545,14571581,1353542,34,17
334360558,14571581,1497245,38,18
334360410,14571581,79256,39,19


Make Submission

In [None]:
import os
def make_directory(directory):
  ''' Create `directory`, including missing parents; do nothing if it already exists '''
  # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
  # check-then-create race and also works when a parent folder is missing.
  os.makedirs(directory, exist_ok=True)
  return
  
def create_sub(click_preds, cart_preds, order_preds, *,
               expected_rows=5015409, out_dir='./submission'):
  ''' Takes subs with sessions and aids and converts to a kaggle submission

  Each input frame needs a `session` and an `aid` column. For every session
  the aids are joined, in row order, into one space-separated string, and the
  session id is suffixed with '_clicks', '_carts' or '_orders'. The three
  results are stacked and written to `<out_dir>/mixed_submission.csv` with
  the columns `session_type` and `labels`.

  The input frames are not modified.

  NOTE: this definition shadows the `create_sub` imported from otto_utils
  earlier in the notebook.

  Args:
    click_preds, cart_preds, order_preds: candidate frames, one per type.
    expected_rows: required number of rows in the submission; pass None to
      skip the check.
    out_dir: directory the CSV is written to (created if missing).

  Raises:
    ValueError: if the row count differs from `expected_rows`.
  '''
  # makedirs (not mkdir) so a nested out_dir works and an existing directory
  # is not an error.
  os.makedirs(out_dir, exist_ok=True)

  parts = []
  rec_types = ['clicks', 'carts', 'orders']
  rec_dfs = [click_preds, cart_preds, order_preds]
  for rec_type, rec_df in zip(rec_types, rec_dfs):
    # Aggregate on the raw session id first: the string key is then built
    # once per session instead of once per row, and the caller's frame is
    # left untouched.
    labels = rec_df.groupby('session', sort=False)['aid'].agg(
        lambda aids: " ".join(map(str, aids)))
    part = labels.rename('labels').reset_index()
    part['session_type'] = part.pop('session').astype(str) + '_' + rec_type
    # Order rows by the string key within each type.
    part = part.sort_values('session_type')
    parts.append(part[['session_type', 'labels']])
  sub = pd.concat(parts, ignore_index=True)

  # Explicit check instead of `assert`, which is stripped under `python -O`.
  if expected_rows is not None and len(sub) != expected_rows:
    raise ValueError(
        f'submission has {len(sub)} rows, expected {expected_rows}')
  sub.to_csv(os.path.join(out_dir, 'mixed_submission.csv'), index=False)

  return

In [None]:
# Write ./submission/mixed_submission.csv from the three blended candidate frames.
create_sub(clicks_df, carts_df, orders_df)

In [None]:
# Read the written submission back from disk and display it as a visual check.
sub = pd.read_csv('./submission/mixed_submission.csv')
sub

Unnamed: 0,session_type,labels
0,12899779_clicks,59625 1253524 737445 731692 1790770 94230 6375...
1,12899780_clicks,1142000 736515 582732 487136 889686 1758603 15...
2,12899781_clicks,199008 918667 141736 1272792 428697 528496 183...
3,12899782_clicks,595994 1007613 834354 1033148 889671 479970 82...
4,12899783_clicks,1817895 294573 1811433 169050 607638 354698 17...
...,...,...
5015404,14571577_orders,1141710 1276792 86916 367734 842555 1004292 16...
5015405,14571578_orders,519105 815460 1811714 977826 290137 822641 841...
5015406,14571579_orders,739876 1750859 785544 770418 870569 51363 2105...
5015407,14571580_orders,202353 433425 1314576 1231403 888228 891417 92...


In [None]:
# Reinstall the Kaggle CLI at a pinned version, then upload the submission file
# to the otto-recommender-system competition.
# NOTE(review): the reason for pinning kaggle==1.5.6 is not visible here -- confirm it is still needed.
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6
!kaggle competitions submit -c otto-recommender-system  -f ./submission/mixed_submission.csv -m "Submission via API"