In [1]:
import pandas as pd
import numpy as np
from pycaret.classification import *

In [2]:
# Lookup tables.
aisles = pd.read_csv('./data/aisles.csv')
dept = pd.read_csv('./data/departments.csv')
products = pd.read_csv('./data/products.csv')

# Order headers, tagged with an eval_set column used below to split them.
orders = pd.read_csv('./data/orders.csv')

# Order -> product line items for the "prior" and "train" files.
orders_p = pd.read_csv('./data/order_products__prior.csv')
orders_tr = pd.read_csv('./data/order_products__train.csv')

In [3]:
def merge_products(x):
    """Return the values of ``x`` as one space-separated string.

    ``x`` must support ``astype`` and iteration; in this notebook it is the
    per-group ``product_id`` column handed over by ``groupby(...).aggregate``.
    Every value is cast to ``str`` before joining.
    """
    as_text = x.astype('str')
    return " ".join(as_text)

### Baseline (reuse the products of each user's previous order)

In [4]:
# Id of the preceding order for every row, computed within each user so that a
# user's first order gets NaN instead of the previous user's last order id
# (a plain shift(1) over the whole table leaks ids across users).
# Relies on the rows of each user appearing in chronological order — TODO confirm.
orders['last_order'] = orders.groupby('user_id')['order_id'].shift(1)

In [5]:
# Split the order table into its three eval_set partitions.
prior_order, train_order, test_order = (
    orders[orders['eval_set'] == tag] for tag in ("prior", "train", "test")
)

In [6]:
# Attach to every test order the line items of the order referenced by its
# 'last_order' column; the left join keeps test orders that find no match.
baseline = test_order.merge(
    orders_p,
    how='left',
    left_on='last_order',
    right_on='order_id',
)

In [7]:
# Collapse the joined line items to one row per test order ('order_id_x' is the
# test order's id after the merge) with its product ids joined into one string.
baseline_df = (
    baseline.loc[:, ['order_id_x', 'product_id']]
    .groupby('order_id_x')['product_id']
    .agg(merge_products)
    .reset_index()
)

In [8]:
# Rename the two columns to the names used in the submission files.
baseline_df = baseline_df.rename(
    columns={'order_id_x': 'order_id', 'product_id': 'products'}
)

In [9]:
# Display (rows, columns) of the baseline submission frame as a quick size check.
baseline_df.shape

(75000, 2)

In [10]:
# Persist the baseline submission; index=False keeps the row index out of the CSV.
baseline_df.to_csv("./data/base_model.csv", index=False)

### Classification Models

In [11]:
# Table that is passed to every saved model for scoring in the cells below.
# presumably it carries 'user_id' and 'product_id', which submit_data() reads
# from the prediction output — verify against the file.
df_submit = pd.read_csv('./data/df_submit.csv')

In [12]:
def submit_data(model, dataset, orders=None):
    """Score ``dataset`` with ``model`` and build a one-row-per-order submission.

    Parameters
    ----------
    model
        Fitted PyCaret pipeline, passed straight to ``predict_model``.
    dataset : pandas.DataFrame
        Rows to score. The prediction output must contain ``user_id`` and
        ``product_id`` columns.
    orders : pandas.DataFrame, optional
        Orders to produce rows for; needs ``order_id`` and ``user_id``.
        Defaults to the notebook-level ``test_order`` so existing
        two-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``order_id`` and ``products``; ``products`` is the
        space-separated list of product ids predicted positive for the
        order's user.
    """
    if orders is None:
        # Resolved at call time, so the cell defining test_order only has to
        # run before the first call, not before this definition.
        orders = test_order

    pred = predict_model(model, data=dataset)

    # PyCaret 2.x names the predicted class column 'Label'; 3.x renamed it to
    # 'prediction_label'. Accept either so the helper survives an upgrade.
    label_col = 'Label' if 'Label' in pred.columns else 'prediction_label'

    # Keep only the positively predicted (user, product) pairs and only the two
    # columns the submission needs; extra prediction columns would be carried
    # through the merge for nothing and could collide with 'order_id'.
    positives = (
        pred.loc[pred[label_col] == 1, ['user_id', 'product_id']]
        .assign(product_id=lambda frame: frame['product_id'].astype('str'))
    )

    merged = pd.merge(orders[['order_id', 'user_id']], positives, on=['user_id'], how='left')
    # Orders whose user has no positive prediction get product id 0, which
    # merge_products renders as the string "0".
    # NOTE(review): confirm "0" is the placeholder the submission format expects.
    merged['product_id'] = merged['product_id'].fillna(0)

    df = merged.groupby(['order_id'])['product_id'].aggregate(merge_products).reset_index()
    df.columns = ['order_id', 'products']
    print(df.shape)
    return df

##### Naive Bayes

In [13]:
# Load the saved Naive Bayes pipeline (path is given without a file extension).
# NOTE(review): presumably this deserializes a pickled object — only load files you trust.
naive_b = load_model('./data/naive_b')

Transformation Pipeline and Model Sucessfully Loaded


In [14]:
# Score df_submit with the Naive Bayes pipeline, then write the submission CSV.
naiveb_submission = submit_data(naive_b, df_submit)
naiveb_submission.to_csv('./data/naiveb_model.csv', index=False)

(75000, 2)


##### Decision Tree

In [15]:
# Load the saved Decision Tree pipeline (path is given without a file extension).
# NOTE(review): presumably this deserializes a pickled object — only load files you trust.
dt = load_model('./data/dt')

Transformation Pipeline and Model Sucessfully Loaded


In [17]:
# Score df_submit with the Decision Tree pipeline, then write the submission CSV.
dt_submission = submit_data(dt, df_submit)
dt_submission.to_csv('./data/dt.csv', index=False)

(75000, 2)


##### Light Gradient Boosting Machine (LightGBM)

In [19]:
# Load the saved LightGBM pipeline (path is given without a file extension).
# NOTE(review): presumably this deserializes a pickled object — only load files you trust.
lightgbm = load_model('./data/light')

Transformation Pipeline and Model Sucessfully Loaded


In [20]:
# Score df_submit with the LightGBM pipeline, then write the submission CSV.
lightgbm_submission = submit_data(lightgbm, df_submit)
lightgbm_submission.to_csv('./data/lightgbm.csv', index=False)

(75000, 2)
