In [1]:
import numpy as np
import pandas as pd
from category_encoders.binary import BinaryEncoder
import gc

In [2]:
orders = pd.read_csv('orders.csv')
order_product_prior = pd.read_csv('order_products__prior.csv')
order_product_train = pd.read_csv('order_products__train.csv')
products = pd.read_csv('products.csv')
aisles=pd.read_csv('aisles.csv')
departments = pd.read_csv('departments.csv') 

In [3]:
train_order_df = orders.merge(order_product_prior, on='order_id', how='inner')
train_order_df = train_order_df.merge(products, on='product_id', how='left')
train_order_df = train_order_df.merge(aisles, on='aisle_id', how='left')
train_order_df = train_order_df.merge(departments, on='department_id', how='left')
train_order_df.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department
0,2539329,1,prior,1,2,8,,196,1,0,Soda,77,7,soft drinks,beverages
1,2539329,1,prior,1,2,8,,14084,2,0,Organic Unsweetened Vanilla Almond Milk,91,16,soy lactosefree,dairy eggs
2,2539329,1,prior,1,2,8,,12427,3,0,Original Beef Jerky,23,19,popcorn jerky,snacks
3,2539329,1,prior,1,2,8,,26088,4,0,Aged White Cheddar Popcorn,23,19,popcorn jerky,snacks
4,2539329,1,prior,1,2,8,,26405,5,0,XL Pick-A-Size Paper Towel Rolls,54,17,paper goods,household


In [4]:
train_order_df.isnull().any()

order_id                  False
user_id                   False
eval_set                  False
order_number              False
order_dow                 False
order_hour_of_day         False
days_since_prior_order     True
product_id                False
add_to_cart_order         False
reordered                 False
product_name              False
aisle_id                  False
department_id             False
aisle                     False
department                False
dtype: bool

In [5]:
train_order_df['days_since_prior_order']=train_order_df['days_since_prior_order'].fillna(0)

In [6]:
train_order_df['total_products_by_user']=train_order_df.groupby(['user_id','product_id']).cumcount()+1
train_order_df.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,total_products_by_user
0,2539329,1,prior,1,2,8,0.0,196,1,0,Soda,77,7,soft drinks,beverages,1
1,2539329,1,prior,1,2,8,0.0,14084,2,0,Organic Unsweetened Vanilla Almond Milk,91,16,soy lactosefree,dairy eggs,1
2,2539329,1,prior,1,2,8,0.0,12427,3,0,Original Beef Jerky,23,19,popcorn jerky,snacks,1
3,2539329,1,prior,1,2,8,0.0,26088,4,0,Aged White Cheddar Popcorn,23,19,popcorn jerky,snacks,1
4,2539329,1,prior,1,2,8,0.0,26405,5,0,XL Pick-A-Size Paper Towel Rolls,54,17,paper goods,household,1


In [7]:
train_order_df.groupby('product_id')['total_products_by_user'].sum()

product_id
1        8691
2         112
3        1964
4         779
5          39
         ... 
49684      10
49685      55
49686     782
49687      22
49688     110
Name: total_products_by_user, Length: 49677, dtype: int64

Now, further features will be extracted based on products, aisles, and departments: 

# Product level features:

In [8]:
product_feat_df = train_order_df.groupby(['product_id'], as_index=False).agg({'reordered': ['count','sum','mean'],
                                                            'add_to_cart_order':'mean',
                                                             'user_id':lambda x:x.nunique(),
                                                             
                                                                             }
                                                                            )

product_feat_df.columns=['product_id','total_orders_by_products','total_reorders_by_products','reorder_ratio_by_products','add_to_cart_ratio_by_products','total_unique_customers_by_products']
product_feat_df.head()

Unnamed: 0,product_id,total_orders_by_products,total_reorders_by_products,reorder_ratio_by_products,add_to_cart_ratio_by_products,total_unique_customers_by_products
0,1,1852,1136,0.613391,5.801836,716
1,2,90,12,0.133333,9.888889,78
2,3,277,203,0.732852,6.415162,74
3,4,329,147,0.446809,9.507599,182
4,5,15,9,0.6,6.466667,6


# Aisle level features:

In [9]:
aisle_feat_df = train_order_df.groupby(['aisle_id'], as_index=False).agg({'reordered': ['count','sum','mean'],
                                                            'add_to_cart_order':['mean','std'],
                                                             'user_id':lambda x:x.nunique()}
                                                                            )

aisle_feat_df.columns=['aisle_id','total_orders_by_aisle','total_reorders_by_aisle','reorder_ratio_by_aisle','add_to_cart_ratio_by_aisle','add_to_cart_std_by_aisle','total_unique_customers_by_aisle']
aisle_feat_df.head()

Unnamed: 0,aisle_id,total_orders_by_aisle,total_reorders_by_aisle,reorder_ratio_by_aisle,add_to_cart_ratio_by_aisle,add_to_cart_std_by_aisle,total_unique_customers_by_aisle
0,1,71928,42912,0.596597,8.16764,7.104166,20711
1,2,82491,40365,0.489326,9.275497,7.473802,31222
2,3,456386,272922,0.598007,9.571935,7.899672,63592
3,4,200687,98243,0.489533,10.16145,7.745705,53892
4,5,62510,17542,0.280627,10.2976,8.187047,32312


# Department Level Features

In [10]:
department_feat_df = train_order_df.groupby(['department_id'], as_index=False).agg({'reordered': ['count','sum','mean'],
                                                            'add_to_cart_order':['mean','std'],
                                                             'user_id':lambda x:x.nunique() })

department_feat_df.columns=['department_id','total_orders_by_department','total_reorders_by_department','reorder_ratio_by_department','add_to_cart_ratio_by_department','add_to_cart_std_by_department','total_unique_customers_by_department']
department_feat_df.head()

Unnamed: 0,department_id,total_orders_by_department,total_reorders_by_department,reorder_ratio_by_department,add_to_cart_ratio_by_department,add_to_cart_std_by_department,total_unique_customers_by_department
0,1,2236432,1211890,0.541885,8.996414,7.393502,163233
1,2,36291,14806,0.40798,8.277645,7.526272,17875
2,3,1176787,739188,0.628141,8.084397,6.904849,140612
3,4,9479291,6160710,0.649913,8.022875,6.658899,193237
4,5,153696,87595,0.569924,5.428346,5.778253,15798


In [11]:
product_feat_df=product_feat_df.merge(products, on='product_id',how='left')
product_feat_df=product_feat_df.merge(aisles, on='aisle_id', how='left')
product_feat_df=product_feat_df.merge(aisle_feat_df, on='aisle_id', how='left')
product_feat_df=product_feat_df.merge(departments, on='department_id', how='left')
product_feat_df=product_feat_df.merge(department_feat_df, on='department_id', how='left')
product_feat_df.head()

Unnamed: 0,product_id,total_orders_by_products,total_reorders_by_products,reorder_ratio_by_products,add_to_cart_ratio_by_products,total_unique_customers_by_products,product_name,aisle_id,department_id,aisle,...,add_to_cart_ratio_by_aisle,add_to_cart_std_by_aisle,total_unique_customers_by_aisle,department,total_orders_by_department,total_reorders_by_department,reorder_ratio_by_department,add_to_cart_ratio_by_department,add_to_cart_std_by_department,total_unique_customers_by_department
0,1,1852,1136,0.613391,5.801836,716,Chocolate Sandwich Cookies,61,19,cookies cakes,...,9.253092,7.845107,54202,snacks,2887550,1657973,0.57418,9.187743,7.692492,174219
1,2,90,12,0.133333,9.888889,78,All-Seasons Salt,104,13,spices seasonings,...,9.996181,7.99479,76402,pantry,1875577,650301,0.346721,9.593425,7.875241,172755
2,3,277,203,0.732852,6.415162,74,Robust Golden Unsweetened Oolong Tea,94,7,tea,...,8.519846,7.818249,53197,beverages,2690129,1757892,0.65346,6.976699,6.711172,172795
3,4,329,147,0.446809,9.507599,182,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,frozen meals,...,9.207741,7.43574,58749,frozen,2236432,1211890,0.541885,8.996414,7.393502,163233
4,5,15,9,0.6,6.466667,6,Green Chile Anytime Sauce,5,13,marinades meat preparation,...,10.2976,8.187047,32312,pantry,1875577,650301,0.346721,9.593425,7.875241,172755


In [12]:
product_feat_df.drop(['product_name', 'aisle_id','department_id'], inplace=True, axis=1)

In [13]:
encoders=BinaryEncoder(cols=['aisle','department'], return_df=True)

In [14]:
product_feat_df=encoders.fit_transform(product_feat_df)
product_feat_df.head()

Unnamed: 0,product_id,total_orders_by_products,total_reorders_by_products,reorder_ratio_by_products,add_to_cart_ratio_by_products,total_unique_customers_by_products,aisle_0,aisle_1,aisle_2,aisle_3,...,department_1,department_2,department_3,department_4,total_orders_by_department,total_reorders_by_department,reorder_ratio_by_department,add_to_cart_ratio_by_department,add_to_cart_std_by_department,total_unique_customers_by_department
0,1,1852,1136,0.613391,5.801836,716,0,0,0,0,...,0,0,0,1,2887550,1657973,0.57418,9.187743,7.692492,174219
1,2,90,12,0.133333,9.888889,78,0,0,0,0,...,0,0,1,0,1875577,650301,0.346721,9.593425,7.875241,172755
2,3,277,203,0.732852,6.415162,74,0,0,0,0,...,0,0,1,1,2690129,1757892,0.65346,6.976699,6.711172,172795
3,4,329,147,0.446809,9.507599,182,0,0,0,0,...,0,1,0,0,2236432,1211890,0.541885,8.996414,7.393502,163233
4,5,15,9,0.6,6.466667,6,0,0,0,0,...,0,0,1,0,1875577,650301,0.346721,9.593425,7.875241,172755


In [15]:
# free some memory
del aisle_feat_df, department_feat_df, aisles, departments
gc.collect()

0

# User Based Features:

In [16]:
user_feat_df=train_order_df.groupby(['user_id'], as_index=False).agg({'order_dow':['mean','std'],
                                                                      'order_hour_of_day':['mean','std'],
                                                                      'days_since_prior_order':['mean', 'std'],
                                                                      'reordered':['count','sum','mean'],
                                                                      'product_id': [lambda x:x.nunique(), 'mean'],
                                                                      'order_number':lambda x:x.nunique(),
                                                                      
                                
                                                                                   })
user_feat_df.columns=['user_id','order_dow_ratio','order_dow_std','order_hour_of_day_ratio','order_hour_of_day_std','days_since_prior_order_ratio','days_since_prior_std','total_ordered_items_by_user','total_reordered_items_by_user','reorder_item_percentage_by_user','total_unique_products_by_users','user_each_product_buying_ratio','total_orders_by_user']
user_feat_df.tail()

Unnamed: 0,user_id,order_dow_ratio,order_dow_std,order_hour_of_day_ratio,order_hour_of_day_std,days_since_prior_order_ratio,days_since_prior_std,total_ordered_items_by_user,total_reordered_items_by_user,reorder_item_percentage_by_user,total_unique_products_by_users,user_each_product_buying_ratio,total_orders_by_user
206204,206205,3.71875,1.084625,13.625,1.791557,9.6875,12.568361,32,8,0.25,24,28724.96875,3
206205,206206,2.312281,1.929394,16.796491,2.282972,3.985965,3.46204,285,135,0.473684,150,27728.403509,67
206206,206207,2.896861,2.051837,13.130045,4.499864,13.278027,11.624137,223,131,0.587444,92,27029.632287,16
206207,206208,2.760709,1.734285,13.968981,3.802901,7.310192,4.106003,677,479,0.707533,198,25954.423929,49
206208,206209,2.658915,1.538547,12.922481,2.737507,18.232558,10.919815,129,61,0.472868,68,22312.124031,13


In [17]:
user_order_feat = train_order_df.groupby(['user_id','order_number']).agg({'reordered':['count','mean']}).reset_index()
user_order_feat.columns=['user_id','order_number','order_size','reorder_ratio_by_order_number']
user_order_feat.head()

Unnamed: 0,user_id,order_number,order_size,reorder_ratio_by_order_number
0,1,1,5,0.0
1,1,2,6,0.5
2,1,3,5,0.6
3,1,4,5,1.0
4,1,5,8,0.625


In [18]:
user_feat_2= user_order_feat.groupby('user_id').agg({'order_size':'mean','reorder_ratio_by_order_number':'mean'}).reset_index()
user_feat_2.columns=['user_id','avg_order_size','avg_reorders_in_order_size']
user_feat_2.head()

Unnamed: 0,user_id,avg_order_size,avg_reorders_in_order_size
0,1,5.9,0.705833
1,2,13.928571,0.447961
2,3,7.333333,0.658817
3,4,3.6,0.028571
4,5,9.25,0.377778


In [19]:
user_feat_df= user_feat_df.merge(user_feat_2,on='user_id', how='left')
user_feat_df.head()

Unnamed: 0,user_id,order_dow_ratio,order_dow_std,order_hour_of_day_ratio,order_hour_of_day_std,days_since_prior_order_ratio,days_since_prior_std,total_ordered_items_by_user,total_reordered_items_by_user,reorder_item_percentage_by_user,total_unique_products_by_users,user_each_product_buying_ratio,total_orders_by_user,avg_order_size,avg_reorders_in_order_size
0,1,2.644068,1.256194,10.542373,3.500355,18.542373,10.559065,59,41,0.694915,18,17885.966102,10,5.9,0.705833
1,2,2.005128,0.971222,10.441026,1.649854,14.902564,9.671712,195,93,0.476923,102,25398.015385,14,13.928571,0.447961
2,3,1.011364,1.24563,16.352273,1.454599,10.181818,5.867396,88,55,0.625,33,27459.238636,12,7.333333,0.658817
3,4,4.722222,0.826442,13.111111,1.745208,11.944444,9.97333,18,1,0.055556,17,23302.666667,5,3.6,0.028571
4,5,1.621622,1.276961,15.72973,2.588958,10.189189,7.600577,37,14,0.378378,23,22513.243243,4,9.25,0.377778


Reorders in last 3 orders:

In [20]:
last_three_orders = user_order_feat.groupby('user_id')['order_number'].nlargest(3).reset_index()

In [21]:
last_three_orders=user_order_feat.merge(last_three_orders, on=['user_id','order_number'], how='inner')
last_three_orders.head()

Unnamed: 0,user_id,order_number,order_size,reorder_ratio_by_order_number,level_1
0,1,8,6,0.666667,7
1,1,9,6,1.0,8
2,1,10,9,0.666667,9
3,2,12,19,0.578947,21
4,2,13,9,0.0,22


In [22]:
last_three_orders['rank'] = last_three_orders.groupby('user_id')['order_number'].rank(method='dense', ascending=True)
last_three_orders.head()

Unnamed: 0,user_id,order_number,order_size,reorder_ratio_by_order_number,level_1,rank
0,1,8,6,0.666667,7,1.0
1,1,9,6,1.0,8,2.0
2,1,10,9,0.666667,9,3.0
3,2,12,19,0.578947,21,1.0
4,2,13,9,0.0,22,2.0


In [23]:
last_orders_items = last_three_orders.pivot(index='user_id',columns='rank',values=['order_size','reorder_ratio_by_order_number']).reset_index(drop=False)
last_orders_items.columns = ['user_id', 'third_last_order_size','second_last_order_size','last_order_size','reorder_in_third_last','reorder_in_second_last','reorder_in_last']
last_orders_items.head()

Unnamed: 0,user_id,third_last_order_size,second_last_order_size,last_order_size,reorder_in_third_last,reorder_in_second_last,reorder_in_last
0,1,6.0,6.0,9.0,0.666667,1.0,0.666667
1,2,19.0,9.0,16.0,0.578947,0.0,0.625
2,3,6.0,5.0,6.0,0.833333,1.0,1.0
3,4,7.0,2.0,3.0,0.142857,0.0,0.0
4,5,9.0,5.0,12.0,0.444444,0.4,0.666667


In [24]:
user_feat_df = user_feat_df.merge(last_orders_items, how='left', on='user_id')
user_feat_df.head()

Unnamed: 0,user_id,order_dow_ratio,order_dow_std,order_hour_of_day_ratio,order_hour_of_day_std,days_since_prior_order_ratio,days_since_prior_std,total_ordered_items_by_user,total_reordered_items_by_user,reorder_item_percentage_by_user,...,user_each_product_buying_ratio,total_orders_by_user,avg_order_size,avg_reorders_in_order_size,third_last_order_size,second_last_order_size,last_order_size,reorder_in_third_last,reorder_in_second_last,reorder_in_last
0,1,2.644068,1.256194,10.542373,3.500355,18.542373,10.559065,59,41,0.694915,...,17885.966102,10,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667
1,2,2.005128,0.971222,10.441026,1.649854,14.902564,9.671712,195,93,0.476923,...,25398.015385,14,13.928571,0.447961,19.0,9.0,16.0,0.578947,0.0,0.625
2,3,1.011364,1.24563,16.352273,1.454599,10.181818,5.867396,88,55,0.625,...,27459.238636,12,7.333333,0.658817,6.0,5.0,6.0,0.833333,1.0,1.0
3,4,4.722222,0.826442,13.111111,1.745208,11.944444,9.97333,18,1,0.055556,...,23302.666667,5,3.6,0.028571,7.0,2.0,3.0,0.142857,0.0,0.0
4,5,1.621622,1.276961,15.72973,2.588958,10.189189,7.600577,37,14,0.378378,...,22513.243243,4,9.25,0.377778,9.0,5.0,12.0,0.444444,0.4,0.666667


In [25]:
product_feat_df.shape

(49677, 31)

# User's Product based features

In [26]:
user_product_features = train_order_df.groupby(['user_id','product_id']).agg({'reordered':['count','sum','mean'],
                                                                              'add_to_cart_order':'mean',
                                                                             'order_dow':'mean',
                                                                             'order_hour_of_day':'mean',
                                                                             'days_since_prior_order':'mean'}).reset_index()
user_product_features.columns=['user_id','product_id','user_product_orders','user_product_reorders','user_product_reorder_ratio','user_product_add_to_cart_ratio','user_product_dow_ratio','user_product_hour_ratio','avg_days_since_last_purchase']
user_product_features.head()

Unnamed: 0,user_id,product_id,user_product_orders,user_product_reorders,user_product_reorder_ratio,user_product_add_to_cart_ratio,user_product_dow_ratio,user_product_hour_ratio,avg_days_since_last_purchase
0,1,196,10,9,0.9,1.4,2.5,10.3,17.6
1,1,10258,9,8,0.888889,3.333333,2.555556,10.555556,19.555556
2,1,10326,1,0,0.0,5.0,4.0,15.0,28.0
3,1,12427,10,9,0.9,3.3,2.5,10.3,17.6
4,1,13032,3,2,0.666667,6.333333,2.666667,8.0,21.666667


In [27]:
user_last_orders_by_products_df = train_order_df.merge(last_three_orders, how='inner', on=['user_id','order_number'])
user_last_orders_by_products_df.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,total_products_by_user,order_size,reorder_ratio_by_order_number,level_1,rank
0,3108588,1,prior,8,1,14,14.0,12427,1,1,Original Beef Jerky,23,19,popcorn jerky,snacks,8,6,0.666667,7,1.0
1,3108588,1,prior,8,1,14,14.0,196,2,1,Soda,77,7,soft drinks,beverages,8,6,0.666667,7,1.0
2,3108588,1,prior,8,1,14,14.0,10258,3,1,Pistachios,117,19,nuts seeds dried fruit,snacks,7,6,0.666667,7,1.0
3,3108588,1,prior,8,1,14,14.0,25133,4,1,Organic String Cheese,21,16,packaged cheese,dairy eggs,6,6,0.666667,7,1.0
4,3108588,1,prior,8,1,14,14.0,46149,5,0,Zero Calorie Cola,77,7,soft drinks,beverages,1,6,0.666667,7,1.0


In [28]:
user_last_orders_by_products_df['rank']=user_last_orders_by_products_df.groupby(['user_id','product_id'])['order_number'].rank(method='dense', ascending=True)
user_last_orders_by_products_df.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,total_products_by_user,order_size,reorder_ratio_by_order_number,level_1,rank
0,3108588,1,prior,8,1,14,14.0,12427,1,1,Original Beef Jerky,23,19,popcorn jerky,snacks,8,6,0.666667,7,1.0
1,3108588,1,prior,8,1,14,14.0,196,2,1,Soda,77,7,soft drinks,beverages,8,6,0.666667,7,1.0
2,3108588,1,prior,8,1,14,14.0,10258,3,1,Pistachios,117,19,nuts seeds dried fruit,snacks,7,6,0.666667,7,1.0
3,3108588,1,prior,8,1,14,14.0,25133,4,1,Organic String Cheese,21,16,packaged cheese,dairy eggs,6,6,0.666667,7,1.0
4,3108588,1,prior,8,1,14,14.0,46149,5,0,Zero Calorie Cola,77,7,soft drinks,beverages,1,6,0.666667,7,1.0


In [29]:
reorder_products_in_last_orders = user_last_orders_by_products_df.pivot(index=['user_id','product_id'],columns='rank', values='reordered').reset_index(drop=False)
reorder_products_in_last_orders.columns=['user_id','product_id','reorder_in_third_last_order','reorder_in_second_last_order','reorder_in_last_order']
reorder_products_in_last_orders.head()

Unnamed: 0,user_id,product_id,reorder_in_third_last_order,reorder_in_second_last_order,reorder_in_last_order
0,1,196,1.0,1.0,1.0
1,1,10258,1.0,1.0,1.0
2,1,12427,1.0,1.0,1.0
3,1,13032,1.0,,
4,1,25133,1.0,1.0,1.0


In [30]:
#fill nan values as 0, which represents the products not reordered
reorder_products_in_last_orders.fillna(0, inplace=True)
reorder_products_in_last_orders.head()

Unnamed: 0,user_id,product_id,reorder_in_third_last_order,reorder_in_second_last_order,reorder_in_last_order
0,1,196,1.0,1.0,1.0
1,1,10258,1.0,1.0,1.0
2,1,12427,1.0,1.0,1.0
3,1,13032,1.0,0.0,0.0
4,1,25133,1.0,1.0,1.0


In [31]:
user_product_features=user_product_features.merge(reorder_products_in_last_orders, on=['user_id','product_id'], how='left')
user_product_features.head()

Unnamed: 0,user_id,product_id,user_product_orders,user_product_reorders,user_product_reorder_ratio,user_product_add_to_cart_ratio,user_product_dow_ratio,user_product_hour_ratio,avg_days_since_last_purchase,reorder_in_third_last_order,reorder_in_second_last_order,reorder_in_last_order
0,1,196,10,9,0.9,1.4,2.5,10.3,17.6,1.0,1.0,1.0
1,1,10258,9,8,0.888889,3.333333,2.555556,10.555556,19.555556,1.0,1.0,1.0
2,1,10326,1,0,0.0,5.0,4.0,15.0,28.0,,,
3,1,12427,10,9,0.9,3.3,2.5,10.3,17.6,1.0,1.0,1.0
4,1,13032,3,2,0.666667,6.333333,2.666667,8.0,21.666667,1.0,0.0,0.0


In [32]:
user_product_features.isnull().any()

user_id                           False
product_id                        False
user_product_orders               False
user_product_reorders             False
user_product_reorder_ratio        False
user_product_add_to_cart_ratio    False
user_product_dow_ratio            False
user_product_hour_ratio           False
avg_days_since_last_purchase      False
reorder_in_third_last_order        True
reorder_in_second_last_order       True
reorder_in_last_order              True
dtype: bool

In [33]:
user_product_features.fillna(0,inplace=True)

# Saving the extracted features 

In [34]:
product_feat_df.to_pickle('product_feat_df.pkl')
user_feat_df.to_pickle('user_feat_df.pkl')
user_product_features.to_pickle('user_product_feat.pkl')
train_order_df.to_pickle('train_order_df')