In [1]:
import time
import numpy as np 
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import CountVectorizer

### Features

In [2]:
df = pd.read_csv('./data/data.txt', sep=' ', nrows=100)
df.columns

Index(['instance_id', 'item_id', 'item_category_list', 'item_property_list',
       'item_brand_id', 'item_city_id', 'item_price_level', 'item_sales_level',
       'item_collected_level', 'item_pv_level', 'user_id', 'user_gender_id',
       'user_age_level', 'user_occupation_id', 'user_star_level', 'context_id',
       'context_timestamp', 'context_page_id', 'predict_category_property',
       'shop_id', 'shop_review_num_level', 'shop_review_positive_rate',
       'shop_star_level', 'shop_score_service', 'shop_score_delivery',
       'shop_score_description', 'is_trade'],
      dtype='object')

In [5]:
df_is_trade = df[df.is_trade == 1]
df_not_trade = df[df.is_trade == 0]

df_sub_not_trade = df_not_trade.sample(frac=0.01)
df_sub_not_trade

Unnamed: 0,instance_id,item_id,item_category_list,item_property_list,item_brand_id,item_city_id,item_price_level,item_sales_level,item_collected_level,item_pv_level,...,context_page_id,predict_category_property,shop_id,shop_review_num_level,shop_review_positive_rate,shop_star_level,shop_score_service,shop_score_delivery,shop_score_description,is_trade
29,5978989494083802018,4596211878136725009,7908382889764677758;4879721024980945592,2072967855524022579;2636395404473730413;462193...,2768135360437758991,3948283326616421003,7,7,13,14,...,4010,4879721024980945592:2636395404473730413;581450...,6768355095170828465,14,0.980592,5012,0.97268,0.972113,0.97068,0


In [25]:
data = np.array(pd.concat([df_sub_not_trade, df_is_trade], axis=0).reset_index(drop=True))
data[np.where(data[:, -1] == 1),0], data.shape

(array([[7972325483281584825, 5698113918818659664, 3398773118902833938,
         8127284011992533394]], dtype=object), (5, 27))

In [24]:
from sklearn.utils import shuffle
shuffle(pd.concat([df_sub_not_trade, df_is_trade], axis=0)).reset_index(drop=True)

Unnamed: 0,instance_id,item_id,item_category_list,item_property_list,item_brand_id,item_city_id,item_price_level,item_sales_level,item_collected_level,item_pv_level,...,context_page_id,predict_category_property,shop_id,shop_review_num_level,shop_review_positive_rate,shop_star_level,shop_score_service,shop_score_delivery,shop_score_description,is_trade
0,5978989494083802018,4596211878136725009,7908382889764677758;4879721024980945592,2072967855524022579;2636395404473730413;462193...,2768135360437758991,3948283326616421003,7,7,13,14,...,4010,4879721024980945592:2636395404473730413;581450...,6768355095170828465,14,0.980592,5012,0.97268,0.972113,0.97068,0
1,7972325483281584825,285660928590172217,7908382889764677758;8277336076276184272,2072967855524022579;5131280576272319091;263639...,9057103201734987852,548352491538518780,8,9,8,13,...,4001,"4879721024980945592:2636395404473730413,719936...",4885989684392199728,15,0.985427,5012,0.974878,0.976863,0.969278,1
2,8127284011992533394,1093710751022752245,7908382889764677758;5755694407684602296,5131280576272319091;2636395404473730413;101341...,7066302540842412840,3948283326616421003,7,11,12,19,...,4012,509660095530134768:2636395404473730413;8277336...,6597981382309269962,21,0.99704,5018,0.979661,0.979589,0.975442,1
3,3398773118902833938,557883074900282934,7908382889764677758;8277336076276184272,2636395404473730413;6434796455031995313;643479...,7066302540842412840,3948283326616421003,7,12,14,19,...,4001,8277336076276184272:2636395404473730413;176016...,6597981382309269962,21,0.99704,5018,0.979661,0.979589,0.975442,1
4,5698113918818659664,919980016657888153,7908382889764677758;5755694407684602296,5131280576272319091;2636395404473730413;124376...,7066302540842412840,3948283326616421003,7,11,12,19,...,4001,"5755694407684602296:9142827274221572643,914848...",6597981382309269962,21,0.99704,5018,0.979661,0.979589,0.975442,1


In [205]:
df.is_trade.value_counts(normalize=True)

0    0.981133
1    0.018867
Name: is_trade, dtype: float64

In [193]:
def generate_ratio(df):
    """历史转化率"""
    df['item_cat_0'] = df.item_category_list.apply(lambda x: x.split(';')[0])
    df['item_cat_1'] = df.item_category_list.apply(lambda x: x.split(';')[1])

    gp = (df.groupby(['item_cat_1', 'item_id']).is_trade.sum() /
          df.groupby(['item_cat_1']).is_trade.sum()).reset_index(name='item_cat_ratio')
    df_concat = pd.merge(df, gp, how='left', on=['item_cat_1', 'item_id']).fillna(0)

    df_concat.drop(['item_cat_0'], axis=1, inplace=True)
    return df_concat

In [194]:
df = generate_ratio(df)

In [220]:
# 􏰝􏰏􏰚􏰄􏶾􏰆􏰡􏰙􏶾􏰞􏰤􏰇􏰝􏰏􏰚􏰄􏶾􏰆􏰡􏰙􏶾􏰞􏰤􏰇number of successful transaction records /total transaction records

df['user_trade_ratio'] = df.groupby(['user_id']).is_trade.transform('mean')
df.loc[df.user_id==9222260051583315159,'user_trade_ratio']

27761     0.142857
35771     0.142857
56699     0.142857
231237    0.142857
254798    0.142857
275881    0.142857
285577    0.142857
Name: user_trade_ratio, dtype: float64

In [41]:
df.item_property_list.apply(lambda x: len(x.split(';'))).min()

22

In [223]:
df.item_property_list[6]

'2072967855524022579;5131280576272319091;2636395404473730413;9148482949976129397;7199361004668592209;4678095570925618478;2033679869864207699;3802510553218572927;1782439090818545916;820214312075361939;5320468090843686429;1134982063610307090;71803110314516845;1301076623647253687;3258780649701680217;514980552440578167;6411614163944830538;478060273908663971;1418324867218214039;6457208937217973355;4859870894564764703;6048265394477193459;5208530887565657358;2559145093180392146;8119922999783109966;90892965411648070;2935986643229018712;7125679806685817518;4998393090172019193;2653795394150816137;9132046213323956404;7004759496810158079;3674308596153344033;647428987512677251;793305798399120948;8560981779145170452;1364838863277739785;6402837117468309719;1556713051218509070;5905014002669999655;4774054090293502457;1723293555671106692;7527849185955020043;5767144546798382177;730203099166226972;8497879079912276476;7152599495073237509;615076485672811995;8382752466418861499;2296315626544640613'

In [187]:
df.groupby(['item_cat_1', 'item_id']).is_trade.sum()

item_cat_1           item_id            
2436715285093487584  8237485728010278787    0
4879721024980945592  4596211878136725009    0
509660095530134768   2854920230169118186    0
                     3079313931663066657    0
                     6753712242116121802    0
5755694407684602296  379848927776544534     0
                     919980016657888153     1
                     1093710751022752245    1
                     4215883534709645310    0
                     5202355029344881809    0
                     8574382974114632001    0
                     8824610546121211352    0
                     8996223722581518529    0
5799347067982556520  3412720377098676069    0
8277336076276184272  285660928590172217     1
                     557883074900282934     1
                     1519543411555287447    0
                     1988826649999608740    0
                     2127588125899824865    0
                     2349124492995958413    0
                     237890942005009106

### Change timestamp to date

In [26]:
df['context_timestamp'].head()

0    1537236544
1    1537243232
2    1537211052
3    1537222670
4    1537271320
Name: context_timestamp, dtype: int64

In [27]:
df['date'] = df['context_timestamp'].apply(lambda x: time.strftime('%Y-%m-%d', time.localtime(x)))

In [28]:
df['date'].unique()

array(['2018-09-17', '2018-09-18', '2018-09-20', '2018-09-21',
       '2018-09-19', '2018-09-22', '2018-09-23', '2018-09-24'],
      dtype=object)

In [29]:
def build_date_buf(date_pivot, left, right):
    date_buf = []
    for i in range(left, right):
        date = date_pivot + pd.Timedelta(i, unit='d')
        date_buf.append(date.strftime('%Y-%m-%d'))

    return date_buf

day = pd.to_datetime('2018-09-20')
lag_days = build_date_buf(day, -3, 0)
day, lag_days

(Timestamp('2018-09-20 00:00:00'), ['2018-09-17', '2018-09-18', '2018-09-19'])

In [30]:
target_df = df[df['date'].isin(['2018-09-20'])]
lag_df = df[df['date'].isin(lag_days)]

In [31]:
lag_df.groupby('item_id').is_trade.mean().reset_index().head()

Unnamed: 0,item_id,is_trade
0,696490723789804,0.0
1,1097631460775571,0.0
2,1637165183538885,0.0
3,9393908124420502,0.0
4,10102212873966760,0.111111


In [57]:
df['item_cat_ratio'] = 0
df.loc[(df.item_cat_1 == '1968056100269760729') & (df.item_id==46913422739079690), :]

Unnamed: 0,instance_id,item_id,item_category_list,item_property_list,item_brand_id,item_city_id,item_price_level,item_sales_level,item_collected_level,item_pv_level,...,shop_review_positive_rate,shop_star_level,shop_score_service,shop_score_delivery,shop_score_description,is_trade,date,item_cat_0,item_cat_1,item_cat_ratio
107798,3832515220731394505,46913422739079690,7908382889764677758;1968056100269760729,2072967855524022579;6491818071284064879;374195...,-1,8762827044490678569,4,4,6,9,...,0.989834,5008,0.975714,0.97,0.962857,0,2018-09-21,7908382889764677758,1968056100269760729,0
317126,2211301062835601509,46913422739079690,7908382889764677758;1968056100269760729,2072967855524022579;6491818071284064879;374195...,-1,8762827044490678569,4,4,6,9,...,0.989834,5008,0.976296,0.97037,0.962963,0,2018-09-22,7908382889764677758,1968056100269760729,0


In [78]:
temp = (df.groupby(['item_cat_1', 'item_id']).is_trade.sum()/df.groupby(['item_cat_1']).is_trade.sum()).reset_index(name='ratio')

In [173]:
df.drop(['item_cat_1','item_cat_0','item_cat_ratio'], inplace=True, axis=1)

In [174]:
# item to category portion
df['item_cat_0'] = df.item_category_list.apply(lambda x: x.split(';')[0])
df['item_cat_1'] = df.item_category_list.apply(lambda x: x.split(';')[1])

In [175]:
gp = (df.groupby(['item_cat_1', 'item_id']).is_trade.sum() /
          df.groupby(['item_cat_1']).is_trade.sum()).reset_index(name='ratio')
df_concat = pd.merge(df, gp, how='left', on=['item_cat_1', 'item_id'])

In [177]:
df_concat['ratio'].isnull().sum()

0

In [165]:
def make_instant_feature(df):
    first, prev = -1, -1
    first_buf, prev_buf, fif_min_buf = [], [], []
    i = 0
    for row in df.itertuples():
        cur = row.context_timestamp
        
        if first == -1:
            first = row.context_timestamp
        print(i, cur, first)
        i+=1
        first_buf.append(cur - first)

        if prev == -1:
            prev_buf.append(0)
            fif_min_buf.append(1)
        else:
            prev_buf.append(cur - prev)
            if cur - prev <= 15 * 60:
                fif_min_buf.append(fif_min_buf[-1] + 1)
            else:
                fif_min_buf.append(1)
        prev = cur

    df['first_to_now'] = first_buf
    df['prev_to_now'] = prev_buf
    df['recent_15_minutes'] = fif_min_buf

    return df[['instance_id', 'first_to_now', 'prev_to_now', 'recent_15_minutes']].reset_index(drop=True)

In [168]:
df_sub = df.loc[df.user_id==24779788309075]
sorted_df = df_sub.sort_values('context_timestamp')
uig = sorted_df.groupby(['user_id', 'item_id'])
uig[['instance_id', 'context_timestamp']].apply(make_instant_feature)

0 1537366714 1537366714
0 1537366714 1537366714
0 1537366714 1537366714
0 1537366714 1537366714
0 1537364387 1537364387
1 1537368731 1537364387


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,instance_id,first_to_now,prev_to_now,recent_15_minutes
user_id,item_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
24779788309075,3429903120089063586,0,1629978529043464171,0,0,1
24779788309075,5649087492658319596,0,4636982419062975858,0,0,1
24779788309075,5649087492658319596,1,2787042360944838947,4344,4344,1


In [162]:
df.loc[df.user_id==24779788309075, ['user_id', 'item_id', 'instance_id', 'context_timestamp']]

Unnamed: 0,user_id,item_id,instance_id,context_timestamp
188643,24779788309075,5649087492658319596,2787042360944838947,1537368731
188698,24779788309075,5649087492658319596,4636982419062975858,1537364387
194228,24779788309075,3429903120089063586,1629978529043464171,1537366714


In [100]:
df_concat = pd.merge(df, temp, how='left', on=['item_cat_1', 'item_id'])#.loc[(df.item_cat_1=='1968056100269760729') & (df.item_id==1180596174796485303), :]

In [101]:
df_concat.loc[(df_concat.item_cat_1=='1968056100269760729') & (df_concat.item_id==1180596174796485303), :]

Unnamed: 0,instance_id,item_id,item_category_list,item_property_list,item_brand_id,item_city_id,item_price_level,item_sales_level,item_collected_level,item_pv_level,...,shop_star_level,shop_score_service,shop_score_delivery,shop_score_description,is_trade,date,item_cat_0,item_cat_1,item_cat_ratio,ratio
10585,5896116892883341185,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,10,9,13,...,5014,0.979302,0.979908,0.975555,0,2018-09-17,7908382889764677758,1968056100269760729,0,0.285714
10589,5314197882911123285,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,10,9,13,...,5014,0.979302,0.979908,0.975555,0,2018-09-18,7908382889764677758,1968056100269760729,0,0.285714
10590,7058238343125480221,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,10,9,13,...,5014,0.979302,0.979908,0.975555,0,2018-09-17,7908382889764677758,1968056100269760729,0,0.285714
91187,866256051274029025,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979654,0.980123,0.975934,0,2018-09-20,7908382889764677758,1968056100269760729,0,0.285714
91188,1771132447922487667,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979654,0.980123,0.975934,0,2018-09-20,7908382889764677758,1968056100269760729,0,0.285714
230483,7409677986390795391,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979675,0.980144,0.975877,0,2018-09-20,7908382889764677758,1968056100269760729,0,0.285714
230485,6817727888974699045,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979675,0.980144,0.975877,0,2018-09-20,7908382889764677758,1968056100269760729,0,0.285714
298935,4365546601997491290,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979587,0.980027,0.975716,1,2018-09-21,7908382889764677758,1968056100269760729,0,0.285714
450660,4222251925991786246,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979582,0.980048,0.975644,0,2018-09-24,7908382889764677758,1968056100269760729,0,0.285714
450661,9132993776516422070,1180596174796485303,7908382889764677758;1968056100269760729,2072967855524022579;2636395404473730413;914848...,9208708814891093062,4918413420989329604,5,11,9,13,...,5014,0.979582,0.980048,0.975644,0,2018-09-24,7908382889764677758,1968056100269760729,0,0.285714


In [50]:
(df['item_sales_level']/df['item_collected_level']).nunique()

124

In [48]:
df[['item_sales_level', 'item_collected_level', 'user_id', 'is_trade']]

Unnamed: 0,item_sales_level,item_collected_level,user_id,is_trade
0,3,4,4505772604969228686,0
1,3,4,2692638157208937547,0
2,3,4,5247924392014515924,0
3,3,4,2681414445369714628,0
4,3,4,2729475788342039013,0
5,3,4,4512655448325954611,0
6,9,8,8811056487516803043,0
7,9,8,6507704883896466138,0
8,9,10,6203308008480593423,0
9,9,8,6041712044514783312,0


In [25]:
df[['context_timestamp', 'date']].head() # holiday, weekday, season

Unnamed: 0,context_timestamp,date
0,1537236544,2018-09-17
1,1537243232,2018-09-17
2,1537211052,2018-09-17
3,1537222670,2018-09-17
4,1537271320,2018-09-18


In [10]:
df['date'].unique()

array(['2018-09-17', '2018-09-18', '2018-09-20', '2018-09-21',
       '2018-09-19', '2018-09-22', '2018-09-23', '2018-09-24'],
      dtype=object)

In [11]:
df['item_id'].unique().size

10075

In [12]:
df['item_brand_id'].unique().size

2055

In [13]:
df['item_price_level'].unique()

array([ 3,  8,  7,  5,  4,  6,  9,  2, 10,  1, 11,  0, 17, 16])

In [14]:
df.groupby('date').is_trade.mean().reset_index()

Unnamed: 0,date,is_trade
0,2018-09-17,0.022869
1,2018-09-18,0.019245
2,2018-09-19,0.020219
3,2018-09-20,0.018804
4,2018-09-21,0.01946
5,2018-09-22,0.018528
6,2018-09-23,0.016131
7,2018-09-24,0.015608


In [15]:
df.groupby('item_id').is_trade.mean().reset_index().head(10)

Unnamed: 0,item_id,is_trade
0,696490723789804,0.071429
1,1097631460775571,0.016949
2,1637165183538885,0.0
3,3341342041473146,0.0
4,4055398786868336,0.0
5,4255654217639344,0.0
6,6536469184064787,0.0
7,7683654146703952,0.0
8,9393908124420502,0.040816
9,10102212873966760,0.051282


In [16]:
df['user_gender_id'].unique()

array([ 1,  0,  2, -1])

In [14]:
oh_encoder = OneHotEncoder(sparse=True, categories='auto')
oh_encoder.fit_transform(df['user_gender_id'].values.reshape((-1, 1))).toarray()

array([[0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.]])

In [15]:
cv = CountVectorizer()
cv.fit_transform(df['item_category_list']).toarray()[0:10]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]], dtype=int64)

In [16]:
space = oh_encoder.fit_transform(df['user_gender_id'].values.reshape((-1, 1))).toarray()
val = cv.fit_transform(df['item_category_list']).toarray()
np.hstack((space, val))[0:10]

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 1., 0., 0.]])

In [17]:
feature_importance_1 = eval(open('feature_importance_1.txt', 'r').read())
sorted(feature_importance_1.items(), key=lambda x: x[1], reverse=True)

[('user_star_level', 1686),
 ('user_age_level', 1039),
 ('shop_score_description', 843),
 ('shop_score_delivery', 742),
 ('item_sales_level', 656),
 ('shop_score_service', 652),
 ('shop_review_positive_rate', 639),
 ('item_collected_level', 535),
 ('item_pv_level', 495),
 ('user_occupation_id_4', 386),
 ('item_price_level', 332),
 ('user_gender_id_2', 318),
 ('user_occupation_id_1', 304),
 ('shop_review_num_level', 286),
 ('shop_star_level', 194),
 ('user_gender_id_1', 184),
 ('item_category_list_7', 100),
 ('item_city_id_94', 95),
 ('item_category_list_13', 86),
 ('item_id_3693', 83),
 ('item_category_list_11', 77),
 ('item_category_list_8', 72),
 ('item_category_list_14', 68),
 ('item_id_3658', 66),
 ('item_city_id_92', 65),
 ('user_occupation_id_3', 63),
 ('item_category_list_9', 60),
 ('item_brand_id_1292', 59),
 ('item_category_list_5', 55),
 ('item_brand_id_47', 55),
 ('item_id_2444', 51),
 ('item_id_1937', 46),
 ('item_brand_id_557', 45),
 ('item_city_id_71', 44),
 ('item_brand_

In [None]:
feature_importance_2 = eval(open('feature_importance_2.txt', 'r').read())
sorted(feature_importance_2.items(), key=lambda x: x[1], reverse=True)

[('user_star_level', 1417),
 ('user_age_level', 892),
 ('item_convrate', 789),
 ('age_item_convraterate', 765),
 ('shop_score_service', 709),
 ('shop_score_description', 639),
 ('shop_score_delivery', 624),
 ('shop_review_positive_rate', 604),
 ('first_to_now', 518),
 ('item_sales_level', 499),
 ('prev_to_now', 491),
 ('item_pv_level', 321),
 ('item_price_level', 298),
 ('shop_review_num_level', 294),
 ('user_occupation_id_1', 277),
 ('user_occupation_id_4', 228),
 ('item_collected_level', 208),
 ('user_gender_id_1', 192),
 ('user_gender_id_2', 164),
 ('shop_star_level', 125),
 ('recent_15_minutes', 94),
 ('item_category_list_11', 86),
 ('item_city_id_96', 82),
 ('item_id_3901', 60),
 ('item_brand_id_288', 59),
 ('item_id_2532', 58),
 ('shop_id_1606', 53),
 ('item_category_list_9', 50),
 ('user_occupation_id_3', 50),
 ('item_category_list_5', 49),
 ('item_brand_id_1327', 49),
 ('item_category_list_8', 48),
 ('item_category_list_14', 48),
 ('item_category_list_13', 46),
 ('item_brand_id