In [2]:
import pandas as pd
import numpy as np
import os

In [8]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() 
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() 
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

def load_data(path):
#     user = reduce_mem_usage(pd.read_csv(path + 'user.csv',header=None))
#     item = reduce_mem_usage(pd.read_csv(path + 'item.csv',header=None))
    user=pd.read_csv(os.path.join(path,'user.csv'),header=None)
    item=pd.read_csv(os.path.join(path, 'item.csv'), header=None)
    data = pd.read_csv(os.path.join(path, 'user_behavior.csv'),header=None)

    data.columns = ['userID','itemID','behavior','timestamp']
    data['day'] = data['timestamp'] // 86400
    data['hour'] = data['timestamp'] // 3600 % 24
    
    ## 生成behavior的onehot
    for i in ['pv','fav','cart','buy']:
        data[i] = 0
        data.loc[data['behavior'] == i, i] = 1

    ## 生成behavior的加权
    data['day_hour'] = data['day'] + data['hour'] / float(24)
    data.loc[data['behavior']=='pv','behavior'] = 1
    data.loc[data['behavior']=='fav','behavior'] = 2
    data.loc[data['behavior']=='cart','behavior'] = 3
    # 'buy'权重为1
    data.loc[data['behavior']=='buy','behavior'] = 1
    max_day = max(data['day'])
    min_day = min(data['day'])
    data['behavior'] = (1 - (max_day-data['day_hour']+2)/(max_day-min_day+2)) * data['behavior'] 

    item.columns = ['itemID','category','shop','brand']
    user.columns = ['userID','sex','age','ability']
    
#     data = reduce_mem_usage(data)

    data = pd.merge(left=data, right=item, on='itemID',how='left')
    data = pd.merge(left=data, right=user, on='userID',how='left')

    return user, item, data
    

In [9]:
# path = '../ECommAI_EUIR_round2_train_20190816/'
user_dir=os.path.expanduser('~')
path=os.path.join(user_dir,r'github\CIKM-2019-AnalytiCup\data\ECommAI_EUIR_round1_testA_20190701')
user, item, data = load_data(path = path)

In [10]:
# 计算不同'itemID', 'shop', 'category','brand'的count()
for count_feature in ['itemID', 'shop', 'category','brand']:
    data[['behavior', count_feature]].groupby(count_feature, as_index=False).agg(
        {'behavior':'count'}).rename(columns={'behavior':count_feature + '_count'}).to_csv(str(count_feature)+'_count.csv', index=False)

    # 计算不同'itemID', 'shop', 'category','brand'的'behavior'权值的加权和
for count_feature in ['itemID', 'shop', 'category','brand']:
    data[['behavior', count_feature]].groupby(count_feature, as_index=False).agg(
        {'behavior':'sum'}).rename(columns={'behavior':count_feature + '_sum'}).to_csv(str(count_feature)+'_sum.csv', index=False)

In [20]:
temp=pd.read_csv('itemID_count.csv')
temp.head()

Unnamed: 0,itemID,itemID_count
0,82,1
1,116,1
2,132,1
3,136,4
4,178,1


In [24]:
type(data['behavior'].values[0])

float

In [25]:
temp = data[['behavior','category']].groupby('category', as_index=False).agg({'behavior': 'sum'}) #skew偏度？？
temp.columns = ['category','category_median','category_std','category_skew']
temp.to_csv('category_higher.csv',index=False)

ValueError: Length mismatch: Expected axis has 2 elements, new values have 4 elements

In [26]:
temp = data[['behavior','itemID']].groupby('itemID', as_index=False).agg({'behavior': ['median','std','skew']})
temp.columns = ['itemID','itemID_median','itemID_std','itemID_skew']

temp.to_csv('itemID_higher.csv',index=False)

DataError: No numeric types to aggregate

In [4]:
data['age'] = data['age'] // 10  # age分布18-90 //整除
train = data[data['day'] < 15]

In [27]:
## 注意 这个要生成一个underline版本和一个online版本
# 细粒度分组[itemID.gender] [itemID,age] [itemID,ability]
for count_feature in ['sex','ability','age']:
    data[['behavior','itemID',count_feature]].groupby(['itemID', count_feature], as_index=False).agg(
        {'behavior': 'count'}).rename(
        columns={'behavior':'user_to_' + count_feature + '_count'}).to_csv('item_to_' + str(count_feature)+'_count_online.csv', index=False)

In [28]:
itemcount = pd.read_csv('itemID_count.csv')

In [29]:
temp = pd.merge(left=item, right=itemcount, how='left', on='itemID')


In [38]:
item_rank = []
for eachcat in temp.groupby('category'): # eachcat是一个tuple,('categary_0',temp.loc[temp['categary']==catagary_0])
    each_df = eachcat[1].sort_values('itemID_count', ascending=False).reset_index(drop=True)
    each_df['rank'] = each_df.index + 1
    lenth = each_df.shape[0]
    print('category:{},lenth:{}'.format(eachcat[0],lenth))
    each_df['rank_percent'] = (each_df.index + 1) / lenth
    item_rank.append(each_df[['itemID','rank','rank_percent']])

category:3,lenth:37
category:6,lenth:1
category:7,lenth:2
category:10,lenth:10
category:14,lenth:676
category:16,lenth:21
category:20,lenth:10
category:24,lenth:2
category:25,lenth:14
category:26,lenth:40
category:28,lenth:16
category:30,lenth:153
category:33,lenth:10
category:35,lenth:2
category:37,lenth:106
category:39,lenth:11
category:40,lenth:10
category:45,lenth:39
category:47,lenth:51
category:50,lenth:112
category:51,lenth:12
category:53,lenth:1
category:55,lenth:1
category:61,lenth:1
category:62,lenth:276
category:64,lenth:363
category:65,lenth:144
category:67,lenth:102
category:68,lenth:5
category:69,lenth:157
category:71,lenth:1
category:73,lenth:1
category:74,lenth:1
category:75,lenth:1
category:79,lenth:2
category:80,lenth:34
category:86,lenth:6
category:89,lenth:14
category:91,lenth:33
category:94,lenth:26
category:96,lenth:53
category:97,lenth:1
category:98,lenth:34
category:99,lenth:1
category:100,lenth:23
category:102,lenth:20
category:103,lenth:15
category:107,lenth:6

category:858,lenth:6
category:860,lenth:24
category:861,lenth:33
category:862,lenth:17
category:863,lenth:32
category:864,lenth:1
category:865,lenth:5
category:866,lenth:1
category:867,lenth:138
category:868,lenth:4
category:870,lenth:11
category:875,lenth:13
category:876,lenth:2
category:877,lenth:28
category:884,lenth:16
category:886,lenth:17
category:890,lenth:5
category:895,lenth:4
category:896,lenth:32
category:897,lenth:6
category:899,lenth:29
category:900,lenth:5
category:901,lenth:37
category:902,lenth:12
category:904,lenth:226
category:905,lenth:1
category:906,lenth:2
category:907,lenth:64
category:909,lenth:174
category:910,lenth:8
category:911,lenth:53
category:913,lenth:9
category:916,lenth:2
category:917,lenth:53
category:918,lenth:22
category:922,lenth:6
category:926,lenth:13
category:927,lenth:7
category:928,lenth:1
category:931,lenth:2
category:932,lenth:2
category:933,lenth:6
category:934,lenth:107
category:939,lenth:22
category:940,lenth:142
category:941,lenth:1
categ

category:1706,lenth:15
category:1707,lenth:23
category:1710,lenth:9
category:1712,lenth:21
category:1713,lenth:687
category:1714,lenth:11
category:1715,lenth:13
category:1716,lenth:76
category:1720,lenth:2
category:1721,lenth:666
category:1723,lenth:9
category:1725,lenth:420
category:1726,lenth:1
category:1727,lenth:44
category:1728,lenth:2
category:1729,lenth:53
category:1730,lenth:30
category:1734,lenth:1
category:1735,lenth:1
category:1736,lenth:7
category:1737,lenth:2
category:1739,lenth:29
category:1741,lenth:16
category:1742,lenth:21
category:1745,lenth:2
category:1747,lenth:3
category:1748,lenth:34
category:1750,lenth:10
category:1751,lenth:2
category:1752,lenth:29
category:1755,lenth:12
category:1756,lenth:3
category:1758,lenth:1
category:1765,lenth:1
category:1768,lenth:82
category:1769,lenth:1
category:1770,lenth:23
category:1772,lenth:24
category:1774,lenth:2
category:1775,lenth:5
category:1777,lenth:109
category:1781,lenth:1
category:1783,lenth:58
category:1786,lenth:2
cate

category:2568,lenth:330
category:2570,lenth:1
category:2572,lenth:2
category:2575,lenth:91
category:2576,lenth:15
category:2578,lenth:32
category:2579,lenth:8
category:2585,lenth:104
category:2586,lenth:4
category:2588,lenth:6
category:2594,lenth:110
category:2595,lenth:2
category:2597,lenth:2
category:2599,lenth:1
category:2603,lenth:3
category:2604,lenth:3
category:2605,lenth:1470
category:2606,lenth:162
category:2608,lenth:1
category:2610,lenth:4
category:2611,lenth:2
category:2612,lenth:518
category:2613,lenth:39
category:2616,lenth:31
category:2618,lenth:8
category:2619,lenth:4
category:2620,lenth:1115
category:2622,lenth:7
category:2623,lenth:5
category:2624,lenth:30
category:2627,lenth:145
category:2632,lenth:2
category:2633,lenth:1
category:2635,lenth:165
category:2636,lenth:1
category:2637,lenth:8
category:2640,lenth:114
category:2641,lenth:3
category:2644,lenth:16
category:2646,lenth:44
category:2650,lenth:157
category:2652,lenth:177
category:2653,lenth:1
category:2654,lenth:

category:3420,lenth:10
category:3430,lenth:12
category:3432,lenth:548
category:3435,lenth:13
category:3438,lenth:320
category:3439,lenth:191
category:3446,lenth:16
category:3447,lenth:549
category:3451,lenth:2
category:3452,lenth:52
category:3458,lenth:33
category:3462,lenth:157
category:3467,lenth:31
category:3469,lenth:3
category:3470,lenth:54
category:3471,lenth:3
category:3472,lenth:1
category:3476,lenth:2
category:3477,lenth:1
category:3478,lenth:60
category:3481,lenth:1
category:3483,lenth:19
category:3484,lenth:18
category:3486,lenth:19
category:3491,lenth:137
category:3493,lenth:3
category:3494,lenth:2
category:3495,lenth:68
category:3496,lenth:3
category:3497,lenth:11
category:3501,lenth:1
category:3503,lenth:46
category:3504,lenth:4
category:3505,lenth:302
category:3507,lenth:26
category:3508,lenth:8
category:3510,lenth:1
category:3512,lenth:4
category:3513,lenth:11
category:3514,lenth:32
category:3518,lenth:1
category:3519,lenth:15
category:3523,lenth:1
category:3527,lenth:5

category:4267,lenth:159
category:4268,lenth:62
category:4270,lenth:9
category:4271,lenth:152
category:4273,lenth:5
category:4274,lenth:198
category:4278,lenth:184
category:4280,lenth:23
category:4281,lenth:33
category:4283,lenth:17
category:4286,lenth:3
category:4287,lenth:4
category:4292,lenth:8
category:4293,lenth:5
category:4296,lenth:51
category:4300,lenth:51
category:4301,lenth:28
category:4302,lenth:127
category:4303,lenth:190
category:4306,lenth:17
category:4310,lenth:832
category:4311,lenth:2
category:4313,lenth:15
category:4314,lenth:4
category:4316,lenth:10
category:4317,lenth:71
category:4320,lenth:21
category:4321,lenth:110
category:4322,lenth:42
category:4324,lenth:35
category:4325,lenth:7
category:4326,lenth:41
category:4330,lenth:439
category:4332,lenth:15
category:4333,lenth:12
category:4334,lenth:2
category:4335,lenth:3
category:4336,lenth:4
category:4337,lenth:284
category:4338,lenth:13
category:4339,lenth:14
category:4342,lenth:74
category:4343,lenth:5
category:4349,

category:5149,lenth:82
category:5152,lenth:38
category:5154,lenth:8
category:5155,lenth:1
category:5157,lenth:106
category:5158,lenth:3
category:5161,lenth:49
category:5162,lenth:7
category:5167,lenth:3
category:5174,lenth:35
category:5181,lenth:5
category:5182,lenth:15
category:5185,lenth:35
category:5191,lenth:1
category:5192,lenth:60
category:5193,lenth:120
category:5194,lenth:27
category:5195,lenth:97
category:5204,lenth:1
category:5207,lenth:6
category:5208,lenth:1
category:5209,lenth:52
category:5211,lenth:6
category:5214,lenth:1
category:5217,lenth:17
category:5218,lenth:8
category:5219,lenth:28
category:5223,lenth:10
category:5224,lenth:5
category:5225,lenth:77
category:5228,lenth:130
category:5229,lenth:1
category:5230,lenth:8
category:5231,lenth:24
category:5234,lenth:57
category:5236,lenth:4
category:5237,lenth:29
category:5238,lenth:18
category:5239,lenth:6
category:5245,lenth:35
category:5249,lenth:21
category:5254,lenth:1
category:5255,lenth:148
category:5267,lenth:2956
c

category:5975,lenth:11
category:5978,lenth:15
category:5979,lenth:2
category:5980,lenth:156
category:5982,lenth:15
category:5983,lenth:2
category:5986,lenth:6
category:5987,lenth:99
category:5989,lenth:4
category:5990,lenth:1
category:5993,lenth:3
category:5997,lenth:287
category:5998,lenth:1
category:5999,lenth:1
category:6000,lenth:3
category:6002,lenth:9
category:6005,lenth:7
category:6006,lenth:3
category:6007,lenth:3
category:6008,lenth:48
category:6013,lenth:40
category:6014,lenth:1
category:6016,lenth:34
category:6019,lenth:232
category:6020,lenth:25
category:6023,lenth:15
category:6025,lenth:6
category:6026,lenth:5
category:6029,lenth:9
category:6034,lenth:42
category:6035,lenth:74
category:6037,lenth:1
category:6039,lenth:300
category:6042,lenth:22
category:6045,lenth:171
category:6048,lenth:317
category:6049,lenth:24
category:6050,lenth:2
category:6051,lenth:3
category:6052,lenth:9
category:6056,lenth:1
category:6058,lenth:26
category:6059,lenth:12
category:6062,lenth:3
categ

category:6780,lenth:25
category:6784,lenth:29
category:6787,lenth:1
category:6788,lenth:54
category:6790,lenth:6
category:6793,lenth:7
category:6794,lenth:14
category:6795,lenth:1
category:6797,lenth:109
category:6798,lenth:438
category:6802,lenth:2
category:6803,lenth:63
category:6804,lenth:709
category:6810,lenth:1
category:6814,lenth:228
category:6817,lenth:50
category:6819,lenth:592
category:6821,lenth:4
category:6823,lenth:1
category:6829,lenth:18
category:6832,lenth:23
category:6836,lenth:554
category:6839,lenth:1045
category:6840,lenth:3
category:6843,lenth:31
category:6845,lenth:57
category:6849,lenth:40
category:6852,lenth:66
category:6853,lenth:1
category:6854,lenth:331
category:6856,lenth:2
category:6857,lenth:1
category:6858,lenth:10
category:6859,lenth:16
category:6860,lenth:3
category:6861,lenth:297
category:6862,lenth:21
category:6864,lenth:8
category:6865,lenth:3
category:6867,lenth:1
category:6868,lenth:5
category:6871,lenth:3
category:6875,lenth:11
category:6876,lenth

category:7629,lenth:156
category:7630,lenth:29
category:7632,lenth:4
category:7634,lenth:520
category:7635,lenth:5
category:7636,lenth:612
category:7637,lenth:2
category:7640,lenth:121
category:7644,lenth:1
category:7645,lenth:361
category:7646,lenth:76
category:7648,lenth:3
category:7649,lenth:38
category:7650,lenth:1
category:7652,lenth:9
category:7655,lenth:4
category:7656,lenth:10
category:7657,lenth:14
category:7658,lenth:16
category:7661,lenth:58
category:7662,lenth:8
category:7663,lenth:38
category:7664,lenth:2
category:7665,lenth:10
category:7667,lenth:17
category:7669,lenth:717
category:7671,lenth:5
category:7673,lenth:1
category:7674,lenth:100
category:7676,lenth:79
category:7678,lenth:1
category:7680,lenth:2
category:7683,lenth:17
category:7684,lenth:1
category:7685,lenth:5
category:7687,lenth:85
category:7690,lenth:89
category:7691,lenth:78
category:7694,lenth:2
category:7695,lenth:19
category:7699,lenth:6
category:7702,lenth:11
category:7703,lenth:12
category:7705,lenth:10

category:8377,lenth:93
category:8379,lenth:4
category:8381,lenth:74
category:8383,lenth:75
category:8385,lenth:17
category:8387,lenth:1
category:8389,lenth:6
category:8398,lenth:2
category:8400,lenth:69
category:8405,lenth:3
category:8406,lenth:3
category:8407,lenth:516
category:8408,lenth:1
category:8410,lenth:125
category:8416,lenth:53
category:8420,lenth:44
category:8425,lenth:470
category:8428,lenth:2
category:8429,lenth:12
category:8431,lenth:1
category:8432,lenth:5
category:8433,lenth:61
category:8438,lenth:56
category:8439,lenth:112
category:8441,lenth:111
category:8442,lenth:127
category:8443,lenth:13
category:8445,lenth:4
category:8447,lenth:39
category:8451,lenth:4
category:8452,lenth:15
category:8454,lenth:1
category:8455,lenth:13
category:8456,lenth:1
category:8457,lenth:2
category:8459,lenth:18
category:8461,lenth:15
category:8462,lenth:4
category:8465,lenth:38
category:8468,lenth:21
category:8470,lenth:2
category:8473,lenth:1
category:8482,lenth:51
category:8485,lenth:34


category:9170,lenth:832
category:9171,lenth:3
category:9173,lenth:32
category:9175,lenth:150
category:9176,lenth:1
category:9177,lenth:1
category:9178,lenth:69
category:9179,lenth:26
category:9183,lenth:11
category:9187,lenth:15
category:9190,lenth:6
category:9191,lenth:1
category:9193,lenth:7
category:9198,lenth:4
category:9200,lenth:13
category:9201,lenth:19
category:9203,lenth:235
category:9204,lenth:44
category:9205,lenth:21
category:9206,lenth:1
category:9207,lenth:38
category:9209,lenth:1
category:9210,lenth:269
category:9211,lenth:2
category:9212,lenth:373
category:9214,lenth:1
category:9217,lenth:570
category:9219,lenth:140
category:9221,lenth:31
category:9224,lenth:48
category:9227,lenth:7
category:9230,lenth:9
category:9232,lenth:1
category:9233,lenth:2
category:9234,lenth:1
category:9237,lenth:13
category:9241,lenth:180
category:9243,lenth:94
category:9245,lenth:147
category:9246,lenth:20
category:9249,lenth:28
category:9252,lenth:871
category:9253,lenth:2
category:9254,lent

category:9915,lenth:12
category:9919,lenth:107
category:9923,lenth:25
category:9926,lenth:1
category:9929,lenth:217
category:9930,lenth:2
category:9931,lenth:2
category:9937,lenth:2
category:9938,lenth:12
category:9939,lenth:17
category:9940,lenth:154
category:9941,lenth:2
category:9944,lenth:17
category:9945,lenth:160
category:9947,lenth:9
category:9952,lenth:177
category:9955,lenth:1
category:9959,lenth:12
category:9960,lenth:3
category:9961,lenth:5
category:9962,lenth:2
category:9963,lenth:10
category:9964,lenth:2030
category:9966,lenth:1
category:9967,lenth:30
category:9970,lenth:1
category:9971,lenth:268
category:9972,lenth:437
category:9974,lenth:3
category:9979,lenth:1
category:9981,lenth:1
category:9982,lenth:56
category:9983,lenth:3
category:9984,lenth:6
category:9988,lenth:6
category:9991,lenth:1
category:9993,lenth:589
category:9997,lenth:13
category:9999,lenth:2
category:10000,lenth:6
category:10001,lenth:7
category:10002,lenth:34
category:10003,lenth:2
category:10005,lenth

category:10647,lenth:11
category:10650,lenth:36
category:10651,lenth:53
category:10655,lenth:23
category:10657,lenth:11
category:10658,lenth:249
category:10659,lenth:5
category:10660,lenth:1
category:10661,lenth:2
category:10662,lenth:1052
category:10663,lenth:39
category:10665,lenth:12
category:10666,lenth:431
category:10667,lenth:282
category:10674,lenth:20
category:10675,lenth:1
category:10677,lenth:66
category:10684,lenth:34
category:10686,lenth:1
category:10687,lenth:13
category:10689,lenth:53
category:10694,lenth:2
category:10696,lenth:8
category:10697,lenth:9
category:10699,lenth:20
category:10700,lenth:37
category:10701,lenth:24
category:10703,lenth:54
category:10704,lenth:28
category:10707,lenth:95
category:10708,lenth:1
category:10710,lenth:5
category:10711,lenth:1
category:10712,lenth:361
category:10713,lenth:2
category:10714,lenth:1
category:10715,lenth:8
category:10716,lenth:422
category:10717,lenth:1
category:10721,lenth:1628
category:10722,lenth:5
category:10723,lenth:18

category:11397,lenth:15
category:11399,lenth:8
category:11403,lenth:63
category:11404,lenth:45
category:11406,lenth:5
category:11408,lenth:234
category:11411,lenth:1
category:11413,lenth:17
category:11414,lenth:13
category:11415,lenth:71
category:11420,lenth:974
category:11421,lenth:62
category:11426,lenth:1
category:11428,lenth:2
category:11429,lenth:2
category:11430,lenth:1
category:11432,lenth:31
category:11433,lenth:1
category:11434,lenth:14
category:11435,lenth:97
category:11436,lenth:88
category:11437,lenth:237
category:11439,lenth:76
category:11440,lenth:13
category:11441,lenth:1
category:11442,lenth:1
category:11444,lenth:528
category:11446,lenth:6
category:11450,lenth:14
category:11452,lenth:27
category:11454,lenth:3
category:11457,lenth:1
category:11458,lenth:120
category:11459,lenth:2
category:11460,lenth:3
category:11461,lenth:15
category:11464,lenth:184
category:11466,lenth:2
category:11467,lenth:1033
category:11469,lenth:19
category:11473,lenth:38
category:11475,lenth:12


category:12183,lenth:294
category:12185,lenth:1
category:12187,lenth:43
category:12194,lenth:1
category:12197,lenth:51
category:12198,lenth:493
category:12200,lenth:18
category:12201,lenth:16
category:12203,lenth:1
category:12206,lenth:1
category:12207,lenth:65
category:12208,lenth:10
category:12212,lenth:3
category:12216,lenth:193
category:12219,lenth:33
category:12223,lenth:39
category:12224,lenth:15
category:12225,lenth:1
category:12226,lenth:3
category:12227,lenth:1
category:12228,lenth:6
category:12232,lenth:1
category:12234,lenth:354
category:12236,lenth:52
category:12238,lenth:15
category:12242,lenth:509
category:12244,lenth:56
category:12247,lenth:13
category:12250,lenth:1
category:12251,lenth:110
category:12253,lenth:2
category:12254,lenth:8
category:12257,lenth:2
category:12260,lenth:1
category:12261,lenth:16
category:12263,lenth:1
category:12267,lenth:87
category:12268,lenth:11
category:12270,lenth:3
category:12271,lenth:1
category:12273,lenth:1
category:12275,lenth:26
categ

category:12890,lenth:14
category:12894,lenth:15
category:12896,lenth:41
category:12897,lenth:1
category:12898,lenth:5
category:12899,lenth:13
category:12901,lenth:4
category:12902,lenth:129
category:12909,lenth:151
category:12911,lenth:10
category:12912,lenth:79
category:12916,lenth:1059
category:12918,lenth:5
category:12919,lenth:21
category:12924,lenth:1
category:12926,lenth:15
category:12927,lenth:12
category:12930,lenth:19
category:12931,lenth:1
category:12933,lenth:6
category:12934,lenth:17
category:12935,lenth:4
category:12937,lenth:36
category:12938,lenth:3
category:12939,lenth:296
category:12941,lenth:44
category:12943,lenth:11
category:12948,lenth:67
category:12951,lenth:71
category:12952,lenth:274
category:12954,lenth:88
category:12959,lenth:1
category:12960,lenth:24
category:12962,lenth:165
category:12964,lenth:2
category:12965,lenth:1
category:12966,lenth:1
category:12967,lenth:3
category:12968,lenth:2
category:12969,lenth:360
category:12971,lenth:9
category:12972,lenth:8
c

category:13761,lenth:71
category:13764,lenth:1
category:13768,lenth:106
category:13769,lenth:2
category:13773,lenth:140
category:13775,lenth:4
category:13776,lenth:42
category:13777,lenth:19
category:13781,lenth:2
category:13783,lenth:2
category:13787,lenth:3
category:13788,lenth:128
category:13789,lenth:24
category:13793,lenth:96
category:13794,lenth:7
category:13795,lenth:143
category:13796,lenth:188
category:13801,lenth:11
category:13803,lenth:157
category:13809,lenth:3
category:13813,lenth:1
category:13814,lenth:3
category:13816,lenth:23
category:13817,lenth:18
category:13818,lenth:5
category:13819,lenth:3
category:13820,lenth:1
category:13821,lenth:138
category:13823,lenth:154
category:13824,lenth:2
category:13825,lenth:18
category:13826,lenth:252
category:13829,lenth:1
category:13833,lenth:12
category:13834,lenth:88
category:13837,lenth:1
category:13839,lenth:547
category:13841,lenth:984
category:13843,lenth:23
category:13845,lenth:23
category:13850,lenth:20
category:13851,lenth:

category:14460,lenth:6
category:14461,lenth:5
category:14463,lenth:3
category:14464,lenth:5
category:14468,lenth:182
category:14471,lenth:7
category:14479,lenth:9
category:14484,lenth:21
category:14485,lenth:8
category:14487,lenth:11
category:14488,lenth:3
category:14490,lenth:2
category:14491,lenth:1
category:14492,lenth:20
category:14498,lenth:195
category:14501,lenth:9
category:14505,lenth:24
category:14507,lenth:236
category:14508,lenth:2
category:14509,lenth:657
category:14511,lenth:1
category:14512,lenth:380
category:14513,lenth:121
category:14516,lenth:98
category:14517,lenth:155
category:14518,lenth:14
category:14521,lenth:2
category:14522,lenth:1
category:14523,lenth:1553
category:14524,lenth:148
category:14525,lenth:4
category:14527,lenth:6
category:14528,lenth:5
category:14530,lenth:61
category:14531,lenth:1
category:14534,lenth:149
category:14536,lenth:10
category:14537,lenth:7
category:14538,lenth:2
category:14542,lenth:6
category:14544,lenth:6
category:14548,lenth:35
cate

category:15339,lenth:2
category:15341,lenth:48
category:15345,lenth:109
category:15348,lenth:122
category:15349,lenth:35
category:15351,lenth:235
category:15352,lenth:4
category:15355,lenth:7
category:15356,lenth:6
category:15357,lenth:122
category:15358,lenth:55
category:15360,lenth:48
category:15361,lenth:2
category:15363,lenth:2
category:15364,lenth:9
category:15365,lenth:1575
category:15372,lenth:3
category:15378,lenth:32
category:15383,lenth:61
category:15386,lenth:2
category:15389,lenth:74
category:15394,lenth:1
category:15396,lenth:7
category:15397,lenth:3


In [39]:
item_rank = pd.concat(item_rank, sort=False)

In [None]:
item_rank.to_csv('item_rank.csv',index=False)

In [None]:
def unique_count(x):
    return len(set(x))

In [None]:
cat1 = item.groupby('category',as_index=False).agg({'itemID': unique_count}).rename(columns={'itemID':'itemnum_undercat'})

In [None]:
cat2 = item.groupby('category',as_index=False).agg({'brand': unique_count}).rename(columns={'brand':'brandnum_undercat'})

In [None]:
cat3 = item.groupby('category',as_index=False).agg({'shop': unique_count}).rename(columns={'shop':'shopnum_undercat'})

In [None]:
pd.concat([cat1, cat2[['brandnum_undercat']], cat3[['shopnum_undercat']]], axis=1).to_csv('category_lower.csv',index=False)