# 特征工程

In [191]:
import featuretools as ft
import pandas as pd
import datetime
import logging

In [192]:
# logger = logging.getLogger('ai')
# logging.basicConfig(level=logging.DEBUG,
#                     format='%(asctime)s  %(filename)s : %(levelname)s  %(message)s')

In [193]:
offline_df = pd.read_csv('../source/ccf_offline_stage1_train.csv', parse_dates = ['Date_received', 'Date'])
online_df = pd.read_csv('../source/ccf_online_stage1_train.csv', parse_dates = ['Date_received', 'Date'])

In [194]:
pred_df = pd.read_csv('../source/ccf_offline_stage1_test_revised.csv', parse_dates = ['Date_received'])

## 数据集划分

In [195]:
# 交叉训练集一：收到券的日期大于4月16日和小于5月15日
time_range = [datetime.datetime(2016, 5, 16), datetime.datetime(2016, 6, 15)]

# 交叉训练集一特征offline：线下数据中领券和用券日期大于1月1日和小于4月15日
time_range_date_received = [datetime.datetime(2016, 3, 16), datetime.datetime(2016, 6, 30)]
time_range_date = [datetime.datetime(2016, 3, 16), datetime.datetime(2016, 6, 30)]

FILENAME = 'dataset_pred'

is_pred = True

In [196]:
def _split(row, time_range_date_received, time_range_date):
    if ((row.Date >= time_range_date[0]) & (row.Date <= time_range_date[1])) | ((row.Coupon_id == 0) & (row.Date_received >= time_range_date_received[0]) & (row.Date_received <= time_range_date_received[1])):
        return row
    
def dataset_fetch(time_range):
    dataset = offline_df[(offline_df.Date_received >= time_range[0]) & (offline_df.Date_received <= time_range[1])].copy()
    return dataset
    
def dataset_split(time_range_date_received, time_range_date):
    feature_offline = offline_df.loc[
        ((offline_df.Date >= time_range_date[0]) & (offline_df.Date <= time_range_date[1])) | 
        ((offline_df.Coupon_id == 0) & (offline_df.Date_received >= time_range_date_received[0]) & (offline_df.Date_received <= time_range_date_received[1]))]
    feature_online = online_df.loc[
        ((online_df.Date >= time_range_date[0]) & (online_df.Date <= time_range_date[1])) | 
        ((online_df.Coupon_id == 0) & (online_df.Date_received >= time_range_date_received[0]) & (online_df.Date_received <= time_range_date_received[1]))]
    
    return feature_offline, feature_online

if is_pred:
    dataset_alpha = pred_df
else:
    dataset_alpha = dataset_fetch(time_range)

feature_alpha_offline, feature_alpha_online = dataset_split(time_range_date_received, time_range_date)

## offline特征的抽取

In [197]:
def merge(df, features, key, prefix):
    features.columns = [ prefix + ':' + str(col) if str(col) != key else str(col) for col in features.columns]
    return pd.merge(df, features, on=key, how='left')

def merge_by_keys(df, features, keys, prefix):
    features.columns = [ prefix + ':' + str(col) if str(col) not in keys else str(col) for col in features.columns]
    return pd.merge(df, features, on=keys, how='left')

In [198]:
def create_entity(name, user, coupon, merchant, purchase):
    es = ft.EntitySet(id = name)
    es = es.entity_from_dataframe(entity_id = 'coupon', dataframe = coupon, index='Coupon_id', variable_types = {'Coupon_id': ft.variable_types.Index,'Merchant_id': ft.variable_types.Id})

    purchase_variable_types = {'Purchase_id': ft.variable_types.Index,'User_id': ft.variable_types.Id,'Coupon_id': ft.variable_types.Id,}
    es = es.entity_from_dataframe(entity_id = 'purchase', dataframe = purchase, index = 'Purchase_id', variable_types = purchase_variable_types)

    es = es.entity_from_dataframe(entity_id = 'user', dataframe = user, index='User_id',variable_types = {'User_id': ft.variable_types.Index})
    es = es.entity_from_dataframe(entity_id = 'merchant', dataframe = merchant, index='Merchant_id', variable_types = {'Merchant_id': ft.variable_types.Index})
    
    r_user_purchase = ft.Relationship(es['user']['User_id'], es['purchase']['User_id'])
    es = es.add_relationship(r_user_purchase)

    r_purchase_coupon = ft.Relationship(es['coupon']['Coupon_id'], es['purchase']['Coupon_id'])
    es = es.add_relationship(r_purchase_coupon)

    r_merchant_coupon = ft.Relationship(es['merchant']['Merchant_id'], es['coupon']['Merchant_id'])
    es = es.add_relationship(r_merchant_coupon)
    
    return es

In [199]:
def dfs(entity_name, es_raw, agg_primitives_fn=[], trans_primitives_fn=[]):
    return ft.dfs(entityset = es_raw, n_jobs = 4, target_entity = entity_name, max_depth=2, agg_primitives=agg_primitives_fn, trans_primitives=trans_primitives_fn)

In [200]:
def cal(row):
    if isinstance(row.Discount_rate, str) and row.Discount_rate == 'fixed':
        row.Coupon_type = 2
        return row
    
    if isinstance(row.Discount_rate, float):
        coupon.Discount = row.Discount_rate
        return row
    
    arr = row.Discount_rate.split(':')
    if len(arr) == 2:
        row.Discount =  (float(arr[0]) - float(arr[1])) / float(arr[0])
        row.Coupon_type = 1
        row.Base_consume = float(arr[0])
        row.Discount_money = float(arr[1])
    else:
        row.Discount = float(row.Discount_rate)
            
    return row

In [201]:
def extract(df):
    purchase = df.copy()
    purchase = purchase[purchase['Coupon_id'] > 0]

    purchase['Duration'] = pd.to_datetime(purchase['Date'] - purchase['Date_received'])

    purchase = purchase.reset_index()
    purchase = purchase.rename(index=str, columns={"index": "Purchase_id"})

    user = purchase[['User_id']].drop_duplicates()
    merchant = purchase[['Merchant_id']].drop_duplicates()
    coupon = purchase[['Coupon_id', 'Merchant_id', 'Discount_rate']].drop_duplicates()
    purchase = purchase[['User_id', 'Coupon_id','Date_received', 'Date', 'Duration', 'Distance']]

    es = create_entity('o2o', user, coupon, merchant, purchase)

    features, _ = dfs('purchase', es, trans_primitives_fn=['day', 'week', 'month', 'days'])
    purchase = pd.merge(purchase, features[['DAY(Date_received)', 'WEEK(Date_received)', 'MONTH(Date_received)', 'DAY(Duration)']], on='Purchase_id', how='left')
    purchase = purchase.drop(['Duration'], axis=1)
    
    # 不做fillna的动作是为了后面可以方便的找到max和min
    # purchase = purchase.fillna(0)
    
    purchase['DAY(Duration)'] = purchase['DAY(Duration)'] - 1

    # 用datetime相减去获得数值会是17
    purchase['Label'] = purchase.apply(lambda row: 1 if row['DAY(Duration)']< 16 else 0, axis=1)
    purchase['Is_used_coupon'] = purchase.apply(lambda row: 1 if row['DAY(Duration)']> 0 else 0, axis=1)

    coupon['Base_consume'] = 0.0
    coupon['Discount'] = 0.0
    coupon['Discount_money'] = 0.0
    coupon['Coupon_type'] = 0

    coupon = coupon.apply(lambda row: cal(row), axis=1)
    
    # Discount_rate 做保留，后续用来计算类型
    # coupon = coupon.drop(['Discount_rate'], axis=1)

    return purchase, coupon, merchant, user

In [202]:
# extract basic features
purchase, coupon, merchant, user = extract(feature_alpha_offline)



Exception ignored in: <generator object Scheduler.add_client at 0x12a08f390>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [203]:
coupon_saw = coupon.copy()

### 抽取用户特征

In [204]:
u = user.copy()

In [205]:
coupon = coupon[['Coupon_id', 'Merchant_id', 'Base_consume', 'Discount', 'Discount_money', 'Coupon_type']]
purchase = purchase[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Date','Distance', 'DAY(Duration)', 'Label', 'Is_used_coupon']]

#### 用户消费属性

In [206]:
purchase_r = purchase[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Distance']]
coupon_r = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_r, merchant, purchase_r)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count', 'avg_time_between'])

features.columns = [ 'Receive:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
u = pd.merge(u, features, on='User_id', how='left')

tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [207]:
u['Receive:AVG(Date_received)'] = u['Receive:COUNT(purchase)'] / (purchase_r.Date_received.max() - purchase_r.Date_received.min()).days
u['Receive:AVG(Coupon_id)'] = u['Receive:COUNT(purchase)'] / u['Receive:NUM_UNIQUE(purchase.Coupon_id)']
u['Receive:AVG(Merchant_id)'] = u['Receive:COUNT(purchase)'] / u['Receive:NUM_UNIQUE(purchase.coupon.Merchant_id)']
u['Receive:AVG(15Days)'] = u['Receive:COUNT(purchase)'] / 15

#### 用户消费优惠券特征

In [208]:
purchase_c = purchase[purchase['Is_used_coupon'] == 1]
purchase_c = purchase_c[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Date', 'DAY(Duration)', 'Distance']]
coupon_c = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_c, merchant, purchase_c)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])

features.columns = [ 'Consume:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
u = pd.merge(u, features, on='User_id', how='left')

Exception ignored in: <generator object Scheduler.add_client at 0x13c9347c8>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [209]:
u['Consume:AVG(Date_received)'] = u['Consume:COUNT(purchase)'] / (purchase_c.Date_received.max() - purchase_c.Date_received.min()).days
u['Consume:AVG(Date)'] = u['Consume:COUNT(purchase)'] / (purchase_c.Date.max() - purchase_c.Date.min()).days
u['Consume:AVG(Coupon_id)'] = u['Consume:COUNT(purchase)'] / u['Consume:NUM_UNIQUE(purchase.Coupon_id)']
u['Consume:AVG(Merchant_id)'] = u['Consume:COUNT(purchase)'] / u['Consume:NUM_UNIQUE(purchase.coupon.Merchant_id)']
u['Consume:AVG(15Days)'] = u['Consume:COUNT(purchase)'] / 15
u['Consume:COUNT(no_coupon)'] = u['Receive:COUNT(purchase)'] - u['Consume:COUNT(purchase)']

u['Consume:RATE(purchase)'] = u['Consume:COUNT(purchase)'] / u['Receive:COUNT(purchase)']
u['Consume:RATE(no_coupon)'] = u['Consume:COUNT(no_coupon)'] / u['Receive:COUNT(purchase)']
u['Consume:OCC(purchase)'] = u['Consume:COUNT(purchase)'] / u['Consume:COUNT(no_coupon)']

In [210]:
coupon_c_f = coupon[coupon['Coupon_type'] == 1]
coupon_c_f = coupon[['Coupon_id', 'Merchant_id', 'Base_consume', 'Discount', 'Discount_money']]

es = create_entity('o2o_user_base', user, coupon_c_f, merchant, purchase_c)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'count'])

features.columns = [ 'ConsumeCtf:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
u = pd.merge(u, features, on='User_id', how='left')

Exception ignored in: <generator object Scheduler.add_client at 0x14532bb10>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [211]:
u['ConsumeCtd:COUNT(purchase)'] = u['Consume:COUNT(purchase)']  - u['ConsumeCtf:COUNT(purchase)']
u['ConsumeCtd:RATE(purchase)'] = u['ConsumeCtd:COUNT(purchase)']  / u['Consume:COUNT(purchase)']
u['ConsumeCtf:RATE(purchase)'] = u['ConsumeCtf:COUNT(purchase)']  / u['Consume:COUNT(purchase)']

u['ConsumeCt:OCC(purchase)'] = u['ConsumeCtf:COUNT(purchase)'] / u['ConsumeCtd:COUNT(purchase)']

#### 15天内用户消费优惠券特征

In [212]:
purchase_co = purchase[purchase['Label'] == 1]
purchase_co = purchase_co[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Date', 'DAY(Duration)', 'Distance']]
coupon_co = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_co, merchant, purchase_co)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])

features.columns = [ 'Use:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
u = pd.merge(u, features, on='User_id', how='left')

Exception ignored in: <generator object Scheduler.add_client at 0x14aa022a0>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [213]:
u['Use:AVG(Date_received)'] = u['Use:COUNT(purchase)'] / (purchase_co.Date_received.max() - purchase_co.Date_received.min()).days
u['Use:AVG(Date)'] = u['Use:COUNT(purchase)'] / (purchase_co.Date.max() - purchase_co.Date.min()).days
u['Use:AVG(Coupon_id)'] = u['Use:COUNT(purchase)'] / u['Use:NUM_UNIQUE(purchase.Coupon_id)']
u['Use:AVG(Merchant_id)'] = u['Use:COUNT(purchase)'] / u['Use:NUM_UNIQUE(purchase.coupon.Merchant_id)']
u['Use:AVG(15Days)'] = u['Use:COUNT(purchase)'] / 15
u['Use:COUNT(no_intime)'] = u['Consume:COUNT(purchase)'] - u['Use:COUNT(purchase)']

u['Use:RATE(purchase)'] = u['Use:COUNT(purchase)'] / u['Receive:COUNT(purchase)']
u['Use:RATE(purchase_4_consume)'] = u['Use:COUNT(purchase)'] / u['Consume:COUNT(purchase)']
u['Use:RATE(no_intime)'] = u['Use:COUNT(no_intime)'] / u['Receive:COUNT(purchase)']
u['Use:RATE(no_intime_4_consume)'] = u['Use:COUNT(no_intime)'] / u['Consume:COUNT(purchase)']

u['Use:OCC(purchase)'] = u['Use:COUNT(purchase)'] / u['Use:COUNT(no_intime)']

In [214]:
es = create_entity('o2o_user_base', user, coupon_c_f, merchant, purchase_co)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'count'])

features.columns = [ 'UseCtf:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
u = pd.merge(u, features, on='User_id', how='left')

tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [215]:
u['UseCtd:COUNT(purchase)'] = u['Use:COUNT(purchase)']  - u['UseCtf:COUNT(purchase)']
u['UseCtd:RATE(purchase)'] = u['UseCtd:COUNT(purchase)']  / u['Use:COUNT(purchase)']
u['UseCtf:RATE(purchase)'] = u['UseCtf:COUNT(purchase)']  / u['Use:COUNT(purchase)']

u['UseCt:OCC(purchase)'] = u['UseCtf:COUNT(purchase)'] / u['UseCtd:COUNT(purchase)']

#### 用户特征总结

In [216]:
u.columns = [ 'User' + ':' + str(col) if str(col) not in ['User_id'] else str(col) for col in u.columns]

In [217]:
i = pd.DataFrame()
i['column_name'] = u.columns.values
i['dtype'] = u.dtypes.values
i

Unnamed: 0,column_name,dtype
0,User_id,int64
1,User:Receive:MIN(purchase.Distance),float64
2,User:Receive:MAX(purchase.Distance),float64
3,User:Receive:MEAN(purchase.Distance),float64
4,User:Receive:NUM_UNIQUE(purchase.Coupon_id),int64
5,User:Receive:COUNT(purchase),int64
6,User:Receive:MIN(purchase.coupon.Discount),float64
7,User:Receive:MAX(purchase.coupon.Discount),float64
8,User:Receive:MEAN(purchase.coupon.Discount),float64
9,User:Receive:NUM_UNIQUE(purchase.coupon.Mercha...,int64


### 抽取商户特征

In [218]:
m = merchant.copy()

#### 商户消费属性

In [219]:
es = create_entity('o2o_merchant_base', user, coupon_r, merchant, purchase_r)
features, _ = dfs('merchant', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
m = merge(m, features, 'Merchant_id', 'Receive')

Exception ignored in: <generator object Scheduler.add_client at 0x13878c930>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [220]:
# 该商户每隔多少时间被领取一张优惠券
m['Receive:AVG(Date_received)'] = m['Receive:COUNT(purchase)'] / (purchase_r.Date_received.max() - purchase_r.Date_received.min()).days
# 平均每张优惠券被消费几次
m['Receive:AVG(Coupon)'] = m['Receive:COUNT(purchase)'] / m['Receive:COUNT(coupon)']

m['Receive:AVG(User_id)'] = m['Receive:COUNT(purchase)'] / m['Receive:NUM_UNIQUE(purchase.User_id)']
m['Receive:AVG(15Days)'] = m['Receive:COUNT(purchase)'] / 15

####  商户被消费优惠券特征

In [221]:
es = create_entity('o2o_merchant_base', user, coupon_c, merchant, purchase_c)
features, _ = dfs('merchant', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
m = merge(m, features, 'Merchant_id', 'Consume')

tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [222]:
m['Consume:AVG(Date_received)'] = m['Consume:COUNT(purchase)'] / (purchase_c.Date_received.max() - purchase_c.Date_received.min()).days
m['Consume:AVG(Date)'] = m['Consume:COUNT(purchase)'] / (purchase_c.Date.max() - purchase_c.Date.min()).days
m['Consume:AVG(Coupon)'] = m['Consume:COUNT(purchase)'] / m['Consume:COUNT(coupon)']
m['Consume:AVG(User_id)'] = m['Consume:COUNT(purchase)'] / m['Consume:NUM_UNIQUE(purchase.User_id)']
m['Consume:AVG(15Days)'] = m['Consume:COUNT(purchase)'] / 15
m['Consume:COUNT(no_coupon)'] = m['Receive:COUNT(purchase)'] - m['Consume:COUNT(purchase)']
m['Consume:RATE(purchase)'] = m['Consume:COUNT(purchase)'] / m['Receive:COUNT(purchase)']
m['Consume:RATE(no_coupon)'] = m['Consume:COUNT(no_coupon)'] / m['Receive:COUNT(purchase)']
m['Consume:OCC(purchase)'] = m['Consume:COUNT(purchase)'] / m['Consume:COUNT(no_coupon)']

In [223]:
es = create_entity('o2o_merchant_base', user, coupon_c_f, merchant, purchase_c)
features, _ = dfs('merchant', es, agg_primitives_fn = ['min', 'max', 'mean', 'count'])
m = merge(m, features, 'Merchant_id', 'ConsumeCtf')

Future exception was never retrieved
future: <Future finished exception=StreamClosedError('Stream is closed')>
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/tcpclient.py", line 112, in on_connect_done
    stream = future.result()
tornado.iostream.StreamClosedError: Stream is closed
Exception ignored in: <generator object Scheduler.add_client at 0x139da36d8>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [224]:
m['ConsumeCtd:COUNT(purchase)'] = m['Consume:COUNT(purchase)']  - m['ConsumeCtf:COUNT(purchase)']
m['ConsumeCtd:RATE(purchase)'] = m['ConsumeCtd:COUNT(purchase)']  / m['Consume:COUNT(purchase)']
m['ConsumeCtf:RATE(purchase)'] = m['ConsumeCtf:COUNT(purchase)']  / m['Consume:COUNT(purchase)']

m['ConsumeCt:OCC(purchase)'] = m['ConsumeCtf:COUNT(purchase)'] / m['ConsumeCtd:COUNT(purchase)']

#### 15天内商户被消费优惠券特征

In [225]:
es = create_entity('o2o_merchant_base', user, coupon_co, merchant, purchase_co)
features, _ = dfs('merchant', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
m = merge(m, features, 'Merchant_id', 'Use')

Exception ignored in: <generator object Scheduler.add_client at 0x149b9b8b8>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [226]:
m['Use:AVG(Date_received)'] = m['Use:COUNT(purchase)'] / (purchase_co.Date_received.max() - purchase_co.Date_received.min()).days
m['Use:AVG(Date)'] = m['Use:COUNT(purchase)'] / (purchase_co.Date.max() - purchase_co.Date.min()).days
m['Use:AVG(coupon)'] = m['Use:COUNT(purchase)'] / m['Use:COUNT(coupon)']
m['Use:AVG(User_id)'] = m['Use:COUNT(purchase)'] / m['Use:NUM_UNIQUE(purchase.User_id)']
m['Use:AVG(15Days)'] = m['Use:COUNT(purchase)'] / 15
m['Use:COUNT(no_intime)'] = m['Consume:COUNT(purchase)'] - m['Use:COUNT(purchase)']

m['Use:RATE(purchase)'] = m['Use:COUNT(purchase)'] / m['Receive:COUNT(purchase)']
m['Use:RATE(purchase_4_consume)'] = m['Use:COUNT(purchase)'] / m['Consume:COUNT(purchase)']
m['Use:RATE(no_intime)'] = m['Use:COUNT(no_intime)'] / m['Receive:COUNT(purchase)']
m['Use:RATE(no_intime_4_consume)'] = m['Use:COUNT(no_intime)'] / m['Consume:COUNT(purchase)']

m['Use:OCC(purchase)'] = m['Use:COUNT(purchase)'] / m['Use:COUNT(no_intime)']

In [227]:
es = create_entity('o2o_merchant_base', user, coupon_c_f, merchant, purchase_co)
features, _ = dfs('merchant', es, agg_primitives_fn = ['min', 'max', 'mean', 'count'])
m = merge(m, features, 'Merchant_id', 'UseCtf')

Exception ignored in: <generator object Scheduler.add_client at 0x12dc4bb10>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [228]:
m['UseCtd:COUNT(purchase)'] = m['Use:COUNT(purchase)']  - m['UseCtf:COUNT(purchase)']
m['UseCtd:RATE(purchase)'] = m['UseCtd:COUNT(purchase)']  / m['Use:COUNT(purchase)']
m['UseCtf:RATE(purchase)'] = m['UseCtf:COUNT(purchase)']  / m['Use:COUNT(purchase)']

m['UseCt:OCC(purchase)'] = m['UseCtf:COUNT(purchase)'] / m['UseCtd:COUNT(purchase)']

#### 商户特征总结

In [229]:
m.columns = [ 'Merchant' + ':' + str(col) if str(col) not in ['Merchant_id'] else str(col) for col in m.columns]

In [230]:
i = pd.DataFrame()
i['column_name'] = m.columns.values
i['dtype'] = m.dtypes.values
i

Unnamed: 0,column_name,dtype
0,Merchant_id,int64
1,Merchant:Receive:MIN(coupon.Discount),float64
2,Merchant:Receive:MAX(coupon.Discount),float64
3,Merchant:Receive:MEAN(coupon.Discount),float64
4,Merchant:Receive:COUNT(coupon),int64
5,Merchant:Receive:MIN(purchase.Distance),float64
6,Merchant:Receive:MAX(purchase.Distance),float64
7,Merchant:Receive:MEAN(purchase.Distance),float64
8,Merchant:Receive:NUM_UNIQUE(purchase.User_id),int64
9,Merchant:Receive:COUNT(purchase),int64


### 抽取优惠券特征

In [231]:
c = coupon.copy()

#### 优惠券消费属性

In [232]:
es = create_entity('o2o_coupon_base', user, coupon_r, merchant, purchase_r)
features, _ = dfs('coupon', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop(['Discount', 'Merchant_id'], axis=1)
c = merge(c, features, 'Coupon_id', 'Receive')

Exception ignored in: <generator object Scheduler.add_client at 0x12d8a8408>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [233]:
# 该商户每隔多少时间被领取一张优惠券
c['Receive:AVG(Date_received)'] = c['Receive:COUNT(purchase)'] / (purchase_r.Date_received.max() - purchase_r.Date_received.min()).days
c['Receive:AVG(User_id)'] = c['Receive:COUNT(purchase)'] / c['Receive:NUM_UNIQUE(purchase.User_id)']
c['Receive:AVG(15Days)'] = c['Receive:COUNT(purchase)'] / 15

#### 优惠券被用户消费特征

In [234]:
es = create_entity('o2o_coupon_base', user, coupon_c, merchant, purchase_c)
features, _ = dfs('coupon', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop(['Discount', 'Merchant_id'], axis=1)
c = merge(c, features, 'Coupon_id', 'Consume')

tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [235]:
c['Consume:AVG(Date_received)'] = c['Consume:COUNT(purchase)'] / (purchase_c.Date_received.max() - purchase_c.Date_received.min()).days
c['Consume:AVG(Date)'] = c['Consume:COUNT(purchase)'] / (purchase_c.Date.max() - purchase_c.Date.min()).days
c['Consume:AVG(User_id)'] = c['Consume:COUNT(purchase)'] / c['Consume:NUM_UNIQUE(purchase.User_id)']
c['Consume:AVG(15Days)'] = c['Consume:COUNT(purchase)'] / 15
c['Consume:COUNT(no_coupon)'] = c['Receive:COUNT(purchase)'] - c['Consume:COUNT(purchase)']
c['Consume:RATE(purchase)'] = c['Consume:COUNT(purchase)'] / c['Receive:COUNT(purchase)']
c['Consume:RATE(no_coupon)'] = c['Consume:COUNT(no_coupon)'] / c['Receive:COUNT(purchase)']
c['Consume:OCC(purchase)'] = c['Consume:COUNT(purchase)'] / c['Consume:COUNT(no_coupon)']

#### 15天内商户被消费优惠券特征

In [236]:
es = create_entity('o2o_coupon_base', user, coupon_co, merchant, purchase_co)
features, _ = dfs('coupon', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop(['Discount', 'Merchant_id'], axis=1)
c = merge(c, features, 'Coupon_id', 'Use')

Exception ignored in: <generator object Scheduler.add_client at 0x129f3ccf0>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [237]:
c['Use:AVG(Date_received)'] = c['Use:COUNT(purchase)'] / (purchase_co.Date_received.max() - purchase_co.Date_received.min()).days
c['Use:AVG(Date)'] = c['Use:COUNT(purchase)'] / (purchase_co.Date.max() - purchase_co.Date.min()).days
c['Use:AVG(User_id)'] = c['Use:COUNT(purchase)'] / c['Use:NUM_UNIQUE(purchase.User_id)']
c['Use:AVG(15Days)'] = c['Use:COUNT(purchase)'] / 15
c['Use:COUNT(no_intime)'] = c['Consume:COUNT(purchase)'] - c['Use:COUNT(purchase)']

c['Use:RATE(purchase)'] = c['Use:COUNT(purchase)'] / c['Receive:COUNT(purchase)']
c['Use:RATE(purchase_4_consume)'] = c['Use:COUNT(purchase)'] / c['Consume:COUNT(purchase)']
c['Use:RATE(no_intime)'] = c['Use:COUNT(no_intime)'] / c['Receive:COUNT(purchase)']
c['Use:RATE(no_intime_4_consume)'] = c['Use:COUNT(no_intime)'] / c['Consume:COUNT(purchase)']

c['Use:OCC(purchase)'] = c['Use:COUNT(purchase)'] / c['Use:COUNT(no_intime)']

#### 优惠券特征总结

In [238]:
c.columns = [ 'Coupon' + ':' + str(col) if str(col) not in ['Coupon_id'] else str(col) for col in c.columns]

In [239]:
i = pd.DataFrame()
i['column_name'] = c.columns.values
i['dtype'] = c.dtypes.values
i

Unnamed: 0,column_name,dtype
0,Coupon_id,float64
1,Coupon:Merchant_id,int64
2,Coupon:Base_consume,float64
3,Coupon:Discount,float64
4,Coupon:Discount_money,float64
5,Coupon:Coupon_type,int64
6,Coupon:Receive:MIN(purchase.Distance),float64
7,Coupon:Receive:MAX(purchase.Distance),float64
8,Coupon:Receive:MEAN(purchase.Distance),float64
9,Coupon:Receive:NUM_UNIQUE(purchase.User_id),int64


### 抽取用户-商户特征

In [240]:
def create_um_entity(name, coupon_um, puchase_um, um):
    es = ft.EntitySet(id = name)
    es = es.entity_from_dataframe(entity_id = 'coupon', dataframe = coupon_um, index='Coupon_id', variable_types = {'Coupon_id': ft.variable_types.Index})

    purchase_variable_types = {'Purchase_id': ft.variable_types.Index,'Um_id': ft.variable_types.Id,'Coupon_id': ft.variable_types.Id,}
    es = es.entity_from_dataframe(entity_id = 'purchase', dataframe = puchase_um, index = 'Purchase_id', variable_types = purchase_variable_types)

    es = es.entity_from_dataframe(entity_id = 'um', dataframe = um, index='Um_id',variable_types = {'Um_id': ft.variable_types.Index})

    r_user_purchase = ft.Relationship(es['um']['Um_id'], es['purchase']['Um_id'])
    es = es.add_relationship(r_user_purchase)

    r_purchase_coupon = ft.Relationship(es['coupon']['Coupon_id'], es['purchase']['Coupon_id'])
    es = es.add_relationship(r_purchase_coupon)
    
    return es

In [241]:
puchase_um = pd.merge(purchase, coupon, on='Coupon_id', how='left')
um = puchase_um[['User_id', 'Merchant_id']].drop_duplicates()
um = um.reset_index()
um = um.rename(index=str, columns={
    'index': 'Um_id'
})

puchase_um = pd.merge(puchase_um, um, on=['User_id', 'Merchant_id'], how='left')
coupon_um = coupon.drop(['Merchant_id'], axis=1)

In [242]:
u_m = um.copy().drop(['Um_id'], axis=1)

#### 抽取用户-商户消费特征

In [243]:
purchase_um_r = puchase_um[['Um_id', 'Coupon_id', 'Date_received', 'DAY(Duration)']]
coupon_um_r = coupon_um[['Coupon_id', 'Discount', 'Discount_money','Base_consume']]

es = create_um_entity('o2o_user_merchant', coupon_um_r, purchase_um_r, um)
features, _ = dfs('um', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
u_m = merge_by_keys(u_m, features, ['User_id', 'Merchant_id'], 'Receive')



Exception ignored in: <generator object Scheduler.add_client at 0x1299592a0>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [244]:
# 该商户每隔多少时间被领取一张优惠券
u_m['Receive:AVG(Date_received)'] = u_m['Receive:COUNT(purchase)'] / (purchase_um_r.Date_received.max() - purchase_um_r.Date_received.min()).days
# 平均每张优惠券被消费几次
u_m['Receive:AVG(Coupon)'] = u_m['Receive:COUNT(purchase)'] / u_m['Receive:NUM_UNIQUE(purchase.Coupon_id)']
u_m['Receive:AVG(15Days)'] = u_m['Receive:COUNT(purchase)'] / 15

####  用户-商户消费优惠券特征

In [245]:
puchase_um_c = puchase_um[puchase_um['Is_used_coupon'] == 1]
puchase_um_c = puchase_um_c[['Um_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_um_entity('o2o_user_merchant', coupon_um_r, puchase_um_c, um)
features, _ = dfs('um', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
u_m = merge_by_keys(u_m, features, ['User_id', 'Merchant_id'], 'Consume')



Exception ignored in: <generator object Scheduler.add_client at 0x12dfbc138>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [246]:
u_m['Consume:AVG(Date_received)'] = u_m['Consume:COUNT(purchase)'] / (puchase_um_c.Date_received.max() - puchase_um_c.Date_received.min()).days
u_m['Consume:AVG(Date)'] = u_m['Consume:COUNT(purchase)'] / (puchase_um_c.Date.max() - puchase_um_c.Date.min()).days
u_m['Consume:AVG(Coupon_id)'] = u_m['Consume:COUNT(purchase)'] / u_m['Consume:NUM_UNIQUE(purchase.Coupon_id)']
u_m['Consume:AVG(15Days)'] = u_m['Consume:COUNT(purchase)'] / 15
u_m['Consume:COUNT(no_coupon)'] = u_m['Receive:COUNT(purchase)'] - u_m['Consume:COUNT(purchase)']
u_m['Consume:RATE(purchase)'] = u_m['Consume:COUNT(purchase)'] / u_m['Receive:COUNT(purchase)']
u_m['Consume:RATE(no_coupon)'] = u_m['Consume:COUNT(no_coupon)'] / u_m['Receive:COUNT(purchase)']
u_m['Consume:OCC(purchase)'] = u_m['Consume:COUNT(purchase)'] / u_m['Consume:COUNT(no_coupon)']

#### 15天内用户-商户消费优惠券特征

In [247]:
puchase_um_co = puchase_um[puchase_um['Label'] == 1]
puchase_um_co = puchase_um_c[['Um_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_um_entity('o2o_user_merchant', coupon_um_r, puchase_um_co, um)
features, _ = dfs('um', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
u_m = merge_by_keys(u_m, features, ['User_id', 'Merchant_id'], 'Use')



Exception ignored in: <generator object Scheduler.add_client at 0x1376a5ed0>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [248]:
u_m['Use:AVG(Date_received)'] = u_m['Use:COUNT(purchase)'] / (puchase_um_co.Date_received.max() - puchase_um_co.Date_received.min()).days
u_m['Use:AVG(Date)'] = u_m['Use:COUNT(purchase)'] / (puchase_um_co.Date.max() - puchase_um_co.Date.min()).days
u_m['Use:AVG(Coupon_id)'] = u_m['Use:COUNT(purchase)'] / u_m['Use:NUM_UNIQUE(purchase.Coupon_id)']
u_m['Use:AVG(15Days)'] = u_m['Use:COUNT(purchase)'] / 15
u_m['Use:COUNT(no_intime)'] = u_m['Consume:COUNT(purchase)'] - u_m['Use:COUNT(purchase)']

u_m['Use:RATE(purchase)'] = u_m['Use:COUNT(purchase)'] / u_m['Receive:COUNT(purchase)']
u_m['Use:RATE(purchase_4_consume)'] = u_m['Use:COUNT(purchase)'] / u_m['Consume:COUNT(purchase)']
u_m['Use:RATE(no_intime)'] = u_m['Use:COUNT(no_intime)'] / u_m['Receive:COUNT(purchase)']
u_m['Use:RATE(no_intime_4_consume)'] = u_m['Use:COUNT(no_intime)'] / u_m['Consume:COUNT(purchase)']

u_m['Use:OCC(purchase)'] = u_m['Use:COUNT(purchase)'] / u_m['Use:COUNT(no_intime)']

#### 用户-商户特征总结

In [249]:
u_m.columns = [ 'User_Merchant' + ':' + str(col) if str(col) not in ['User_id', 'Merchant_id'] else str(col) for col in u_m.columns]

In [250]:
i = pd.DataFrame()
i['column_name'] = u_m.columns.values
i['dtype'] = u_m.dtypes.values
i

Unnamed: 0,column_name,dtype
0,User_id,int64
1,Merchant_id,int64
2,User_Merchant:Receive:MIN(purchase.DAY(Duration)),int64
3,User_Merchant:Receive:MAX(purchase.DAY(Duration)),int64
4,User_Merchant:Receive:MEAN(purchase.DAY(Durati...,float64
5,User_Merchant:Receive:NUM_UNIQUE(purchase.Coup...,int64
6,User_Merchant:Receive:COUNT(purchase),int64
7,User_Merchant:Receive:MIN(purchase.coupon.Disc...,float64
8,User_Merchant:Receive:MIN(purchase.coupon.Disc...,float64
9,User_Merchant:Receive:MIN(purchase.coupon.Base...,float64


### 抽取用户-优惠券特征

In [251]:
def create_uc_entity(name, puchase_uc, uc):
    es = ft.EntitySet(id = name)
#     es = es.entity_from_dataframe(entity_id = 'coupon', dataframe = coupon_uc, index='Coupon_id', variable_types = {'Coupon_id': ft.variable_types.Index})

    purchase_variable_types = {'Purchase_id': ft.variable_types.Index,'Uc_id': ft.variable_types.Id}
    es = es.entity_from_dataframe(entity_id = 'purchase', dataframe = puchase_uc, index = 'Purchase_id', variable_types = purchase_variable_types)

    es = es.entity_from_dataframe(entity_id = 'uc', dataframe = uc, index='Uc_id',variable_types = {'Uc_id': ft.variable_types.Index})

    r_user_purchase = ft.Relationship(es['uc']['Uc_id'], es['purchase']['Uc_id'])
    es = es.add_relationship(r_user_purchase)

#     r_purchase_coupon = ft.Relationship(es['coupon']['Coupon_id'], es['uc']['Coupon_id'])
#     es = es.add_relationship(r_purchase_coupon)
    
    return es

In [252]:
uc = purchase[['User_id', 'Coupon_id']].drop_duplicates()
uc = uc.reset_index()
uc = uc.rename(index=str, columns={
    'index': 'Uc_id'
})

puchase_uc = pd.merge(purchase, uc, on=['User_id', 'Coupon_id'], how='left')
coupon_uc = coupon.drop(['Merchant_id'], axis=1)

In [253]:
u_c = uc.copy().drop(['Uc_id'], axis=1)

#### 抽取用户-商户消费特征

In [254]:
puchase_uc_r = puchase_uc[['Uc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)']]

es = create_uc_entity('o2o_user_coupon', puchase_uc_r, uc)
features, _ = dfs('uc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_c = merge_by_keys(u_c, features, ['User_id', 'Coupon_id'], 'Receive')



Exception ignored in: <generator object Scheduler.add_client at 0x13861da20>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [255]:
# 该商户每隔多少时间被领取一张优惠券
u_c['Receive:AVG(Date_received)'] = u_c['Receive:COUNT(purchase)'] / (puchase_uc_r.Date_received.max() - puchase_uc_r.Date_received.min()).days
# 平均每张优惠券被消费几次
u_c['Receive:AVG(15Days)'] = u_c['Receive:COUNT(purchase)'] / 15

####  用户-商户消费优惠券特征

In [256]:
puchase_uc_c = puchase_uc[puchase_uc['Is_used_coupon'] == 1]
puchase_uc_c = puchase_uc_c[['Uc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_uc_entity('o2o_user_coupon', puchase_uc_c, uc)
features, _ = dfs('uc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_c = merge_by_keys(u_c, features, ['User_id', 'Coupon_id'], 'Consume')



Exception ignored in: <generator object Scheduler.add_client at 0x152cd0930>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [257]:
u_c['Consume:AVG(Date_received)'] = u_c['Consume:COUNT(purchase)'] / (puchase_uc_c.Date_received.max() - puchase_uc_c.Date_received.min()).days
u_c['Consume:AVG(Date)'] = u_c['Consume:COUNT(purchase)'] / (puchase_uc_c.Date.max() - puchase_uc_c.Date.min()).days
u_c['Consume:AVG(15Days)'] = u_c['Consume:COUNT(purchase)'] / 15
u_c['Consume:COUNT(no_coupon)'] = u_c['Receive:COUNT(purchase)'] - u_c['Consume:COUNT(purchase)']
u_c['Consume:RATE(purchase)'] = u_c['Consume:COUNT(purchase)'] / u_c['Receive:COUNT(purchase)']
u_c['Consume:RATE(no_coupon)'] = u_c['Consume:COUNT(no_coupon)'] / u_c['Receive:COUNT(purchase)']
u_c['Consume:OCC(purchase)'] = u_c['Consume:COUNT(purchase)'] / u_c['Consume:COUNT(no_coupon)']

#### 15天内用户-商户消费优惠券特征

In [258]:
puchase_uc_co = puchase_uc[puchase_uc['Label'] == 1]
puchase_uc_co = puchase_uc_co[['Uc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_uc_entity('o2o_user_coupon', puchase_uc_co, uc)
features, _ = dfs('uc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_c = merge_by_keys(u_c, features, ['User_id', 'Coupon_id'], 'Use')



Exception ignored in: <generator object Scheduler.add_client at 0x14aa16d68>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [259]:
u_c['Use:AVG(Date_received)'] = u_c['Use:COUNT(purchase)'] / (puchase_uc_co.Date_received.max() - puchase_uc_co.Date_received.min()).days
u_c['Use:AVG(Date)'] = u_c['Use:COUNT(purchase)'] / (puchase_uc_co.Date.max() - puchase_uc_co.Date.min()).days
u_c['Use:AVG(15Days)'] = u_c['Use:COUNT(purchase)'] / 15
u_c['Use:COUNT(no_intime)'] = u_c['Consume:COUNT(purchase)'] - u_c['Use:COUNT(purchase)']

u_c['Use:RATE(purchase)'] = u_c['Use:COUNT(purchase)'] / u_c['Receive:COUNT(purchase)']
u_c['Use:RATE(purchase_4_consume)'] = u_c['Use:COUNT(purchase)'] / u_c['Consume:COUNT(purchase)']
u_c['Use:RATE(no_intime)'] = u_c['Use:COUNT(no_intime)'] / u_c['Receive:COUNT(purchase)']
u_c['Use:RATE(no_intime_4_consume)'] = u_c['Use:COUNT(no_intime)'] / u_c['Consume:COUNT(purchase)']

u_c['Use:OCC(purchase)'] = u_c['Use:COUNT(purchase)'] / u_c['Use:COUNT(no_intime)']

#### 用户-商户特征总结

In [260]:
u_c.columns = [ 'User_Coupon' + ':' + str(col) if str(col) not in ['User_id', 'Coupon_id'] else str(col) for col in u_c.columns]

In [261]:
i = pd.DataFrame()
i['column_name'] = u_c.columns.values
i['dtype'] = u_c.dtypes.values
i

Unnamed: 0,column_name,dtype
0,User_id,int64
1,Coupon_id,float64
2,User_Coupon:Receive:MIN(purchase.DAY(Duration)),int64
3,User_Coupon:Receive:MAX(purchase.DAY(Duration)),int64
4,User_Coupon:Receive:MEAN(purchase.DAY(Duration)),float64
5,User_Coupon:Receive:COUNT(purchase),int64
6,User_Coupon:Receive:AVG(Date_received),float64
7,User_Coupon:Receive:AVG(15Days),float64
8,User_Coupon:Consume:MIN(purchase.DAY(Duration)),float64
9,User_Coupon:Consume:MAX(purchase.DAY(Duration)),float64


### 抽取用户-优惠券类型特征

In [262]:
# 建立起优惠券的详细分类
discount_cat = offline_df['Discount_rate'].unique()
discount_cat_base = pd.DataFrame()
discount_cat_base['Discount_rate'] = discount_cat
discount_cat_base = discount_cat_base.reset_index()
discount_cat_base = discount_cat_base.rename(index=str, columns={
    'index': 'Discount_category_id'
})
discount_cat_base = discount_cat_base[discount_cat_base['Discount_category_id'] > 0]
discount_cat_base

Unnamed: 0,Discount_category_id,Discount_rate
1,1,150:20
2,2,20:1
3,3,200:20
4,4,30:5
5,5,50:10
6,6,10:5
7,7,100:10
8,8,200:30
9,9,20:5
10,10,30:10


In [263]:
def create_udc_entity(name, puchase_udc, udc):
    es = ft.EntitySet(id = name)

    purchase_variable_types = {'Purchase_id': ft.variable_types.Index,'Udc_id': ft.variable_types.Id}
    es = es.entity_from_dataframe(entity_id = 'purchase', dataframe = puchase_udc, index = 'Purchase_id', variable_types = purchase_variable_types)

    es = es.entity_from_dataframe(entity_id = 'udc', dataframe = udc, index='Udc_id',variable_types = {'Udc_id': ft.variable_types.Index})

    r_user_purchase = ft.Relationship(es['udc']['Udc_id'], es['purchase']['Udc_id'])
    es = es.add_relationship(r_user_purchase)
    
    return es

In [264]:
coupon_cat = pd.merge(coupon_saw, discount_cat_base, on='Discount_rate', how='left')
purchase_cat = pd.merge(purchase, coupon_cat, on="Coupon_id", how="left")
udc = purchase_cat[['User_id', 'Discount_category_id']].drop_duplicates()
udc = udc.reset_index()
udc = udc.rename(index=str, columns={
    'index': 'Udc_id'
})

puchase_udc = pd.merge(purchase_cat, udc, on=['User_id', 'Discount_category_id'], how='left')
coupon_udc = coupon.drop(['Merchant_id'], axis=1)

In [265]:
u_dc = udc.copy().drop(['Udc_id'], axis=1)

#### 抽取用户-优惠券消费特征

In [266]:
puchase_udc_r = puchase_udc[['Udc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)']]

es = create_udc_entity('o2o_user_coupon', puchase_udc_r, udc)
features, _ = dfs('udc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_dc = merge_by_keys(u_dc, features, ['User_id', 'Discount_category_id'], 'Receive')



Exception ignored in: <generator object Scheduler.add_client at 0x1368a95e8>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [267]:
# 该商户每隔多少时间被领取一张优惠券
u_dc['Receive:AVG(Date_received)'] = u_dc['Receive:COUNT(purchase)'] / (puchase_udc_r.Date_received.max() - puchase_udc_r.Date_received.min()).days
# 平均每张优惠券被消费几次
u_dc['Receive:AVG(15Days)'] = u_dc['Receive:COUNT(purchase)'] / 15

####  用户-优惠券消费优惠券特征

In [268]:
puchase_udc = puchase_udc[puchase_udc['Is_used_coupon'] == 1]
puchase_udc_c = puchase_udc[['Udc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_udc_entity('o2o_user_coupon', puchase_udc_r, udc)
features, _ = dfs('udc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_dc = merge_by_keys(u_dc, features, ['User_id', 'Discount_category_id'], 'Consume')

Exception ignored in: <generator object Scheduler.add_client at 0x12dcfa1b0>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [269]:
u_dc['Consume:AVG(Date_received)'] = u_dc['Consume:COUNT(purchase)'] / (puchase_udc_c.Date_received.max() - puchase_udc_c.Date_received.min()).days
u_dc['Consume:AVG(Date)'] = u_dc['Consume:COUNT(purchase)'] / (puchase_udc_c.Date.max() - puchase_udc_c.Date.min()).days
u_dc['Consume:AVG(15Days)'] = u_dc['Consume:COUNT(purchase)'] / 15
u_dc['Consume:COUNT(no_coupon)'] = u_dc['Receive:COUNT(purchase)'] - u_dc['Consume:COUNT(purchase)']
u_dc['Consume:RATE(purchase)'] = u_dc['Consume:COUNT(purchase)'] / u_dc['Receive:COUNT(purchase)']
u_dc['Consume:RATE(no_coupon)'] = u_dc['Consume:COUNT(no_coupon)'] / u_dc['Receive:COUNT(purchase)']
u_dc['Consume:OCC(purchase)'] = u_dc['Consume:COUNT(purchase)'] / u_dc['Consume:COUNT(no_coupon)']

#### 15天内用户-优惠券消费优惠券特征

In [270]:
puchase_udc_co = puchase_udc[puchase_udc['Label'] == 1]
puchase_udc_co = puchase_udc_co[['Udc_id', 'Coupon_id', 'Date_received', 'DAY(Duration)', 'Date']]

es = create_udc_entity('o2o_user_coupon', puchase_udc_r, udc)
features, _ = dfs('udc', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])
features = features.drop([
    'MIN(purchase.Coupon_id)',
    'MAX(purchase.Coupon_id)',
    'MEAN(purchase.Coupon_id)'
], axis=1)
u_dc = merge_by_keys(u_dc, features, ['User_id', 'Discount_category_id'], 'Use')

Future exception was never retrieved
future: <Future finished exception=CommClosedError('in <distributed.comm.tcp.TCPConnector object at 0x14eb4a160>: ConnectionRefusedError: [Errno 61] Connection refused')>
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/distributed/comm/tcp.py", line 334, in connect
    **kwargs)
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 1133, in run
    value = future.result()
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 1141, in run
    yielded = self.gen.throw(*exc_info)
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/tcpclient.py", line 232, in connect
    af, addr, stream = yield connector.start(connect_timeout=timeout)
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/pyt

In [271]:
u_dc['Use:AVG(Date_received)'] = u_dc['Use:COUNT(purchase)'] / (puchase_udc_co.Date_received.max() - puchase_udc_co.Date_received.min()).days
u_dc['Use:AVG(Date)'] = u_dc['Use:COUNT(purchase)'] / (puchase_udc_co.Date.max() - puchase_udc_co.Date.min()).days
u_dc['Use:AVG(15Days)'] = u_dc['Use:COUNT(purchase)'] / 15
u_dc['Use:COUNT(no_intime)'] = u_dc['Consume:COUNT(purchase)'] - u_dc['Use:COUNT(purchase)']

u_dc['Use:RATE(purchase)'] = u_dc['Use:COUNT(purchase)'] / u_dc['Receive:COUNT(purchase)']
u_dc['Use:RATE(purchase_4_consume)'] = u_dc['Use:COUNT(purchase)'] / u_dc['Consume:COUNT(purchase)']
u_dc['Use:RATE(no_intime)'] = u_dc['Use:COUNT(no_intime)'] / u_dc['Receive:COUNT(purchase)']
u_dc['Use:RATE(no_intime_4_consume)'] = u_dc['Use:COUNT(no_intime)'] / u_dc['Consume:COUNT(purchase)']

u_dc['Use:OCC(purchase)'] = u_dc['Use:COUNT(purchase)'] / u_dc['Use:COUNT(no_intime)']

#### 用户-优惠券特征总结

In [272]:
u_dc.columns = [ 'User_Discount_category' + ':' + str(col) if str(col) not in ['User_id', 'Discount_category_id'] else str(col) for col in u_dc.columns]

In [273]:
i = pd.DataFrame()
i['column_name'] = u_dc.columns.values
i['dtype'] = u_dc.dtypes.values
i

Unnamed: 0,column_name,dtype
0,User_id,int64
1,Discount_category_id,int64
2,User_Discount_category:Receive:MIN(purchase.DA...,int64
3,User_Discount_category:Receive:MAX(purchase.DA...,int64
4,User_Discount_category:Receive:MEAN(purchase.D...,float64
5,User_Discount_category:Receive:COUNT(purchase),int64
6,User_Discount_category:Receive:AVG(Date_received),float64
7,User_Discount_category:Receive:AVG(15Days),float64
8,User_Discount_category:Consume:MIN(purchase.DA...,int64
9,User_Discount_category:Consume:MAX(purchase.DA...,int64


## online特征的抽取

In [274]:
def extract_online(df):
    purchase = df.copy()
    purchase = purchase[purchase['Coupon_id']!='fixed']
    purchase['Coupon_id'] = purchase['Coupon_id'].astype('float')
    purchase = purchase[purchase['Coupon_id'] > 0]

    purchase['Duration'] = pd.to_datetime(purchase['Date'] - purchase['Date_received'])

    purchase = purchase.reset_index()
    purchase = purchase.rename(index=str, columns={"index": "Purchase_id"})

    user = purchase[['User_id']].drop_duplicates()
    merchant = purchase[['Merchant_id']].drop_duplicates()
    coupon = purchase[['Coupon_id', 'Merchant_id', 'Discount_rate']].drop_duplicates()
    purchase = purchase[['User_id', 'Coupon_id','Date_received', 'Date', 'Duration', 'Action']]

    es = create_entity('o2o', user, coupon, merchant, purchase)

    features, _ = dfs('purchase', es, trans_primitives_fn=['day', 'week', 'month', 'days'])
    purchase = pd.merge(purchase, features[['DAY(Date_received)', 'WEEK(Date_received)', 'MONTH(Date_received)', 'DAY(Duration)']], on='Purchase_id', how='left')
    purchase = purchase.drop(['Duration'], axis=1)
    
    # 不做fillna的动作是为了后面可以方便的找到max和min
    # purchase = purchase.fillna(0)
    
    purchase['DAY(Duration)'] = purchase['DAY(Duration)'] - 1

    # 用datetime相减去获得数值会是17
    purchase['Label'] = purchase.apply(lambda row: 1 if row['DAY(Duration)']< 16 else 0, axis=1)
    purchase['Is_used_coupon'] = purchase.apply(lambda row: 1 if row['DAY(Duration)']> 0 else 0, axis=1)

    coupon['Base_consume'] = 0.0
    coupon['Discount'] = 0.0
    coupon['Discount_money'] = 0.0
    coupon['Coupon_type'] = 0

    coupon = coupon.apply(lambda row: cal(row), axis=1)
    
    # Discount_rate 做保留，后续用来计算类型
    # coupon = coupon.drop(['Discount_rate'], axis=1)

    return purchase, coupon, merchant, user

In [275]:
purchase, coupon, merchant, user = extract_online(feature_alpha_online)



Exception ignored in: <generator object Scheduler.add_client at 0x125ef2840>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [276]:
uo = user.copy()

#### 用户消费属性

In [277]:
purchase_r = purchase[purchase['Action'] > 0]
purchase_r = purchase_r[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received']]
coupon_r = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_r, merchant, purchase_r)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count', 'avg_time_between'])

features.columns = [ 'Receive:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
uo = pd.merge(uo, features, on='User_id', how='left')

Exception ignored in: <generator object Scheduler.add_client at 0x127555570>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [278]:
uo['Receive:AVG(Date_received)'] = uo['Receive:COUNT(purchase)'] / (purchase_r.Date_received.max() - purchase_r.Date_received.min()).days
uo['Receive:AVG(Coupon_id)'] = uo['Receive:COUNT(purchase)'] / uo['Receive:NUM_UNIQUE(purchase.Coupon_id)']
uo['Receive:AVG(Merchant_id)'] = uo['Receive:COUNT(purchase)'] / uo['Receive:NUM_UNIQUE(purchase.coupon.Merchant_id)']
uo['Receive:AVG(15Days)'] = uo['Receive:COUNT(purchase)'] / 15

#### 用户消费优惠券特征

In [279]:
purchase_c = purchase[purchase['Action'] > 0]
purchase_c = purchase_c[purchase_c['Is_used_coupon'] == 1]
purchase_c = purchase_c[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Date', 'DAY(Duration)']]
coupon_c = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_c, merchant, purchase_c)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])

features.columns = [ 'Consume:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
uo = pd.merge(uo, features, on='User_id', how='left')

tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [280]:
uo['Consume:AVG(Date_received)'] = uo['Consume:COUNT(purchase)'] / (purchase_c.Date_received.max() - purchase_c.Date_received.min()).days
uo['Consume:AVG(Date)'] = uo['Consume:COUNT(purchase)'] / (purchase_c.Date.max() - purchase_c.Date.min()).days
uo['Consume:AVG(Coupon_id)'] = uo['Consume:COUNT(purchase)'] / uo['Consume:NUM_UNIQUE(purchase.Coupon_id)']
uo['Consume:AVG(Merchant_id)'] = uo['Consume:COUNT(purchase)'] / uo['Consume:NUM_UNIQUE(purchase.coupon.Merchant_id)']
uo['Consume:AVG(15Days)'] = uo['Consume:COUNT(purchase)'] / 15
uo['Consume:COUNT(no_coupon)'] = uo['Receive:COUNT(purchase)'] - uo['Consume:COUNT(purchase)']

uo['Consume:RATE(purchase)'] = uo['Consume:COUNT(purchase)'] / uo['Receive:COUNT(purchase)']
uo['Consume:RATE(no_coupon)'] = uo['Consume:COUNT(no_coupon)'] / uo['Receive:COUNT(purchase)']
uo['Consume:OCC(purchase)'] = uo['Consume:COUNT(purchase)'] / uo['Consume:COUNT(no_coupon)']

#### 15天内用户消费优惠券特征

In [281]:
purchase_co = purchase[purchase['Action'] > 0]
purchase_co = purchase_co[purchase_co['Label'] == 1]
purchase_co = purchase_co[['Purchase_id', 'User_id', 'Coupon_id', 'Date_received', 'Date', 'DAY(Duration)']]
coupon_co = coupon[['Coupon_id', 'Merchant_id', 'Discount']]

es = create_entity('o2o_user_base', user, coupon_co, merchant, purchase_co)
features, _ = dfs('user', es, agg_primitives_fn = ['min', 'max', 'mean', 'num_unique', 'count'])

features.columns = [ 'Use:' + str(col) if str(col) != 'User_id' else str(col) for col in features.columns]
uo = pd.merge(uo, features, on='User_id', how='left')

Exception ignored in: <generator object Scheduler.add_client at 0x127555750>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [282]:
uo['Use:AVG(Date_received)'] = uo['Use:COUNT(purchase)'] / (purchase_co.Date_received.max() - purchase_co.Date_received.min()).days
uo['Use:AVG(Date)'] = uo['Use:COUNT(purchase)'] / (purchase_co.Date.max() - purchase_co.Date.min()).days
uo['Use:AVG(Coupon_id)'] = uo['Use:COUNT(purchase)'] / uo['Use:NUM_UNIQUE(purchase.Coupon_id)']
uo['Use:AVG(Merchant_id)'] = uo['Use:COUNT(purchase)'] / uo['Use:NUM_UNIQUE(purchase.coupon.Merchant_id)']
uo['Use:AVG(15Days)'] = uo['Use:COUNT(purchase)'] / 15
uo['Use:COUNT(no_intime)'] = uo['Consume:COUNT(purchase)'] - uo['Use:COUNT(purchase)']

uo['Use:RATE(purchase)'] = uo['Use:COUNT(purchase)'] / uo['Receive:COUNT(purchase)']
uo['Use:RATE(purchase_4_consume)'] = uo['Use:COUNT(purchase)'] / uo['Consume:COUNT(purchase)']
uo['Use:RATE(no_intime)'] = uo['Use:COUNT(no_intime)'] / uo['Receive:COUNT(purchase)']
uo['Use:RATE(no_intime_4_consume)'] = uo['Use:COUNT(no_intime)'] / uo['Consume:COUNT(purchase)']

uo['Use:OCC(purchase)'] = uo['Use:COUNT(purchase)'] / uo['Use:COUNT(no_intime)']

#### 用户特征总结

In [283]:
uo.columns = [ 'UserOnline' + ':' + str(col) if str(col) not in ['User_id'] else str(col) for col in uo.columns]

In [284]:
i = pd.DataFrame()
i['column_name'] = uo.columns.values
i['dtype'] = uo.dtypes.values
i

Unnamed: 0,column_name,dtype
0,User_id,int64
1,UserOnline:Receive:NUM_UNIQUE(purchase.Coupon_id),int64
2,UserOnline:Receive:COUNT(purchase),int64
3,UserOnline:Receive:MIN(purchase.coupon.Discount),float64
4,UserOnline:Receive:MAX(purchase.coupon.Discount),float64
5,UserOnline:Receive:MEAN(purchase.coupon.Discount),float64
6,UserOnline:Receive:NUM_UNIQUE(purchase.coupon....,int64
7,UserOnline:Receive:AVG(Date_received),float64
8,UserOnline:Receive:AVG(Coupon_id),float64
9,UserOnline:Receive:AVG(Merchant_id),float64


## 合并数据

In [285]:
def extract_base(df):
    purchase = df.copy()
    purchase = purchase[purchase['Coupon_id'] > 0]

    purchase = purchase.reset_index()
    purchase = purchase.rename(index=str, columns={"index": "Purchase_id"})

    user = purchase[['User_id']].drop_duplicates()
    merchant = purchase[['Merchant_id']].drop_duplicates()
    coupon = purchase[['Coupon_id', 'Merchant_id', 'Discount_rate']].drop_duplicates()
    purchase = purchase[['User_id', 'Coupon_id','Date_received', 'Distance']]

    es = create_entity('o2o', user, coupon, merchant, purchase)

    features, _ = dfs('purchase', es, trans_primitives_fn=['day', 'week', 'month', 'days'])
    purchase = pd.merge(purchase, features[['DAY(Date_received)', 'WEEK(Date_received)', 'MONTH(Date_received)']], on='Purchase_id', how='left')
    
    # 不做fillna的动作是为了后面可以方便的找到max和min
    # purchase = purchase.fillna(0)
    
    coupon['Base_consume'] = 0.0
    coupon['Discount'] = 0.0
    coupon['Discount_money'] = 0.0
    coupon['Coupon_type'] = 0

    coupon = coupon.apply(lambda row: cal(row), axis=1)
    
    # Discount_rate 做保留，后续用来计算类型
    # coupon = coupon.drop(['Discount_rate'], axis=1)

    return purchase, coupon, merchant, user

In [286]:
purchase, coupon, merchant, user = extract_base(dataset_alpha)
dataset = pd.merge(purchase, coupon, on='Coupon_id', how='left')
dataset = pd.merge(dataset, discount_cat_base, on='Discount_rate', how='left')

dataset.columns = [ 'Base' + ':' + str(col) if str(col) not in ['User_id', 'Merchant_id', 'Coupon_id', 'Discount_rate', 'Discount_category_id'] else str(col) for col in dataset.columns]

dataset = pd.merge(dataset, u, on='User_id', how='left')
dataset = pd.merge(dataset, m, on='Merchant_id', how='left')
dataset = pd.merge(dataset, c, on='Coupon_id', how='left')
dataset = pd.merge(dataset, um, on=['User_id','Merchant_id'], how='left')
dataset = pd.merge(dataset, uc, on=['User_id','Coupon_id'], how='left')
dataset = pd.merge(dataset, udc, on=['User_id','Discount_category_id'], how='left')
dataset = pd.merge(dataset, uo, on='User_id', how='left')

dataset = dataset.drop([
    'Coupon:Merchant_id',
    'Base:Purchase_id', 'User_id', 'Coupon_id',
    'Discount_category_id',
    'Um_id', 'Uc_id', 'Udc_id'
], axis=1)



Exception ignored in: <generator object Scheduler.add_client at 0x13b3b6048>
RuntimeError: generator ignored GeneratorExit
tornado.application - ERROR - Exception in Future <Future cancelled> after timeout
Traceback (most recent call last):
  File "/Users/leewind/.local/share/virtualenvs/leewind-p6XO93Th/lib/python3.7/site-packages/tornado/gen.py", line 970, in error_callback
    future.result()
concurrent.futures._base.CancelledError


In [287]:
dataset.to_csv('../features/' + FILENAME + '.csv')