# BERT Baseline with All Streamers Data

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import random
import itertools
from tqdm.notebook import tqdm
import pickle

ORIGIN_DATA = pd.read_pickle('../../data/new_txn.pkl')
BERT = np.load('../../data/jambo_iname2embedding.npy', allow_pickle=True)[None][0]

In [2]:
# 測試少量資料
DATA = ORIGIN_DATA.reset_index(drop=True)

## Preprocessing & Storage

In [3]:
%%time
'''
Make Stream Indexer
'''
DATA['stream'] = DATA['user_id'].astype(str) + DATA['場次'].astype(str)

# Set stream_id in time order
sorted_streams = DATA.sort_values(by=['下單日期'])['stream'].reset_index(drop=True).unique()
stream_dict = { x: i for i, x in enumerate(sorted_streams) }

DATA['stream'] = DATA['stream'].map(stream_dict)

CPU times: user 7.9 s, sys: 1.05 s, total: 8.95 s
Wall time: 8.95 s


In [4]:
%%time
'''
THRESHOLDS
1. Users bought threshold: could be 9/15/25/1(consider all)
2. Stream buyer threshold: could be 2/25/73/209
   -> 避免 Feature 即為正確答案
'''
buy_threshold = 9
stream_threshold = 1

user_count = DATA.groupby(['asid']).count()
uid = user_count['下單日期'].loc[user_count['下單日期'] > buy_threshold].index
DATA = DATA.loc[DATA['asid'].isin(uid)]

data_groupby_stream = DATA.groupby(['stream'])
g_s_count = data_groupby_stream.count()
available_streams = g_s_count.loc[g_s_count['訂單編號'] > stream_threshold].index
DATA = DATA.loc[DATA['stream'].isin(available_streams)]

CPU times: user 15.6 s, sys: 814 ms, total: 16.4 s
Wall time: 16.4 s


In [5]:
%%time
'''
period & item_id
'''
DATA['period'] = DATA['下單日期'].astype(str).apply(lambda x: x[:7])
DATA.rename(columns = {'商品id': 'item_id'}, inplace=True)

CPU times: user 32.4 s, sys: 140 ms, total: 32.6 s
Wall time: 32.5 s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [6]:
%%time
'''
One Hot Encoding
* 處理 shipment, payment
* 要先做 label encoding 才能做 one hot encoding
'''
le = LabelEncoder()

DATA['shipment'] = le.fit_transform(DATA['運送方式'])
DATA['payment'] = le.fit_transform(DATA['付款方式'])
DATA['streamer_id'] = le.fit_transform(DATA['user_id'])
DATA = pd.get_dummies(DATA,
                      prefix=['shipment', 'payment', 'streamer'],
                      columns=['shipment', 'payment', 'streamer_id'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


CPU times: user 9.08 s, sys: 1.11 s, total: 10.2 s
Wall time: 10.2 s


In [7]:
%%time
'''
USER Features 
Generate USER_STREAM_CONTEXT
'''
data_groupby_user_stream = DATA.groupby(['asid', 'stream'])


# Get group by sum
USER_STREAM_CONTEXT = data_groupby_user_stream.sum()
USER_STREAM_CONTEXT = USER_STREAM_CONTEXT[['shipment_0', 'shipment_1', 'shipment_2', 'shipment_3', 
                                           'shipment_4', 'shipment_5', 'shipment_6', 'payment_0', 
                                           'payment_1', 'payment_2', 'payment_3', 'payment_4', 'payment_5',
                                           'payment_6', 'payment_7', 'payment_8', 'streamer_0', 'streamer_1',
                                           'streamer_2', 'streamer_3', 'streamer_4', 'streamer_5', 
                                           'streamer_6', 'streamer_7', 'streamer_8']]

# Get group by count & mean
u_s_count = data_groupby_user_stream.count()
u_s_avg = data_groupby_user_stream.mean()

# Assign seleted values to USER_STREAM_CONTEXT
USER_STREAM_CONTEXT[['channel_name', 'user_id', 'item_id', '訂單編號', '專屬折扣']] = u_s_count[['channel_name', 'user_id', 'item_id', '訂單編號', '專屬折扣']]
USER_STREAM_CONTEXT[['運費', '總金額', '數量', '單價']] = u_s_avg[['運費', '總金額', '數量', '單價']]

# Rename columns
USER_STREAM_CONTEXT.rename(columns={'user_id': 'streamer_id',
                                    'channel_name': 'channel_cnt',
                                    '訂單編號': 'order_cnt',
                                    'item_id': 'item_var_cnt',
                                    '運費': 'shipment_fee',
                                    '專屬折扣': 'discount',
                                    '總金額': 'total_price',
                                    '單價': 'avg_item_price',
                                    '數量': 'avg_item_cnt'
                                   },
                           inplace=True)

CPU times: user 12.3 s, sys: 1.97 s, total: 14.3 s
Wall time: 14.2 s


In [8]:
%%time
'''
USER_STREAM_MDB: user bought history average embedding (by stream) through items.unique.index
* BERT_INDEX: [item_name] -> item_idx
* item_name_embedding: [item_idx] -> item_embedding
* user_stream_buy_list: (buyer, stream) -> [items]
'''
# Append item embeddings according to items list's order & convert to np array
item_names = DATA['item_name'].unique().tolist()
ITEM_EMB_BY_IDX = []
for iname in tqdm(item_names):
  ITEM_EMB_BY_IDX.append(BERT[iname])
ITEM_EMB_BY_IDX = np.array(ITEM_EMB_BY_IDX)

# Map item name to item index
BERT_INDEX = {name:i for i, name in enumerate(item_names)}

# Item names grouped by asid & stream
user_stream_buy_list = DATA.groupby(['asid', 'stream'])['item_name'].apply(list)

  0%|          | 0/162189 [00:00<?, ?it/s]

CPU times: user 48.4 s, sys: 207 ms, total: 48.6 s
Wall time: 48.6 s


In [9]:
'''
Calculate user cumulated stream embedding
* user_stream_order_dict: { User: [stream list in order] }
* buy list: [asid, streams]
* 需求：可以選擇要「給予權重」或「直接平均」
'''
user_stream_order = DATA[['asid', 'stream']].groupby(['asid'])['stream'].apply(set)
USER_LIST = DATA['asid'].unique().tolist()
# test asid: ['1000057940522534']
# test streams: {3397, 3415, 3815, 3865, 4365, 4478, 4563, 5028}

# AVG
FINAL_EMB = {}
for asid in tqdm(USER_LIST):
  # find all the stream user attended and sort them in time order
  streamlist = list(user_stream_order[asid])
  streamlist.sort()
  # find items bought in each stream
  buylist = [user_stream_buy_list[asid, stream] for stream in streamlist]
  # cumulated items bought in each stream
  cumulative_buylist = [sum(buylist[:i], []) for i in range(1, len(buylist) + 1)]
  # map { stream: cumulate_items } to a dictionary
  cumulative_dict = dict(zip(streamlist, cumulative_buylist))
  # calculate average embedding and put them into FINAL_EMB
  # FOR EACH STREAM
  for k in cumulative_dict.keys():
    # get all items' name from stream
    i_list = cumulative_dict[k]
    # get items' BERT_INDEX for retrived items
    all_buy_item_index = [BERT_INDEX[item] for item in i_list]
    # get their embeddings and calculate mean
    user_emb = ITEM_EMB_BY_IDX[all_buy_item_index].mean(axis=0)
    # put them into the FINAL_EMB as previous set format
    FINAL_EMB[(asid, k)] = user_emb

  0%|          | 0/79207 [00:00<?, ?it/s]

In [10]:
%%time
# store 
with open('cumulative_item_emb.pkl', 'wb') as handle:
    pickle.dump(FINAL_EMB, handle, protocol=pickle.HIGHEST_PROTOCOL)

# load
with open('cumulative_item_emb.pkl', 'rb') as handle:
    b = pickle.load(handle)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [10]:
%%time
'''
ITEM Features
1. Attach item embeddings to DATA
2. Reset ITEM_DF's index
'''
LB_BERT = [f'bert_{x}' for x in range(768)] # constant

# Generate ITEM_DF for DATA
ITEM_FEAT = pd.DataFrame.from_dict(BERT, orient='index').astype('float32') # decrease processing time

CPU times: user 51.9 s, sys: 1.43 s, total: 53.3 s
Wall time: 53.3 s


In [9]:
'''
ITEM_STREAM_DICT: 直播 id 對應到的商品列表
REAL_BOUGHT_DF: 計算 reward 用
LAST_BOUGHT_STREAM: 作為 game_over
'''
# Constant: all stream list
STREAM_LIST = DATA.stream.unique()

# [Reward]
REAL_BOUGHT_DF = DATA.loc[:, ['asid', 'stream', 'item_id']]

# [Action]
LAST_BOUGHT_STREAM = USER_STREAM_CONTEXT.reset_index().groupby('asid', as_index=False).last().loc[:, ['asid', 'stream']].set_index('asid')

In [10]:
%%time
'''
STREAM Features:
* STREAM_FEAT: avg_price, avg_sum, count order #
* STREAM_ITEM_EMB: [stream] -> avg item embedding
'''
data_gb_stream = DATA.groupby(['stream'])
STREAM_FEAT = data_gb_stream.mean()[['單價', '總金額']]
STREAM_FEAT[['count']] = data_gb_stream.count()[['item_name']]
STREAM_FEAT.rename(columns={'單價':'avg_price', '總金額':'avg_sum'}, inplace=True)

# Item names grouped by stream
stream_item_list = data_gb_stream['item_name'].apply(list)

STREAM_ITEM_EMB = {}
for stream in tqdm(stream_item_list.index):
  # Find each stream's items
  item_list = stream_item_list[stream]
  # Find these items' indeces
  all_stream_item_index = [BERT_INDEX[item] for item in item_list]
  # Use the indeces found above to locate BERT embeddings & calculate mean
  stream_emb = ITEM_EMB_BY_IDX[all_stream_item_index].mean(axis=0)
  # Put result into dict
  STREAM_ITEM_EMB[stream] = stream_emb

  0%|          | 0/7701 [00:00<?, ?it/s]

CPU times: user 12.7 s, sys: 2.18 s, total: 14.8 s
Wall time: 14.8 s


In [13]:
%%time
# store 
with open('stream_item_emb.pkl', 'wb') as handle:
    pickle.dump(STREAM_ITEM_EMB, handle, protocol=pickle.HIGHEST_PROTOCOL)

CPU times: user 35.8 ms, sys: 36 ms, total: 71.7 ms
Wall time: 70.7 ms


In [18]:
USER_STREAM_CONTEXT.to_pickle('user_stream_context.pkl')
STREAM_FEAT.to_pickle('stream_feat.pkl')

In [21]:
np.save('item_emb_by_idx.pkl', ITEM_EMB_BY_IDX, allow_pickle=True)

In [None]:
'''
Data to use(exported)
1. cumulative_item_emb
2. stream_item_emb
3. user_stream_context
4. stream_feat
5. item_emb
'''

---
## Train DQN model
* Input: `user_df` 253, `item_df` 231(BERT: 768), interact (?), `reward` 1
* Output: recommend a list of items
* Methods Needed
    * Environment Function
    * Choose Action
    * Store Transition
    * Learn

In [None]:
USERS_LIST = DATA['asid'].unique()

In [63]:
LAST_BOUGHT_STREAM.head()

Unnamed: 0_level_0,stream
asid,Unnamed: 1_level_1
1000057940522534,6012
1000149184139055,4962
1000250653732283,906
1000394583731592,527
1000409986993290,3397


In [3]:
USER_STREAM_CONTEXTg Process

def train(model, exp_replay, epochs, batch_size, num_episode=1000, verbose=1, reward_set='strict', hist=[], c_hist=[], rec_list=[]):
  # total_actions = ITEM_DF.shape[0]
  # total_episodes = len(USER_LIST)
  # Reset win counter
  c_win_cnt = 0

  for e in range(epochs):
    rec_cnt = 0
    win_cnt = 0
    loss = 0.
    # TODO/MAIN: Apply user preference changes as epsilon
    # epsilon for exploration - dependent inversely on the training epoch
    epsilon = 4 / ((e + 1) ** (1 / 2))

    # handling episodes by assigning users from USER_LIST
    # Each user represent an Episode
    episodes = np.random.choice(USER_LIST, num_episode, replace=False)

    print(f'Epoch {e} started.   Time: {datetime.now(pytz.timezone("Asia/Taipei")).strftime("%H:%M:%S")}')
    # ------------------- Episode (User) -------------------------------
    for asid in episodes:
      # get [episode data, stream list, final stream] by asid
      user_all_streams = USER_STREAM_CONTEXT.xs(asid, level="asid")
      stream_list = user_all_streams.index
      final_stream = LAST_BOUGHT_STREAM.loc[asid, 'stream']
      
      # ----------------- Runs (User x All_Stream) ---------------------
      for i, stream in enumerate(stream_list):          
        game_over = stream == final_stream
                
        # Get full state: current_state = user_stream + item_stream
        # 用上一場紀錄預測下一場直播會購買的商品
        current_state = get_full_state(user_all_streams, stream_list, i)
        stream_items = STREAM_ITEM_DICT[stream]
        
        # --------------- Explore/Exploit Section ----------------------
        if np.random.rand() <= epsilon:
          # Explore by randomly select 10/n items from candidate_items
          # Get all items from the stream
          sample_actions = random.sample(stream_items, 10) if len(stream_items) > 10 else stream_items
          action_ids = gen_exist_series(sample_actions, stream_items)
        else:
          # Exploit by choosing action from the model's prediction
          pred_actions = model_predict_top10(model, current_state)
          action_ids = gen_exist_series(pred_actions, stream_items)

        # --------------- Get next state & info to store ---------------
        reward = get_reward(asid, stream, action_ids)
        next_state = get_full_state(user_all_streams, stream_list, i+1) if not game_over else []

        rec_cnt += 1
        if sum(reward) > 0:
          c_win_cnt += 1
          win_cnt += 1

        # --------------- Calculating Interest Changes -----------------
        interest_score = calculate_interest_change(user_all_streams, stream_list, i)

        # --------------- Store Experience -----------------------------
        exp_replay.remember(interest_score,
                            [current_state, action_ids, reward, next_state],
                            game_over)
        

        # --------------- Load batch of experiences --------------------
        inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
        # train model on experiences
        batch_loss = model.train_on_batch(inputs, targets)
        loss += batch_loss
            
    if verbose > 0:
      print(f'Epoch: {e}/{epochs} | Loss {loss} | Win count {win_cnt} | Rec count {rec_cnt} | Time {datetime.now(pytz.timezone("Asia/Taipei")).strftime("%H:%M:%S")}')
    
    # Track win history to later check if our model is improving at the game over time.
    hist.append(win_cnt)
    c_hist.append(c_win_cnt)
    rec_list.append(rec_cnt)

In [4]:
pd.set_option('mode.chained_assignment', None)

# parameters
MAX_MEMORY = 1000  # Maximum number of experiences we are storing
BATCH_SIZE = 2  # Number of experiences we use for training per batch
EPOCH = 50
TOTAL_ACTIONS = 1 # probability of ordering
NUM_EPISODE = 100
HIDDEN_SIZE = 512


warnings.simplefilter(action='ignore', category=FutureWarning)

### Main Method

In [None]:
exp_replay = ReplayBuffer(max_memory=MAX_MEMORY)# Our model's architecture parameters
input_size = 473 # The input shape for model - this comes from the output shape of the CNN Mobilenet

# Setting up the model with keras.
model = keras.Sequential()
model.add(Dense(HIDDEN_SIZE, input_shape=(input_size,), activation='relu'))
model.add(Dense(HIDDEN_SIZE, activation='tanh'))
model.add(Dense(TOTAL_ACTIONS))
model.compile(Adam(learning_rate=.000001), "mse")

hist = []
c_hist = []
rec_list = []

# Training the model
train(model, 
      exp_replay, 
      epochs=EPOCH, 
      batch_size=BATCH_SIZE, 
      num_episode=NUM_EPISODE, 
      verbose=1, 
      reward_set='strict',
      hist=hist,
      c_hist=c_hist,
      rec_list=rec_list)
plt.plot(range(EPOCH), hist)

Epoch 0 started.   Time: 13:28:43
