# Cross Entropy Capturing User Preference Changes Implementation

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import random
import itertools

In [3]:
DATA = pd.read_csv('../data/6897-1y-c.csv')

In [4]:
DATA['場次'].isnull().sum()/DATA.shape[0]

0.0365678187939242

## Preprocessing

### Handle Username Formats

In [5]:
def replace_invalid(string):
  string = string.replace('-', '')
  string = string.replace('/', '')
  string = string.replace('*', '')
  string = string.replace(' ', '')
  string = string.replace('劉德玲', '')
  return string

DATA['聯絡電話'] = DATA['聯絡電話'].apply(replace_invalid)

### 處理 period （前六碼）

In [6]:
DATA['period'] = DATA['下單日期'].astype(str).apply(lambda x: x[:6])

### One Hot Encoding
* 處理 category, shipment, payment
* 要先做 label encoding 才能做 one hot encoding

In [7]:
le = LabelEncoder()
DATA['category_label'] = le.fit_transform(DATA['商品分類'])
DATA = pd.concat([DATA, pd.get_dummies(DATA['category_label'], prefix='cat')], axis = 1)

DATA['shipment'] = le.fit_transform(DATA['運送方式'])
DATA['payment'] = le.fit_transform(DATA['付款方式'])
DATA = pd.get_dummies(DATA,
                      prefix=['shipment', 'payment'],
                      columns=['shipment', 'payment'])

### Create `item_id`

In [8]:
# Generating item ids
shuffled_items = DATA['商品名稱'].sample(frac=1).reset_index(drop=True).unique()
item_dict = { x: i for i, x in enumerate(shuffled_items) }

DATA['item_id'] = DATA['商品名稱'].map(item_dict)

### 場次為空
<font color='yellow'>

問題：
1. 有些下單日期與場次不相同，兩個欄位儲存內容的差異？
2. 填入 下單日期 作為場次後，user x streams 的數量為 72,380（有購買紀錄）
   * 如果要讓 user x 所有 stream，資料筆數會從 1,437,160 變成 4,605,980，會產生很多場次屬於 沒有答案 的情境
3. 場次為空的紀錄有 10547 筆，佔 total data 的 0.0366

</font>

In [9]:
DATA['場次'].isnull().sum()/DATA.shape[0], DATA.shape[0]

(0.0365678187939242, 288423)

In [10]:
DATA['場次'] = DATA['場次'].fillna(DATA['下單日期'])

## Constants Needed

In [11]:
LB_CE = [f'cat_{i}' for i in range(228)] + [f'shipment_{i}' for i in range(4)] + [f'payment_{i}' for i in range(2)]
USER_LIST = DATA['聯絡電話'].unique()
STREAM_LIST = DATA['場次'].unique()
STREAM_LIST.sort()

# Creating the row axis labels 
LB_PERIOD = list(DATA['period'].unique())
LB_USER = ['user']
LB_PQ = ['total_price', 'total_quantity']
LB_CAT = [f'cat_{x}' for x in list(range(228))]
LB_SHIPMENT_PAYMENT = ['shipment_0', 'shipment_1', 'shipment_2', 'shipment_3', 'payment_0', 'payment_1']
USER_LB = LB_PERIOD + LB_USER + LB_PQ + LB_SHIPMENT_PAYMENT
LB_ITEMS = ['item_id', 'avg_price', 'count'] + LB_CAT


## Context Representation
user x stream data

<font color='yellow'>
如果 context = user + stream + other feature，好像沒有存在的必要
</font>

In [12]:
USER_STREAM_CONTEXT = DATA.groupby(['聯絡電話', '場次']).sum().loc[:, :'payment_1']
USER_STREAM_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,下單日期,單價,數量,折扣,總金額,專屬折扣,運費,信用卡手續費,紅利折抵,收款金額,...,cat_224,cat_225,cat_226,cat_227,shipment_0,shipment_1,shipment_2,shipment_3,payment_0,payment_1
聯絡電話,場次,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
02268052110975092973,20201029.0,20201029,677,1,0,677,0,60,0,0,737,...,0,0,0,0,0,1,0,0,0,1
0266020137,20210718.0,40421436,532,10,0,2660,0,120,82,0,4136,...,0,0,0,0,0,2,0,0,2,0
0266020137,20210729.0,80842916,1158,8,0,1274,0,0,0,0,0,...,0,0,0,0,0,4,0,0,4,0
0266213401,20210701.0,40421402,516,2,0,516,0,120,0,0,1134,...,0,0,0,0,0,2,0,0,0,2
0266213401,20210711.0,40421422,498,2,0,498,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0989994927,20210606.0,161684849,1696,11,0,1922,0,60,0,0,1982,...,0,0,0,0,0,8,0,0,0,8
0989996692,20210314.0,20210314,999,1,0,999,0,60,37,0,1861,...,1,0,0,0,1,0,0,0,1,0
0989996692,20210318.0,60630954,586,4,0,765,0,0,0,0,0,...,0,0,0,0,3,0,0,0,3,0
0989996692,20210321.0,60630963,1886,3,0,1886,0,60,89,0,4528,...,0,0,0,0,0,3,0,0,3,0


In [13]:
USER_STREAM_CONTEXT.shape, len(DATA['聯絡電話'].unique()), len(DATA['場次'].unique())

((72380, 245), 11778, 391)

## Item Representation
- 商品 id 來源為 `item_dict`: `商品名稱: item_id`
- 可用欄位： `categories(one-hot)`, `price`, `被購買次數`
- Columns: `['下單日期', '商品名稱', '規格', '單價', '數量', '折扣', '總金額', '專屬折扣', '運費', '信用卡手續費', '紅利折抵', '收款金額', '付款方式', '運送方式', '收件人', '寄送地址', '聯絡電話', '場次', '處理後名稱', '商品分類', 'period', 'category_label', 0-227, 'shipment_0', 'shipment_1', 'shipment_2', 'shipment_3', 'payment_0', 'payment_1', 'item_id']`

In [14]:
def get_item_df():
  item_df = pd.DataFrame(columns=LB_ITEMS)
  item_df['item_id'] = DATA.item_id.unique()
  # Count
  item_count = DATA.groupby('item_id').size()
  item_df['count'] = item_df['item_id'].apply(lambda x: item_count[x])
  # Cat
  item_df.loc[:, LB_CAT] = DATA.groupby('item_id').sum()[LB_CAT]
  # Price
  item_df['avg_price'] = DATA.groupby('item_id').mean()['單價']
  return item_df

In [15]:
ITEM_DF = get_item_df()

## Generate Item x Stream data
* unique items x each stream
* index: item_id
* column: stream_id

In [16]:
def map_item_stream(item_id):
  in_streams = DATA.loc[DATA.item_id == item_id]['場次'].unique()
  res_series = pd.Series([0]*len(STREAM_LIST))
  res_series.index = STREAM_LIST
  res_series[in_streams] = 1
  return res_series

In [17]:
ITEM_STREAM_DF = pd.DataFrame(index=DATA.item_id.unique(), columns=DATA['場次'].unique())
ITEM_STREAM_DF = ITEM_STREAM_DF.apply(lambda x: map_item_stream(x.name), axis=1, result_type='expand')

## [Reward] Generate Real Bought DF

In [18]:
REAL_BOUGHT_DF = DATA.loc[:, ['聯絡電話', '場次', 'item_id']]

## [Action] User bought last stream

In [19]:
LAST_BOUGHT_STREAM = USER_STREAM_CONTEXT.reset_index().groupby('聯絡電話', as_index=False).last().loc[:, ['聯絡電話', '場次']].set_index('聯絡電話')

---
### Test Area

In [27]:
# STREAM_LIST.where(20210606.0)
np.where(STREAM_LIST==20210606.0)

(array([275], dtype=int64),)

In [70]:
USER_STREAM_CONTEXT.columns.to_list()+LB_ITEMS == input_state.index.to_list()+item_feat.columns.to_list()

True

In [80]:
input_state = get_full_state(USER_STREAM_CONTEXT.xs('0989994927', level="聯絡電話"), 276)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer_missing(indexer, value)


---
## Train DQN model
* Input: `user_df` 253, `item_df` 231(BERT: 768), interact (?), `reward` 1
* Output: recommend a list of items
* Methods Needed
    * Environment Function
    * Choose Action
    * Store Transition
    * Learn

### Environment
Get full state

<font color='yellow'>

* 參考原論文: user _interest 初始值為 random vector
1. 很多筆記錄裡用戶只在一場直播裡購買，如果 random 的話 accuracy 會下降
2. 不可能用該期直播的相關紀錄作為 state → 在 user/state context 裡加上 RFML/streamer information
3. user x all_stream → 在第一場直播都給予 random vector，對 accuracy 的影響就不會太高（？

</font>

In [58]:
USER_ALL_STREAM_INIT = USER_STREAM_CONTEXT.describe().loc['50%']

def get_full_state(user_all_streams, i):
  # Get full state: current_state = user_stream + item_stream
  # 第一次參加直播/cold start
  # CE paper: user_interest part init with random vector
  # TODO init with random values
  #      Cold start problem
  # USER PART
  user_part = USER_ALL_STREAM_INIT if (i - 1) == -1 else user_all_streams.loc[STREAM_LIST[(i - 1)]]
  # ITEMS PART
  # Get all items from stream
  # USER_PART + ITEM_PART
  stream_items = ITEM_STREAM_DF.loc[ITEM_STREAM_DF[STREAM_LIST[i]] == 1].index.to_list()
  user_part.loc['cand_item'] = stream_items
  return user_part


def get_reward(user_phone, stream, action_ids):
  test_real_bought_ids = REAL_BOUGHT_DF.loc[(REAL_BOUGHT_DF['聯絡電話'] == user_phone) 
                                            & (REAL_BOUGHT_DF['場次'] == stream)]['item_id'].values

  reward = [int(action_id in test_real_bought_ids) for action_id in action_ids]
  reward = pd.Series(reward, index=action_ids)
  return reward

from sklearn.metrics import log_loss

def calculate_interest_change(user_all_streams, i):
  if i < 0: return 0

  former_stream = STREAM_LIST[i-1]
  current_stream = STREAM_LIST[i]

  test_ce = user_all_streams.loc[:, LB_CE]
  ce = log_loss(test_ce.loc[former_stream], test_ce.loc[current_stream])
  if ce < 0.01: ce = 0
  else: ce = round(ce, 3)
  return ce

### Collecting Training Data

In [23]:
class ReplayBuffer:
  def __init__(self, max_memory=100000, discount=.9):
    """
    Setup
    max_memory: the maximum number of experiences we want to store
    memory: a list of experiences
    discount: the discount factor for future experience
    In the memory the information whether the game ended at the state is stored seperately in a nested array
    [...
    [experience, game_over]
    [experience, game_over]
    ...]
    """
    self.max_memory = max_memory
    self.memory = list()
    self.discount = discount

  def remember(self, interest_score, states, game_over):
    # Save a state to memory
    self.memory.append([interest_score, states, game_over])
    # We don't want to store infinite memories, so if we have too many, we just delete the oldest one
    if len(self.memory) > self.max_memory:
      del self.memory[0]

  def get_batch(self, model, batch_size=10):

    # How many experiences do we have?
    len_memory = len(self.memory)

    # Calculate the number of actions that can possibly be taken in the game.
    # Actions: 0 = not recommend, 1 = recommend
    num_actions = model.output_shape[-1]

    # Dimensions of our observed states, ie, the input to our model.
    # Memory:  [
    #   [interest_score, [ [...state], action, reward, next_state_idx], game_over],
    #   [interest_score, [ [...state], action, reward, nexr_state_idx], game_over],
    #   ...
    # ]
    env_dim = self.memory[0][1][0].shape[1]

    # We want to return an input and target vector with inputs from an observed state.
    inputs = np.zeros((min(len_memory, batch_size), env_dim))

    # ...and the target r + gamma * max Q(s',a')
    # Note that our target is a matrix, with possible fields not only for the action taken but also for
    # the other possible actions. The actions not take the same value as the prediction to not affect them
    targets = np.zeros((inputs.shape[0], num_actions))

    # We draw states to learn from randomly
    for i, idx in enumerate(np.random.randint(0, len_memory, size=inputs.shape[0])):
      # Here we load one transition <s, a, r, s'> from memory
      state_t, action_t, reward_t, state_tp1 = self.memory[idx][1]
      state_t = state_t.astype('float32')
      game_over = self.memory[idx][2]

      # add the state s to the input
      # TODO 
      '''
      修改倒入 state 的方式 input = (state - item) + item_feat
      拆掉 model_predict 成 function
      '''
      inputs[i:i + 1] = state_t

      # First we fill the target values with the predictions of the model.
      # They will not be affected by training (since the training loss for them is 0)
      # TODO
      '''
      每個 actions 都會被 predict 一個成績/reward
      '''
      targets[i] = model.predict(state_t)[0]

      # if the game ended, the reward is the final reward
      if game_over:  # if game_over is True
        targets[i, action_t] = reward_t
      else:
        Q_sa = np.max(model.predict(state_tp1)[0])
        # r + gamma * max Q(s',a')
        targets[i, action_t] = reward_t + self.discount * Q_sa
    return inputs, targets

In [37]:
def get_input(input_state):
  # Slice items
  items = input_state['cand_item']
  input_state = input_state.drop('cand_item')
  item_feat = ITEM_DF.loc[items]

  # Create new dataframe
  stream_item_feat = pd.DataFrame(columns=USER_STREAM_CONTEXT.columns.to_list()+LB_ITEMS, index=item_feat.index)
  # Fill in items
  stream_item_feat.loc[:, LB_ITEMS] = item_feat
  # Fill in other context
  stream_item_feat = stream_item_feat.loc[:, USER_STREAM_CONTEXT.columns.to_list()].assign(**input_state)
  return stream_item_feat

def model_predict_top10(model, input_state):
  # Get all items
  full_input = get_input(input_state)
  # 紀錄所有預測結果
  full_input.loc['predict'] = full_input.apply(lambda row: np.argmax(model.predict(row)[0]), axis=1)
  return full_input.loc['predict'].nlargest(10)['item_id'].to_list()

### Training Process

In [24]:
def train(model, exp_replay, epochs, batch_size, num_episode=1000, verbose=1, reward_set='strict'):
  # total_actions = ITEM_DF.shape[0]
  total_episodes = len(USER_LIST)
  # Reset win counter
  win_cnt = 0
  win_hist = []

  for e in range(epochs):
    loss = 0.
    # TODO/MAIN: Apply user preference changes as epsilon
    # epsilon for exploration - dependent inversely on the training epoch
    epsilon = 4 / ((e + 1) ** (1 / 2))

    # handling episodes by assigning users from USER_LIST
    # Each user represent an Episode
    episodes = random.sample(range(total_episodes), num_episode)

    print(f'Epoch {e} started.')
    # ------------------- Episode (User) -------------------------------
    for user_episode in episodes:
      # game_over = False
      # get episode data by user phone number
      user_phone = USER_LIST[user_episode]
      user_all_streams = USER_STREAM_CONTEXT.xs(user_phone, level="聯絡電話")
      final_stream = LAST_BOUGHT_STREAM.loc[user_phone, '場次']
      
      # ----------------- Runs (User x All_Stream) ---------------------
      for i, stream in enumerate(STREAM_LIST):
        game_over = stream == final_stream
        
        # Get full state: current_state = user_stream + item_stream
        # 用上一場紀錄預測下一場直播會購買的商品
        current_state = get_full_state(user_all_streams, i)

        # --------------- Explore/Exploit Section ----------------------
        if np.random.rand() <= epsilon:
          # Explore by randomly select 10/n items from candidate_items
          # Get all items from the stream
          stream_items = ITEM_STREAM_DF[STREAM_LIST[i]][ITEM_STREAM_DF[STREAM_LIST[i]] == 1].index.to_list()
          action_ids = random.sample(stream_items, 10)
        else:
          # Exploit by choosing action from the model's prediction
          # TODO Refactor training/predict process
          action_ids = model_predict_top10(model, current_state)

        # --------------- Get next state & info to store ---------------
        reward = get_reward(user_phone, stream, action_ids)
        next_state = get_full_state(user_phone, i + 1) 

        # if reward == 1:
        #   win_cnt += 1

        # --------------- Calculating Interest Changes -----------------
        interest_score = calculate_interest_change(user_phone, i)

        # --------------- Store Experience -----------------------------
        # TODO Refactor Experience Class
        exp_replay.remember(interest_score,
                            [current_state.astype('float32'), action_ids, reward, next_state],
                            game_over)

        # --------------- Load batch of experiences --------------------
        inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
        # train model on experiences
        batch_loss = model.train_on_batch(inputs, targets)
        loss += batch_loss

    if verbose > 0:
      print("Epoch: {:03d}/{:03d} | Loss {:.4f} | Win count {}".format(e, epochs, loss, win_cnt))
    
    # Track win history to later check if our model is improving at the game over time.
    win_hist.append(win_cnt)
  return win_hist

### Main Method

In [25]:
pd.set_option('mode.chained_assignment', None)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt

# parameters
MAX_MEMORY = 1000  # Maximum number of experiences we are storing
BATCH_SIZE = 10  # Number of experiences we use for training per batch
EPOCH = 500
TOTAL_ACTIONS = 2 # 0 = not recommend, 1 = recommend
NUM_EPISODE = 100
HIDDEN_SIZE = 512

In [26]:
exp_replay = ReplayBuffer(max_memory=MAX_MEMORY)# Our model's architecture parameters
input_size = 256 # The input shape for model - this comes from the output shape of the CNN Mobilenet

# Setting up the model with keras.
model = keras.Sequential()
model.add(Dense(HIDDEN_SIZE, input_shape=(input_size,), activation='relu'))
model.add(Dense(HIDDEN_SIZE, activation='tanh'))
model.add(Dense(TOTAL_ACTIONS))
model.compile(Adam(learning_rate=.000001), "mse")


# Training the model
hist = train(model, 
             exp_replay, 
             epochs=EPOCH, 
             batch_size=BATCH_SIZE, 
             num_episode=NUM_EPISODE, 
             verbose=1, 
             reward_set='strict')
plt.plot(range(EPOCH), hist)

Epoch 0 started.


TypeError: Population must be a sequence or set.  For dicts, use list(d).