# Cross Entropy Capturing User Preference Changes Implementation

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import random
import itertools

In [2]:
DATA = pd.read_csv('data/6897-1y-c.csv')

In [3]:
DATA['場次'].isnull().sum()/DATA.shape[0]

0.0365678187939242

## Preprocessing

### Handle Username Formats

In [4]:
def replace_invalid(string):
  string = string.replace('-', '')
  string = string.replace('/', '')
  string = string.replace('*', '')
  string = string.replace('劉德玲', '')
  return string

DATA['聯絡電話'] = DATA['聯絡電話'].apply(replace_invalid)

### 處理 period （前六碼）

In [5]:
DATA['period'] = DATA['下單日期'].astype(str).apply(lambda x: x[:6])

### One Hot Encoding
* 處理 category, shipment, payment
* 要先做 label encoding 才能做 one hot encoding

In [6]:
le = LabelEncoder()
DATA['category_label'] = le.fit_transform(DATA['商品分類'])
DATA = pd.concat([DATA, pd.get_dummies(DATA['category_label'], prefix='cat')], axis = 1)

DATA['shipment'] = le.fit_transform(DATA['運送方式'])
DATA['payment'] = le.fit_transform(DATA['付款方式'])
DATA = pd.get_dummies(DATA,
                      prefix=['shipment', 'payment'],
                      columns=['shipment', 'payment'])

### Create `item_id`

In [7]:
# Generating item ids
shuffled_items = DATA['商品名稱'].sample(frac=1).reset_index(drop=True).unique()
item_dict = { x: i for i, x in enumerate(shuffled_items) }

DATA['item_id'] = DATA['商品名稱'].map(item_dict)

### 場次為空
<font color='yellow'>

問題：
1. 有些下單日期與場次不相同，兩個欄位儲存內容的差異？
2. 填入 `下單日期` 作為場次後，user x streams 的數量從 1,437,160 變成 4,605,980
3. 場次為空的紀錄有 10547 筆，佔 total data 的 0.0366，可否直接刪除場次為空的資料？

</font>

In [8]:
DATA['場次'].isnull().sum()/DATA.shape[0], DATA.shape[0]

(0.0365678187939242, 288423)

In [30]:
DATA['場次'] = DATA['場次'].fillna(DATA['下單日期'])

In [31]:
user_stream_context = DATA.groupby(['聯絡電話', '場次']).sum()

In [32]:
user_stream_context.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,下單日期,單價,數量,折扣,總金額,專屬折扣,運費,信用卡手續費,紅利折抵,收款金額,...,cat_225,cat_226,cat_227,shipment_0,shipment_1,shipment_2,shipment_3,payment_0,payment_1,item_id
聯絡電話,場次,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
937474657,20210808.0,20210808,599,1,0,599,0,60,0,0,659,...,0,0,0,1,0,0,0,0,1,169
2268052110975092973,20201029.0,20201029,677,1,0,677,0,60,0,0,737,...,0,0,0,0,1,0,0,0,1,3296
266020137,20210718.0,40421436,532,10,0,2660,0,120,82,0,4136,...,0,0,0,0,2,0,0,2,0,844
266020137,20210729.0,80842916,1158,8,0,1274,0,0,0,0,0,...,0,0,0,0,4,0,0,4,0,15712
266213401,20210701.0,40421402,516,2,0,516,0,120,0,0,1134,...,0,0,0,0,2,0,0,0,2,468


In [33]:
user_stream_context.shape

(72380, 246)

In [34]:
len(DATA['聯絡電話'].unique()), len(DATA['場次'].unique())

(11780, 391)

## User representation
<font color='yellow'>

* Context 變成 User x Stream
  * User 移除 categories
  * Context 新增 ***該場*** x ***該 user*** 的 categories
  
</font>

In [43]:
# Creating the row axis labels 
LB_PERIOD = list(DATA['period'].unique())
LB_USER = ['user']
LB_PQ = ['total_price', 'total_quantity']
LB_CAT = [f'cat_{x}' for x in list(range(228))]
LB_SHIPMENT_PAYMENT = ['shipment_0', 'shipment_1', 'shipment_2', 'shipment_3', 'payment_0', 'payment_1']
USER_LB = LB_PERIOD + LB_USER + LB_PQ + LB_SHIPMENT_PAYMENT
# USER_LB = LB_PERIOD + LB_USER + LB_PQ + LB_CAT + LB_SHIPMENT_PAYMENT

def generate_user_series(sample_user_key):
  user_sample = DATA.loc[DATA['聯絡電話'] == sample_user_key]
  user_sum = user_sample.loc[:, LB_SHIPMENT_PAYMENT+['總金額', '數量']+LB_CAT].sum()
  
  # Creating the Series 
  res_series = pd.Series([0]*254) 
  # Creating the row axis labels 
  res_series.index = USER_LB
  
  # Period
  res_series[user_sample['period'].unique()] = 1
  # User
  res_series['user'] = sample_user_key
  # Total Price & Quantities
  res_series['total_price'] = user_sum['總金額']
  res_series['total_quantity'] = user_sum['數量']
  # Shipment & Payment
  res_series[LB_SHIPMENT_PAYMENT] = user_sum[LB_SHIPMENT_PAYMENT]
  # Categories
  # res_series[LB_CAT] = user_sum[LB_CAT]
  
  return res_series

In [44]:
def get_user_df():
  user_df = pd.DataFrame(index=DATA['聯絡電話'].unique(), columns=USER_LB)
  user_df = user_df.apply(lambda x: generate_user_series(x.name), axis=1, result_type='expand')
  return user_df

In [45]:
DATA.groupby(['聯絡電話', '場次']).size().describe()

count    66844.000000
mean         4.157082
std          4.107615
min          1.000000
25%          2.000000
50%          3.000000
75%          5.000000
max        104.000000
dtype: float64

## Item Representation
- 商品 id 來源為 `item_dict`: `商品名稱: item_id`
- 可用欄位： `categories(one-hot)`, `price`, `被購買次數`
- Columns: `['下單日期', '商品名稱', '規格', '單價', '數量', '折扣', '總金額', '專屬折扣', '運費', '信用卡手續費', '紅利折抵', '收款金額', '付款方式', '運送方式', '收件人', '寄送地址', '聯絡電話', '場次', '處理後名稱', '商品分類', 'period', 'category_label', 0-227, 'shipment_0', 'shipment_1', 'shipment_2', 'shipment_3', 'payment_0', 'payment_1', 'item_id']`

In [46]:
LB_ITEMS = ['item_id', 'avg_price', 'count'] + LB_CAT

def get_item_df():
  item_df = pd.DataFrame(columns=LB_ITEMS)
  item_df['item_id'] = DATA.item_id.unique()
  # Count
  item_count = DATA.groupby('item_id').size()
  item_df['count'] = item_df['item_id'].apply(lambda x: item_count[x])
  # Cat
  item_df.loc[:, LB_CAT] = DATA.groupby('item_id').sum()[LB_CAT]
  # Price
  item_df['avg_price'] = DATA.groupby('item_id').mean()['單價']
  return item_df

## Stream list

In [47]:
STREAMS = DATA['場次'].unique()

## User Preference for each state
User x Stream_id corresponding bought categories

In [48]:
pd.DataFrame([e for e in itertools.product(DATA['聯絡電話'].unique(), DATA['場次'].unique())], columns=['user', 'stream'])

Unnamed: 0,user,stream
0,0986079133,20210321.0
1,0986079133,20210328.0
2,0986079133,20210325.0
3,0986079133,
4,0986079133,20210318.0
...,...,...
1437155,0988342585,20210808.0
1437156,0988342585,20201101.0
1437157,0988342585,20201025.0
1437158,0988342585,20201105.0


In [50]:
pd.DataFrame([e for e in itertools.product(DATA['聯絡電話'].unique(), DATA['場次'].unique())], columns=['user', 'stream'])

Unnamed: 0,user,stream
0,0986079133,20210321.0
1,0986079133,20210328.0
2,0986079133,20210325.0
3,0986079133,20210330.0
4,0986079133,20210331.0
...,...,...
4605975,0988342585,20210714.0
4605976,0988342585,20210716.0
4605977,0988342585,20210719.0
4605978,0988342585,20210720.0


## Context representation
* Columns: `period, user, item, bought(reward)`
* Generating item ids?

#### Considering Other Context [pending]
* Example: time, weekday, and the freshness of the news (the gap between request time and news publish time)
* 候選欄位：下單時間、產品上架時間（新鮮度）、直播主相關資料 

### Add appeared items categories as state

In [10]:
LB_AP_CAT = [f'ap_{x}' for x in LB_CAT]

def get_context_df():
    # Generating context representation
    context_col = ['user', 'item', 'reward'] + LB_AP_CAT
    context_df = pd.DataFrame(columns=context_col)

    context_df['user'] = DATA['聯絡電話']
    context_df['item'] = DATA['item_id']
    context_df['discount'] = DATA['折扣']
    context_df['date'] = DATA['下單日期']
    context_df['period'] = DATA['period']
    context_df['reward'] = 1
    context_df = context_df.sort_values(by=['user', 'date'])
    
    # Add appeared cat in date as feature
    item_cat_sum_by_date = DATA.groupby('下單日期').sum()[LB_CAT]
    ap_cat_df = context_df.apply(lambda x: item_cat_sum_by_date.loc[x['date']],
                                 axis=1, result_type='expand')
    context_df[LB_AP_CAT] = ap_cat_df
    
    return context_df

In [47]:
%timeit -r1 get_item_df()

1.94 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [48]:
%timeit -r1 get_context_df()

45.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [49]:
%timeit -r1 get_user_df()

2min 27s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


---
## Train DQN model
* Input: `user_df` 253, `item_df` 231(BERT: 768), interact (?), `reward` 1
* Output: recommend a list of items
* Methods Needed
    * Environment Function
    * Choose Action
    * Store Transition
    * Learn

### Environment

In [11]:
# context_df is global variable
# get current reward, next state and determine whether the episode ended
def env_take_action(current_state_idx, current_user, action, reward_set='strict'):
  # init return outputs
  game_over = False
  current_state = context_df.iloc[current_state_idx]
  context_same_user_all = context_df.loc[context_df.user == current_user]
  context_same_user_next = context_same_user_all.loc[(current_state_idx + 1):, :]

  # Reward set conditions
  if reward_set == 'strict':
    reward = 1 if current_state['item'] == action else 0
  elif reward_set == 'loose-all':
    # 不能跳 idx 因為每局的 action 不一樣
    reward = 1 if action in context_same_user_all['item'].unique() else 0
  elif reward_set == 'loose-after':
    reward = 1 if action in context_same_user_next['item'].unique() else 0
    
  # Print if any reward added
  if reward == 1: print('reward +')

  # Check if next state exist
  if context_same_user_next.shape[0] == 0:
    next_state_idx = None
    reward = 0
    game_over = True
  else:
    next_state_idx = context_same_user_next.iloc[0].idx

  return next_state_idx, reward, game_over

In [12]:
# get all inputs(user + item based on context_idx)
# used at init state and store transition
def get_full_inputs(context_idx):
  current_context = context_df.iloc[context_idx]
  full_input = user_df.loc[user_df.user == current_context.user]
  full_input.loc[:, 'discount'] = current_context['discount']
  full_input.loc[:, 'date'] = current_context['date']
  # if item_cat:
  #   # 原本 user context 就有 item cates (購買過的商品種類總和)
  #   # 這次新增的 0-227 是該期的所有 item cates，因此需要在 0-227 加上 prefix
  state_cate_cols = [f'state_cat_{x}' for x in range(228)]
  state_cate = pd.DataFrame(current_context.loc[LB_AP_CAT], columns=state_cate_cols) # Add appeared items
  full_input = pd.concat([full_input, state_cate]).astype('float32')
  return full_input

### Collecting Training Data

In [13]:
class ReplayBuffer:
  def __init__(self, max_memory=100000, discount=.9):
    """
    Setup
    max_memory: the maximum number of experiences we want to store
    memory: a list of experiences
    discount: the discount factor for future experience
    In the memory the information whether the game ended at the state is stored seperately in a nested array
    [...
    [experience, game_over]
    [experience, game_over]
    ...]
    """
    self.max_memory = max_memory
    self.memory = list()
    self.discount = discount

  def remember(self, states, game_over):
    # Save a state to memory
    self.memory.append([states, game_over])
    # We don't want to store infinite memories, so if we have too many, we just delete the oldest one
    if len(self.memory) > self.max_memory:
      del self.memory[0]

  def get_batch(self, model, batch_size=10):

    # How many experiences do we have?
    len_memory = len(self.memory)

    # Calculate the number of actions that can possibly be taken in the game.
    num_actions = model.output_shape[-1]

    # Dimensions of our observed states, ie, the input to our model.
    # Memory:  [
    #   [[ [...state], action, reward, next_state_idx], game_over],
    #   [[ [...state], action, reward, nexr_state_idx], game_over],
    #   ...
    # ]
    env_dim = self.memory[0][0][0].shape[1]

    # We want to return an input and target vector with inputs from an observed state.
    inputs = np.zeros((min(len_memory, batch_size), env_dim))

    # ...and the target r + gamma * max Q(s’,a’)
    # Note that our target is a matrix, with possible fields not only for the action taken but also for
    # the other possible actions. The actions not take the same value as the prediction to not affect them
    targets = np.zeros((inputs.shape[0], num_actions))

    # We draw states to learn from randomly
    for i, idx in enumerate(np.random.randint(0, len_memory, size=inputs.shape[0])):
      """
      Here we load one transition <s, a, r, s’> from memory
      state_t: initial state s
      action_t: action taken a
      reward_t: reward earned r
      state_tp1: the state that followed s’
      """
      state_t, action_t, reward_t, state_tp1 = self.memory[idx][0]
      state_t = state_t.astype('float32')

      # We also need to know whether the game ended at this state
      game_over = self.memory[idx][1]

      # add the state s to the input
      inputs[i:i + 1] = state_t

      # First we fill the target values with the predictions of the model.
      # They will not be affected by training (since the training loss for them is 0)
      targets[i] = model.predict(state_t)[0]

      """
      If the game ended, the expected reward Q(s,a) should be the final reward r.
      Otherwise the target value is r + gamma * max Q(s’,a’)
      """
      

      # if the game ended, the reward is the final reward
      if game_over:  # if game_over is True
        targets[i, action_t] = reward_t
      else:
        # Find next state representation
        state_tp1 = get_full_inputs(state_tp1)
        # Convert state_tp1 to float32
        state_tp1 = state_tp1.astype('float32')
        # Here Q_sa is max_a'Q(s', a')
        Q_sa = np.max(model.predict(state_tp1)[0])
        # r + gamma * max Q(s’,a’)
        targets[i, action_t] = reward_t + self.discount * Q_sa
    return inputs, targets

### Training Process

In [14]:
# Train a model on the given game
def train(model, exp_replay, epochs, batch_size, num_episode=100, verbose=1, reward_set='strict'):
  total_actions = item_df.shape[0]
  total_episodes = user_df.shape[0]
  # Reseting the win counter
  win_cnt = 0
  # We want to keep track of the progress of the AI over time, so we save its win count history 
  # indicated by number of goals scored
  win_hist = []

  # Epochs is the number of games we play
  for e in range(epochs):
    loss = 0.
    # epsilon for exploration - dependent inversely on the training epoch
    epsilon = 4 / ((e + 1) ** (1 / 2))

    # handling episodes by assigning users from user_df
    episodes = random.sample(range(total_episodes), num_episode)

    # Episode start
    print(f'Epoch {e} started.')

    for user_episode in episodes:
      game_over = False
      # get current state s by observing our game environment
      # TODO: Init state for each epoch, find the 
      user_phone = user_df.iloc[user_episode].user
      next_state_idx = context_df.loc[context_df['user'] == user_phone].iloc[0].idx
      
      while not game_over:
        # The learner is acting on the last observed game screen
        # next_state is a vector containing representing the game screen
        current_state_idx = next_state_idx
        current_state = get_full_inputs(current_state_idx)

        # We choose our action from either exploration (random) or exploitation (model).
        if np.random.rand() <= epsilon:
          # Explore a random action
          action_id = int(np.random.randint(0, total_actions, size=1))
        else:
          # Choose action from the model's prediction
          # q contains the expected rewards for each actions
          # ---> q is a list of predicted score/possible rewards for each action
          q = model.predict(current_state)
          # We pick the action with the highest expected reward
          # ---> np.argmax returns the index which has the highest reward
          action_id = np.argmax(q[0])

        # apply action, get rewards r and new state s'
        next_state_idx, reward, game_over = env_take_action(current_state_idx, user_phone, action_id, reward_set)
        # If we managed to score a goal we add 1 to our win counter
        if reward == 1:
          win_cnt += 1

        """
        The experiences < s, a, r, s' > we make during gameplay are our training data.
        Here we first save the last experience, and then load a batch of experiences to train our model
        """
        # store experience
        #   Full input
        action = item_df.loc[item_df.item_id == action_id]
        # state_input = pd.concat([current_state.reset_index(), action.reset_index()], axis=1).astype('float32')
        exp_replay.remember([current_state.astype('float32'), action_id, reward, next_state_idx], game_over)

        # Load batch of experiences
        inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
        print(targets)

        # train model on experiences
        batch_loss = model.train_on_batch(inputs, targets)

        loss += batch_loss

    # Episode end

    if verbose > 0:
      print("Epoch: {:03d}/{:03d} | Loss {:.4f} | Win count {}".format(e, epochs, loss, win_cnt))
    
    # Track win history to later check if our model is improving at the game over time.
    win_hist.append(win_cnt)
  return win_hist

### Main Method

In [15]:
user_df = get_user_df()
item_df = get_item_df()
context_df = get_context_df()

In [16]:
pd.set_option('mode.chained_assignment', None)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt

# parameters
MAX_MEMORY = 1000  # Maximum number of experiences we are storing
BATCH_SIZE = 10  # Number of experiences we use for training per batch
EPOCH = 500
TOTAL_ACTIONS = item_df.shape[0]
NUM_EPISODE = 100
HIDDEN_SIZE = 512

In [17]:
exp_replay = ReplayBuffer(max_memory=MAX_MEMORY)# Our model's architecture parameters
input_size = 256 # The input shape for model - this comes from the output shape of the CNN Mobilenet

# Setting up the model with keras.
model = keras.Sequential()
model.add(Dense(HIDDEN_SIZE, input_shape=(input_size,), activation='relu'))
model.add(Dense(HIDDEN_SIZE, activation='tanh'))
model.add(Dense(TOTAL_ACTIONS))
model.compile(Adam(learning_rate=.000001), "mse")


# Training the model
hist = train(model, 
             exp_replay, 
             epochs=EPOCH, 
             batch_size=BATCH_SIZE, 
             num_episode=NUM_EPISODE, 
             verbose=1, 
             reward_set='strict')
plt.plot(range(EPOCH), hist)

Epoch 0 started.


IndexError: single positional indexer is out-of-bounds

In [20]:
USER_LB

['202103',
 '202104',
 '202102',
 '202008',
 '202009',
 '202011',
 '202012',
 '202010',
 '202007',
 '202108',
 '202109',
 '202107',
 '202101',
 '202006',
 '202003',
 '202105',
 '202106',
 'user',
 'total_price',
 'total_quantity',
 'cat_0',
 'cat_1',
 'cat_2',
 'cat_3',
 'cat_4',
 'cat_5',
 'cat_6',
 'cat_7',
 'cat_8',
 'cat_9',
 'cat_10',
 'cat_11',
 'cat_12',
 'cat_13',
 'cat_14',
 'cat_15',
 'cat_16',
 'cat_17',
 'cat_18',
 'cat_19',
 'cat_20',
 'cat_21',
 'cat_22',
 'cat_23',
 'cat_24',
 'cat_25',
 'cat_26',
 'cat_27',
 'cat_28',
 'cat_29',
 'cat_30',
 'cat_31',
 'cat_32',
 'cat_33',
 'cat_34',
 'cat_35',
 'cat_36',
 'cat_37',
 'cat_38',
 'cat_39',
 'cat_40',
 'cat_41',
 'cat_42',
 'cat_43',
 'cat_44',
 'cat_45',
 'cat_46',
 'cat_47',
 'cat_48',
 'cat_49',
 'cat_50',
 'cat_51',
 'cat_52',
 'cat_53',
 'cat_54',
 'cat_55',
 'cat_56',
 'cat_57',
 'cat_58',
 'cat_59',
 'cat_60',
 'cat_61',
 'cat_62',
 'cat_63',
 'cat_64',
 'cat_65',
 'cat_66',
 'cat_67',
 'cat_68',
 'cat_69',
 'cat_70'