<a href="https://colab.research.google.com/github/kawamottyan/movie-recommendation-system/blob/main/model/experiment/model/context_bandit/contextbaundit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

LinUCBを使用した、映画のジャンル推薦

# 準備

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

まず、ランダムシードを作成して再現性を持たせる


In [2]:
np.random.seed(42)

# データの準備

今回はテスト用に、5種類の映画と6種類のジャンルをもとに、5回のセッション（ユーザー）に対して予測を行う。

In [3]:
session_ids = np.arange(1, 6)
item_ids = np.arange(1, 6)
genres_columns = ['Action', 'Comedy', 'Drama', 'Mystery', 'Romance', 'Thriller']
time_stamps = np.random.rand(20)

In [4]:
genre_data = np.random.randint(0, 2, size=(5, len(genres_columns)))
movie_genres = pd.DataFrame(genre_data, columns=genres_columns)
movie_genres.insert(0, 'ItemId', item_ids)

print('length of movie_genres:', str(len(movie_genres)))
print(movie_genres)

length of movie_genres: 5
   ItemId  Action  Comedy  Drama  Mystery  Romance  Thriller
0       1       0       0      1        1        1         1
1       2       1       0      1        1        0         1
2       3       0       1      0        1        1         0
3       4       0       0      0        0        0         0
4       5       0       1      1        0        1         1


In [5]:
rating_data = np.random.choice(item_ids, size=(20, 1), replace=True)
rating_data = np.hstack([np.random.choice(session_ids, size=(20, 1)), rating_data, time_stamps.reshape(-1, 1)])
rating_df = pd.DataFrame(rating_data, columns=['SessionId', 'ItemId', 'Time'])
rating_df = rating_df.drop_duplicates(subset=['SessionId', 'ItemId'])

rating_df['SessionId'] = rating_df['SessionId'].astype(int)
rating_df['ItemId'] = rating_df['ItemId'].astype(int)

print('length of rating_df:', str(len(rating_df)))
print(rating_df.head(5))

length of rating_df: 11
   SessionId  ItemId      Time
0          2       2  0.374540
2          4       1  0.731994
4          2       5  0.156019
5          4       2  0.155995
6          4       4  0.058084


報酬の初期値を1に設定する

In [6]:
rating_df['reward'] = 1
print(rating_df.head(25))

    SessionId  ItemId      Time  reward
0           2       2  0.374540       1
2           4       1  0.731994       1
4           2       5  0.156019       1
5           4       2  0.155995       1
6           4       4  0.058084       1
7           1       4  0.866176       1
8           5       4  0.601115       1
11          5       3  0.969910       1
12          2       1  0.832443       1
17          5       2  0.524756       1
19          5       5  0.291229       1


# モデルの動作

ユーザーIDとアイテムIDをもとに戻すためのマッピング辞書を作成する

In [7]:
user_id_mapping = {id: index for index, id in enumerate(sorted(rating_df['SessionId'].unique()))}
item_id_mapping = {id: index for index, id in enumerate(sorted(rating_df['ItemId'].unique()))}
print('user_id_mapping:', str(user_id_mapping))
print('item_id_mapping:', str(item_id_mapping))

user_id_mapping: {1: 0, 2: 1, 4: 2, 5: 3}
item_id_mapping: {1: 0, 2: 1, 3: 2, 4: 3, 5: 4}


ユーザー数とアイテム数を数える

In [8]:
num_users = len(user_id_mapping)
num_items = len(item_id_mapping)
print('num_users:', str(num_users))
print('num_items:', str(num_items))

num_users: 4
num_items: 5


### ユーザー×アイテムの行列を作成  

In [9]:
matrix = np.zeros((num_users, num_items))
print('matrix:\n', matrix)

matrix:
 [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


0が未視聴、1が視聴済み

In [10]:
for _, row in rating_df.iterrows():
    user_id = user_id_mapping[row['SessionId']]
    item_id = item_id_mapping[row['ItemId']]
    matrix[user_id, item_id] = 1
print('matrix:\n', matrix)

matrix:
 [[0. 0. 0. 1. 0.]
 [1. 1. 0. 0. 1.]
 [1. 1. 0. 1. 0.]
 [0. 1. 1. 1. 1.]]


### 各エポック値の保存場所を用意

In [11]:
num_epochs = 3
avg_rewards = np.zeros(shape=(num_epochs,), dtype=float)
print(avg_rewards)

[0. 0. 0.]


In [12]:
rewards = []

In [13]:
user_indexes = np.array(range(matrix.shape[0]))
print(user_indexes)

[0 1 2 3]


### 次のユーザーを取得する関数を作成

In [14]:
def get_next_user(current_user_idx):
    if current_user_idx >= matrix.shape[0]:
        current_user_idx = 0
        np.random.shuffle(user_indexes)
    
    next_user_id = user_indexes[current_user_idx]
    current_user_idx += 1
    return next_user_id, current_user_idx

In [15]:
current_user_idx = 0

In [16]:
for _ in range(num_users):
    user_id, current_user_idx = get_next_user(current_user_idx)
    print(user_id)

0
1
2
3


get_next_user関数内に`np.random.shuffle(user_indexes)`があるのでシャッフルされる

In [17]:
for _ in range(num_users):
    user_id, current_user_idx = get_next_user(current_user_idx)
    print(user_id)

1
3
0
2


In [18]:
# UNKNOWN_RATING_VAL = 0

In [19]:
# def get_uknown_items_of_user(user_id, UNKNOWN_RATING_VAL):
#     user_ratings = matrix[user_id]
#     unknown_item_ids = np.argwhere(user_ratings == UNKNOWN_RATING_VAL).flatten()
#     return unknown_item_ids

In [20]:
# user_id = 0
# unknown_item_ids = get_uknown_items_of_user(user_id, UNKNOWN_RATING_VAL)
# print(unknown_item_ids)

In [21]:
# if unknown_item_ids.size == 0:
#     print("User {} has no more unknown ratings, skipping him.".format(user_id))

In [22]:
titles = np.array(['Movie ' + str(i) for i in item_ids])
item_genres = movie_genres[genres_columns].to_numpy()
print('titles:', str(titles))
print('item_genres:\n', str(item_genres))

titles: ['Movie 1' 'Movie 2' 'Movie 3' 'Movie 4' 'Movie 5']
item_genres:
 [[0 0 1 1 1 1]
 [1 0 1 1 0 1]
 [0 1 0 1 1 0]
 [0 0 0 0 0 0]
 [0 1 1 0 1 1]]


In [23]:
d = item_genres.shape[1] + matrix.shape[1]
print(d)

11


In [24]:
b = np.zeros(shape=(num_items, d))
print(b)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [25]:
A = np.repeat(np.identity(d, dtype=float)[np.newaxis, :, :], num_items, axis=0)
print('length of A:', str(len(A)))
print(A[0])

length of A: 5
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [26]:
user_id = 0
user_id = user_id % num_users
user_features_vector = matrix[user_id]
print(user_features_vector)

[0. 0. 0. 1. 0.]


In [27]:
user_features_matrix = np.tile(user_features_vector, (num_items, 1))
print(user_features_matrix)

[[0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]]


In [28]:
item_features_matrix = item_genres
print(item_features_matrix)

[[0 0 1 1 1 1]
 [1 0 1 1 0 1]
 [0 1 0 1 1 0]
 [0 0 0 0 0 0]
 [0 1 1 0 1 1]]


In [29]:
arm_features_matrix = np.concatenate((user_features_matrix, item_features_matrix), axis=1)
print(arm_features_matrix)

[[0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1.]
 [0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1.]]


In [30]:
p_t = np.zeros(shape=(arm_features_matrix.shape[0],), dtype=float)
print(p_t)

[0. 0. 0. 0. 0.]


In [31]:
# item_ids = unknown_item_ids
# print(item_ids)

In [32]:
item_ids = range(num_items)
print(item_ids)

range(0, 5)


In [33]:
item_id = item_ids[0]
print(item_id)

0


In [34]:
x_ta = arm_features_matrix[item_id].reshape(arm_features_matrix[item_id].shape[0], 1)
print(x_ta)

[[0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [35]:
A_a_inv = np.linalg.inv(A[item_id])
print(A_a_inv)

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [36]:
theta_a = A_a_inv.dot(b[item_id])
print(theta_a)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [37]:
alpha = 0.1

In [38]:
p_t[item_id] = theta_a.T.dot(x_ta) + alpha * np.sqrt(x_ta.T.dot(A_a_inv).dot(x_ta))
print(p_t[item_id])

0.223606797749979


In [39]:
max_p_t = np.max(p_t)
print(max_p_t)

0.223606797749979


In [40]:
max_idxs = np.argwhere(p_t == max_p_t).flatten()
print(max_idxs)

[0]


In [41]:
a_t = np.random.choice(max_idxs)
print(a_t)

0


In [55]:
x_t_at = arm_features_matrix[a_t].reshape(arm_features_matrix[a_t].shape[0], 1)
print(x_t_at)

[[0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [61]:
###
r_t = 1
###

In [62]:
A[a_t] = A[a_t] + x_t_at.dot(x_t_at.T)
print(A[a_t])

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 4. 0. 0. 0. 3. 3. 3. 3.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 3. 0. 0. 0. 4. 3. 3. 3.]
 [0. 0. 0. 3. 0. 0. 0. 3. 4. 3. 3.]
 [0. 0. 0. 3. 0. 0. 0. 3. 3. 4. 3.]
 [0. 0. 0. 3. 0. 0. 0. 3. 3. 3. 4.]]


In [63]:
b[a_t] = b[a_t] + r_t * x_t_at.flatten()
print(b[a_t])

[0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1.]


In [42]:
MIN_PROBABILITY = 0
UNKNOWN_RATING_VAL = 0
POSITIVE_RATING_VAL = 1
NEGATIVE_RATING_VAL = -1

In [43]:
user_id=user_id
item_id=a_t

In [44]:
item_genres_vec = item_genres[item_id]
print(item_genres_vec)

[0 0 1 1 1 1]


In [45]:
user_ratings = matrix[user_id]
print(user_ratings)

[0. 0. 0. 1. 0.]


In [46]:
user_pos_rat_idxs = np.argwhere(user_ratings == POSITIVE_RATING_VAL).flatten()
print(user_pos_rat_idxs)

[3]


In [47]:
user_neg_rat_idxs = np.argwhere(user_ratings == NEGATIVE_RATING_VAL).flatten()
print(user_neg_rat_idxs)

[]


In [48]:
num_known_ratings = len(user_pos_rat_idxs) + len(user_neg_rat_idxs)
print(num_known_ratings)

1


In [49]:
genre_idxs = np.argwhere(item_genres == 1).flatten()
print(genre_idxs)

[0 2 0 3 0 4 0 5 1 0 1 2 1 3 1 5 2 1 2 3 2 4 4 1 4 2 4 4 4 5]


In [50]:
genre_likabilities = []

In [51]:
genre_idxs

array([0, 2, 0, 3, 0, 4, 0, 5, 1, 0, 1, 2, 1, 3, 1, 5, 2, 1, 2, 3, 2, 4,
       4, 1, 4, 2, 4, 4, 4, 5], dtype=int64)

In [52]:
genre_idx = 0
item_idx = 3

In [53]:
genre_likability += item_genres[item_idx][genre_idx]
print(genre_likability)

NameError: name 'genre_likability' is not defined

In [None]:
for genre_idx in genre_idxs:
    genre_likability = 0
    for item_idx in user_pos_rat_idxs:
        genre_likability += item_genres[item_idx][genre_idx]
    for item_idx in user_neg_rat_idxs:
        genre_likability -= item_genres[item_idx][genre_idx]
    genre_likability /= num_known_ratings
    genre_likabilities.append(genre_likability)

genre_likabilities = np.array(genre_likabilities)
print(genre_likabilities)

In [None]:
def recommend(self, user_id, item_id, fixed_rewards=True, prob_reward_p=0.9):
    """
    Returns reward and updates rating maatrix self.R.
    :param fixed_rewards: Whether to always return 1/0 rewards for already rated items.
    :param prob_reward_p: Probability of returning the correct reward for already rated item.
    :return: Reward = either 0 or 1.
    """
    MIN_PROBABILITY = 0 # Minimal probability to like an item - adds stochasticity

    if self.R[user_id, item_id] == self.POSITIVE_RATING_VAL:
        if fixed_rewards:
            return 1
        else:
            return np.random.binomial(n=1, p=prob_reward_p)  # Bernoulli coin toss
    elif self.R[user_id, item_id] == self.NEGATIVE_RATING_VAL:
        if fixed_rewards:
            return 0
        else:
            return np.random.binomial(n=1, p=1-prob_reward_p)  # Bernoulli coin toss
    else:
        item_genres = self.item_genres[item_id]
        user_ratings = self.R[user_id]
        user_pos_rat_idxs = np.argwhere(user_ratings == self.POSITIVE_RATING_VAL).flatten()
        user_neg_rat_idxs = np.argwhere(user_ratings == self.NEGATIVE_RATING_VAL).flatten()
        num_known_ratings = len(user_pos_rat_idxs) + len(user_neg_rat_idxs)
        genre_idxs = np.argwhere(item_genres == 1).flatten()

        # Find how much user likes the genre of the recommended movie based on his previous ratings.
        genre_likabilities = []
        for genre_idx in genre_idxs:
            genre_likability = 0
            for item_idx in user_pos_rat_idxs:
                genre_likability += self.item_genres[item_idx][genre_idx]
            for item_idx in user_neg_rat_idxs:
                genre_likability -= self.item_genres[item_idx][genre_idx]
            genre_likability /= num_known_ratings
            genre_likabilities.append(genre_likability)

        genre_likabilities = np.array(genre_likabilities)

        # how much user user_id likes the genre of the recommended item item_id
        result_genre_likability = np.average(genre_likabilities)
        binomial_reward_probability = result_genre_likability
        if binomial_reward_probability <= 0:
            #print("User={}, item={}, genre likability={}".format(user_id, item_id, result_genre_likability))
            binomial_reward_probability = MIN_PROBABILITY # this could be replaced by small probability

        approx_rating = np.random.binomial(n=1, p=binomial_reward_probability)  # Bernoulli coin toss

        if approx_rating == 1:
            self.R[user_id, item_id] = self.POSITIVE_RATING_VAL
        else:
            self.R[user_id, item_id] = self.NEGATIVE_RATING_VAL

        #return approx_rating
        return approx_rating

In [None]:
def choose_arm(self, user_id, unknown_item_ids, verbosity):
    """
    Choose an arm to pull = item to recommend to user t that he did not rate yet.
    :param t: User_id of user to recommend to.
    :param unknown_item_ids: Indexes of items that user t has not rated yet.
    :return: Received reward for selected item = 1/0 = user liked/disliked item.
    """
    A = self.A
    b = self.b
    arm_features = self.dataset.get_features_of_current_arms(user_id=user_id)
    p_t = np.zeros(shape=(arm_features.shape[0],), dtype=float)
    p_t -= 9999  # I never want to select the already rated items
    item_ids = unknown_item_ids
    
    if self.allow_selecting_known_arms:
        item_ids = range(self.dataset.num_items)
        p_t += 9999
    
    for a in item_ids:  # iterate over all arms
        x_ta = arm_features[a].reshape(arm_features[a].shape[0], 1)  # make a column vector
        A_a_inv = np.linalg.inv(A[a])
        theta_a = A_a_inv.dot(b[a])
        p_t[a] = theta_a.T.dot(x_ta) + self.alpha * np.sqrt(x_ta.T.dot(A_a_inv).dot(x_ta))
    
    max_p_t = np.max(p_t)
    if max_p_t <= 0:
        print("User {} has max p_t={}, p_t={}".format(t, max_p_t, p_t))
    
    # I want to randomly break ties, np.argmax return the first occurence of maximum.
    # So I will get all occurences of the max and randomly select between them
    max_idxs = np.argwhere(p_t == max_p_t).flatten()
    a_t = np.random.choice(max_idxs)  # idx of article to recommend to user t
    
    # observed reward = 1/0
    r_t = self.dataset.recommend(user_id=t, item_id=a_t,
                                 fixed_rewards=self.fixed_rewards, prob_reward_p=self.prob_reward_p)
    
    if verbosity >= 2:
        print("User {} choosing item {} with p_t={} reward {}".format(t, a_t, p_t[a_t], r_t))
    
    x_t_at = arm_features[a_t].reshape(arm_features[a_t].shape[0], 1)  # make a column vector
    A[a_t] = A[a_t] + x_t_at.dot(x_t_at.T)
    b[a_t] = b[a_t] + r_t * x_t_at.flatten()  # turn it back into an array because b[a_t] is an array
    
    return r_t

In [None]:
rewards.append(choose_arm(user_id, unknown_item_ids, verbosity))