In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed NumPy's legacy global RNG so the np.random.* draws in later cells are repeatable.
np.random.seed(42)

In [3]:
# Toy catalogue: five sessions and five items, both with 1-based ids.
session_ids, item_ids = np.arange(1, 6), np.arange(1, 6)
genres_columns = [
    'Action', 'Comedy', 'Drama',
    'Mystery', 'Romance', 'Thriller',
]
# Twenty uniform [0, 1) values used as the interaction timestamps.
time_stamps = np.random.rand(20)

In [4]:
# Draw a random 0/1 genre flag for every (item, genre) pair. The row count is
# derived from item_ids so the ItemId column inserted below always lines up
# with the generated rows.
genre_data = np.random.randint(0, 2, size=(len(item_ids), len(genres_columns)))
movie_genres = pd.DataFrame(genre_data, columns=genres_columns)
movie_genres.insert(0, 'ItemId', item_ids)

print('length of movie_genres:', str(len(movie_genres)))
print(movie_genres)

length of movie_genres: 5
   ItemId  Action  Comedy  Drama  Mystery  Romance  Thriller
0       1       0       0      1        1        1         1
1       2       1       0      1        1        0         1
2       3       0       1      0        1        1         0
3       4       0       0      0        0        0         0
4       5       0       1      1        0        1         1


In [5]:
# Simulate one (session, item, time) interaction per timestamp. The item
# column is drawn before the session column; keep that order, because both
# draws consume the seeded global RNG.
num_interactions = len(time_stamps)
rating_data = np.random.choice(item_ids, size=(num_interactions, 1), replace=True)
rating_data = np.hstack([
    np.random.choice(session_ids, size=(num_interactions, 1)),
    rating_data,
    time_stamps.reshape(-1, 1),
])

# hstack promotes everything to float, so the id columns are cast back to int.
# The cast is chained onto the de-duplicated frame instead of assigning columns
# on it afterwards, which can raise SettingWithCopyWarning on pandas < 3.
rating_df = (
    pd.DataFrame(rating_data, columns=['SessionId', 'ItemId', 'Time'])
    .drop_duplicates(subset=['SessionId', 'ItemId'])  # keep the first occurrence of each pair
    .astype({'SessionId': int, 'ItemId': int})
)

print('length of rating_df:', str(len(rating_df)))
print(rating_df.head(5))

length of rating_df: 11
   SessionId  ItemId      Time
0          2       2  0.374540
2          4       1  0.731994
4          2       5  0.156019
5          4       2  0.155995
6          4       4  0.058084


In [6]:
# Map raw ids to dense zero-based indices, in ascending id order.
# Keys are converted to plain Python ints so the printed dicts look the same on
# every NumPy version (np.int64 keys print as `np.int64(1)` on NumPy >= 2), and
# the comprehension variable no longer shadows the builtin `id`.
user_id_mapping = {
    int(session_id): index
    for index, session_id in enumerate(sorted(rating_df['SessionId'].unique()))
}
item_id_mapping = {
    int(item_id): index
    for index, item_id in enumerate(sorted(rating_df['ItemId'].unique()))
}
print('user_id_mapping:', str(user_id_mapping))
print('item_id_mapping:', str(item_id_mapping))

user_id_mapping: {1: 0, 2: 1, 4: 2, 5: 3}
item_id_mapping: {1: 0, 2: 1, 3: 2, 4: 3, 5: 4}


In [7]:
# Matrix dimensions: one row per mapped session, one column per mapped item.
num_users, num_items = len(user_id_mapping), len(item_id_mapping)
print(f'num_users: {num_users}')
print(f'num_items: {num_items}')

num_users: 4
num_items: 5


In [8]:
# Binary interaction matrix: matrix[u, i] == 1 when session u has a row for item i.
# The ids are translated column-wise and written with one fancy-index
# assignment. The previous iterrows() loop upcast every mixed-dtype row to
# float, so the mapping dicts were being looked up with float keys.
row_idx = rating_df['SessionId'].map(user_id_mapping).to_numpy()
col_idx = rating_df['ItemId'].map(item_id_mapping).to_numpy()
matrix = np.zeros((num_users, num_items))
matrix[row_idx, col_idx] = 1
print('matrix:\n', matrix)

matrix:
 [[0. 0. 0. 1. 0.]
 [1. 1. 0. 0. 1.]
 [1. 1. 0. 1. 0.]
 [0. 1. 1. 1. 1.]]


In [9]:
# Visiting order of the matrix rows: 0 .. num_rows - 1.
user_indexes = np.arange(matrix.shape[0])

In [10]:
def get_next_user(current_user_idx):
    """Return the user at the cursor position together with the advanced cursor.

    Args:
        current_user_idx: position in ``user_indexes`` to read next.

    Returns:
        ``(next_user_id, next_cursor)``. When ``current_user_idx`` is at or
        beyond the number of rows in ``matrix``, reading restarts at position 0.
    """
    # Wrap to the first position once the cursor has run past the last matrix row.
    position = 0 if current_user_idx >= matrix.shape[0] else current_user_idx
    return user_indexes[position], position + 1

In [11]:
# Genre flags per arm, with row k holding the item that item_id_mapping maps to
# index k. Selecting by ItemId keeps the rows aligned with the matrix columns
# even when some catalogue items never appear in rating_df. An id that is
# missing from movie_genres raises KeyError instead of silently misaligning.
item_genres = (
    movie_genres
    .set_index('ItemId')
    .loc[list(item_id_mapping), genres_columns]
    .to_numpy()
)

In [12]:
# Exploration weight: scales the sqrt(x^T A^-1 x) term that choose_arm() adds to each arm's score.
alpha = 0.1

In [13]:
def choose_arm(user_id, A, b):
    """Run one disjoint-LinUCB step for ``user_id``: score all arms, play one, update it.

    The context of arm ``a`` is the user's row of ``matrix`` concatenated with
    row ``a`` of ``item_genres``. Each arm is scored as
    ``theta_a^T x + alpha * sqrt(x^T A_a^-1 x)`` with ``theta_a = A_a^-1 b_a``.
    Ties for the best score are broken uniformly at random.

    Args:
        user_id: row index into ``matrix``.
        A: array of shape (num_items, d, d) with one design matrix per arm.
            The entry of the played arm is updated in place.
        b: array of shape (num_items, d) with one response vector per arm.
            The entry of the played arm is updated in place.

    Returns:
        The reward returned by ``recommend`` for the played arm.
    """
    # Every arm sees the same user part; only the item part differs per row.
    user_features_matrix = np.tile(matrix[user_id], (num_items, 1))
    arm_features = np.concatenate((user_features_matrix, item_genres), axis=1)

    p_t = np.zeros(shape=(arm_features.shape[0],), dtype=float)
    for a in range(num_items):
        x_ta = arm_features[a].reshape(-1, 1)
        A_a_inv = np.linalg.inv(A[a])
        theta_a = A_a_inv.dot(b[a])
        estimate = theta_a.T.dot(x_ta)                          # shape (1,)
        bonus = alpha * np.sqrt(x_ta.T.dot(A_a_inv).dot(x_ta))  # shape (1, 1)
        # Extract the single element explicitly: assigning an ndim > 0 array
        # to a scalar slot is deprecated since NumPy 1.25.
        p_t[a] = (estimate + bonus).item()

    max_p_t = np.max(p_t)
    max_idxs = np.argwhere(p_t == max_p_t).flatten()
    a_t = np.random.choice(max_idxs)

    r_t = recommend(user_id=user_id, item_id=a_t)

    # Rank-one update of the played arm only.
    x_t_at = arm_features[a_t].reshape(-1, 1)
    A[a_t] = A[a_t] + x_t_at.dot(x_t_at.T)
    b[a_t] = b[a_t] + r_t * x_t_at.flatten()

    return r_t

In [14]:
# Notebook-level cursor into user_indexes. run_epoch() assigns its own local
# variable of the same name, so it does not read this one.
current_user_idx = 0

In [15]:
def run_epoch():
    """Play one arm for each of ``num_users`` users with a fresh model; return the mean reward.

    The per-arm parameters are re-created on every call, so nothing learned in
    one epoch is carried over to the next.
    """
    feature_dim = item_genres.shape[1] + matrix.shape[1]
    # One identity design matrix and one zero response vector per item.
    A = np.repeat(np.identity(feature_dim, dtype=float)[np.newaxis, :, :], num_items, axis=0)
    b = np.zeros(shape=(num_items, feature_dim))

    cursor = 0
    epoch_rewards = []
    for _ in range(num_users):
        user_id, cursor = get_next_user(cursor)
        epoch_rewards.append(choose_arm(user_id, A, b))

    return np.average(np.array(epoch_rewards))

In [16]:
def recommend(user_id, item_id):
    """Return the reward for recommending ``item_id`` to ``user_id``.

    Args:
        user_id: row index into ``matrix``.
        item_id: column index into ``matrix``.

    Returns:
        1 when ``matrix[user_id, item_id]`` equals 1, otherwise 0. The previous
        if/elif chain returned ``None`` for any value other than 0 or 1.
    """
    return int(matrix[user_id, item_id] == 1)

In [17]:
num_epochs = 3
# One float slot per epoch, filled with that epoch's mean reward.
avg_rewards = np.zeros(num_epochs)

In [18]:
# Each run_epoch() call rebuilds A and b from scratch, so the entries of
# avg_rewards are independent runs rather than successive training passes.
for i in range(num_epochs):
    avg_rewards[i] = run_epoch()

In [19]:
avg_rewards

array([0.5 , 0.75, 0.5 ])

In [20]:
matrix

array([[0., 0., 0., 1., 0.],
       [1., 1., 0., 0., 1.],
       [1., 1., 0., 1., 0.],
       [0., 1., 1., 1., 1.]])

In [49]:
num_epochs = 3
# Reset the per-epoch mean-reward buffer to zeros.
avg_rewards = np.zeros(num_epochs)

In [50]:
# Exploration weight for the confidence term in the arm scores (same value as the earlier alpha cell).
alpha = 0.1

In [56]:
# Inlined equivalent of run_epoch()/choose_arm(): every epoch starts from a
# fresh per-arm model, plays one arm per user and records the mean reward.
for epoc in range(num_epochs):
    rewards = []
    current_user_idx = 0

    d = item_genres.shape[1] + matrix.shape[1]
    b = np.zeros(shape=(num_items, d))
    A = np.repeat(np.identity(d, dtype=float)[np.newaxis, :, :], num_items, axis=0)

    for i in range(num_users):
        # Wrap the cursor once it runs past the last matrix row.
        if current_user_idx >= matrix.shape[0]:
            current_user_idx = 0
        next_user_id = user_indexes[current_user_idx]
        current_user_idx += 1

        user_id = next_user_id
        user_features_vector = matrix[user_id]
        user_features_matrix = np.tile(user_features_vector, (num_items, 1))
        item_features_matrix = item_genres

        # Context of arm a = user's interaction row followed by item a's genre flags.
        arm_features = np.concatenate((user_features_matrix, item_features_matrix), axis=1)
        p_t = np.zeros(shape=(arm_features.shape[0],), dtype=float)

        # Iterate arm indices directly. Rebinding the notebook-level `item_ids`
        # array to range(num_items) here would corrupt the data-generation
        # cells if they were re-run afterwards.
        for a in range(num_items):
            x_ta = arm_features[a].reshape(arm_features[a].shape[0], 1)
            A_a_inv = np.linalg.inv(A[a])
            theta_a = A_a_inv.dot(b[a])
            score = theta_a.T.dot(x_ta) + alpha * np.sqrt(x_ta.T.dot(A_a_inv).dot(x_ta))
            # `score` has shape (1, 1); take its element explicitly, because
            # assigning an ndim > 0 array to a scalar slot is deprecated since
            # NumPy 1.25.
            p_t[a] = score.item()

        # Break ties for the best score uniformly at random.
        max_p_t = np.max(p_t)
        max_idxs = np.argwhere(p_t == max_p_t).flatten()
        a_t = np.random.choice(max_idxs)

        # Reward is 1 only for an observed interaction. A single expression
        # means r_t can never keep a stale value from the previous step.
        r_t = int(matrix[user_id, a_t] == 1)

        x_t_at = arm_features[a_t].reshape(arm_features[a_t].shape[0], 1)
        A[a_t] = A[a_t] + x_t_at.dot(x_t_at.T)
        b[a_t] = b[a_t] + r_t * x_t_at.flatten()

        rewards.append(r_t)

    avg_reward = np.average(np.array(rewards))
    # Keep every epoch's result; before, only the last avg_reward survived the loop.
    avg_rewards[epoc] = avg_reward

0
1
0
1
0
1
1
1
0
1
0
1


In [54]:
avg_reward

0.5