In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linucb import LinUCB, eLinUCB, HybridLinUCB
import os
from tqdm.auto import tqdm
from IPython.display import display
from sklearn.preprocessing import LabelEncoder
import warnings

# Do not truncate columns when displaying wide frames.
pd.set_option("display.max_columns", None)
# NOTE(review): this silences every warning for the whole notebook, including
# pandas copy/assignment warnings; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Render matplotlib figures inline in the cell output.
%matplotlib inline

# Load Data

In [None]:
# Show what is in the dataset directory, then read every entry as a CSV.
# NOTE: `os.listdir` returns entries in arbitrary order, so the order of `dfs`
# is not guaranteed to be the same on every machine.
print(os.listdir("./datasets"))

dfs = []
for fname in os.listdir("./datasets"):
    print(fname)
    dfs.append(
        pd.read_csv(f"./datasets/{fname}", engine='python', low_memory=True)
    )

In [None]:
def _frame_with_column(frames, column):
    """Return the only frame in ``frames`` that has a column named ``column``.

    Raises:
        ValueError: if no frame, or more than one frame, has that column.
    """
    matches = [frame for frame in frames if column in frame.columns]
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one dataset with a '{column}' column, "
            f"found {len(matches)}"
        )
    return matches[0]


# `dfs` follows `os.listdir` order, which is arbitrary, so identify each table
# by a column that only it carries instead of by its position in the list.
users = _frame_with_column(dfs, 'gender')
movies = _frame_with_column(dfs, 'genre')
ratings = _frame_with_column(dfs, 'ratings')

separator = "=" * 35
display(users.head())
print(separator)
display(movies.head())
print(separator)
display(ratings.head())

# Preprocess

## Users

In [None]:
## gender binary encoding: 1 for "F", 0 for anything else
users['gender'] = (users['gender'] == "F").astype(np.int64)
users.head()

In [None]:
## age groups
# Bins are left-closed ([0, 20), [20, 30), ..., [60, inf)) so that each group
# name below describes exactly the ages it contains.
bins = [0, 20, 30, 40, 50, 60, np.inf]
names = ['<20', '20-29', '30-39', '40-49', '50-59', '60+']
labels = list(range(len(names)))

# Group name -> integer code (used as the one-hot position in the next cell).
name_label = dict(zip(names, labels))

print(name_label)
# right=False: pd.cut defaults to right-closed intervals, which would put
# age 20 into '<20' and age 30 into '20-29'.
users['agegroup'] = pd.cut(users['age'], bins=bins, labels=names, right=False)
users['agegroup'] = users['agegroup'].map(name_label)
users.head()

In [None]:
users.drop('age', axis=1, inplace=True)

# One uint8 indicator column per age-group code, named `agegroup_<code>`.
# get_dummies names each column after the code itself, so it cannot index out
# of range the way a `nunique()`-wide array does when some group has no users;
# rows with a missing group simply get all zeros.
age_onehot = pd.get_dummies(users['agegroup'], prefix='agegroup', dtype=np.uint8)
for column in age_onehot.columns:
    users[column] = age_onehot[column]

users.drop('agegroup', axis=1, inplace=True)
users.head()

In [None]:
## occupation one-hot
# One uint8 indicator column per occupation code, named `occupation_<code>`.
# Columns are keyed by the code value rather than by position in a
# `nunique()`-wide array, so gaps in the codes (or codes that do not start at 0)
# no longer raise an IndexError.
occupation_onehot = pd.get_dummies(
    users['occupation'], prefix='occupation', dtype=np.uint8
)
for column in occupation_onehot.columns:
    users[column] = occupation_onehot[column]

users.drop('occupation', axis=1, inplace=True)
users.head()

In [None]:
## zipcode is removed from the user table
users.drop(columns=['zipcode'], inplace=True)
users.head()

## Movies

In [None]:
## title is removed from the movie table
movies.drop(columns=['title'], inplace=True)

In [None]:
# Sorted list of every distinct genre name found in the ', '-separated
# `genre` column.
genres = sorted({
    name
    for genre_string in movies['genre']
    for name in genre_string.split(', ')
})
print(genres)

In [None]:
## genre multi-hot: one `is_<genre>` column per entry of `genres`
genre_position = {name: j for j, name in enumerate(genres)}
genre_onehot = np.zeros(shape=(len(movies), len(genres)), dtype=np.uint8)
for row, genre_string in enumerate(movies['genre']):
    # A movie can list several genres; flag each of them.
    for name in genre_string.split(', '):
        genre_onehot[row, genre_position[name]] = 1

for j, name in enumerate(genres):
    movies[f"is_{name}"] = genre_onehot[:, j]

movies.drop('genre', axis=1, inplace=True)
movies.head()

## Ratings

In [None]:
# Binary reward: 1 when the rating is strictly greater than 4, otherwise 0.
ratings['reward'] = (ratings['ratings'] > 4).astype(np.int64)
# The raw rating and the timestamp are no longer needed once the reward exists.
ratings.drop(columns=['timestamp', 'ratings'], inplace=True)

In [None]:
# user_mean = ratings[['userid', 'ratings']].groupby(by='userid').mean()
# user_mean.reset_index(drop=False, inplace=True)
# user_mean.head()

In [None]:
# ratings = pd.merge(left=ratings, right=user_mean, on='userid', how='left')
# ratings.head()

In [None]:
# Disabled alternative reward: 1 when a rating is above that user's own mean
# rating (relies on the merge in the commented-out cells above, where
# `ratings_x` is the rating and `ratings_y` the per-user mean).
# ratings['reward'] = (ratings['ratings_x'] > ratings['ratings_y']).astype(np.uint8)
# ratings.drop(['ratings_x', 'ratings_y'], axis=1, inplace=True)
# print(ratings.shape)
ratings.head()

In [None]:
n = 100

# Number of ratings per movie, most-rated first.
top_movies = (
    ratings[["movieid", "userid"]]
    .groupby(by="movieid")
    .count()
    .sort_values(by=["userid"], ascending=False)
    .reset_index(drop=False)
)

# Feature rows of the n most-rated movies, ordered by movieid with a fresh
# 0..n-1 index.
top_n_movies = (
    movies[movies['movieid'].isin(top_movies.head(n)['movieid'])]
    .sort_values(by='movieid')
    .reset_index(drop=True)
)
top_n_movies.head()

In [None]:
# Keep only the ratings whose movie is among the selected top-n movies.
is_top_movie = ratings['movieid'].isin(top_n_movies['movieid'])
top_n_ratings = ratings[is_top_movie].reset_index(drop=True)
print(top_n_ratings.shape)
top_n_ratings.head()

# LinUCB Simulation

In [None]:
def run(learner, data, arms, users, nsim):
    """Evaluate a bandit learner by replaying logged ratings.

    Every logged row (user, movie, reward) is shown to the learner, which picks
    an arm for that user. When the picked arm is the logged movie, the row is a
    "hit": its reward is accumulated and the learner is updated with it. Rows
    that were not hits are replayed in the next pass, for ``nsim`` passes.

    Args:
        learner: object exposing ``choose(user_feature)`` (returns an arm
            position) and ``update(user_feature, arm_position, reward)``.
        data: DataFrame with ``userid``, ``movieid`` and ``reward`` columns.
        arms: DataFrame with a ``movieid`` column; arm position ``j`` refers to
            its ``j``-th row, whatever the frame's index labels are.
        users: DataFrame with a ``userid`` column; all columns after the first
            one are handed to the learner as the user's features.
        nsim: number of passes over the (remaining) rows.

    Returns:
        dict with ``aligned_ctr`` (running mean reward after each hit),
        ``aligned_timestamp`` (total number of hits) and ``cum_reward``
        (sum of the rewards of all hits).
    """
    # The learner answers with an arm *position*; resolve it positionally so the
    # result does not depend on the index labels of `arms`.
    arm_ids = arms['movieid'].to_numpy()

    # The same user appears in many rows; look each user's features up once.
    # The cached arrays are reused across calls — assumes the learner does not
    # modify them in place.
    feature_cache = {}

    aligned_ctr = []
    aligned_timestep = 0
    cum_reward = 0

    remaining = data
    for _ in range(nsim):
        user_ids = remaining['userid'].to_numpy()
        movie_ids = remaining['movieid'].to_numpy()
        rewards = remaining['reward'].to_numpy()
        n_rows = len(remaining)

        unused = []  # positions within `remaining` that were not hits
        for i in tqdm(range(n_rows)):
            user_id = user_ids[i]
            user_feature = feature_cache.get(user_id)
            if user_feature is None:
                user_feature = users[users['userid'] == user_id].iloc[:, 1:].to_numpy()
                feature_cache[user_id] = user_feature

            chosen_arm = learner.choose(user_feature)
            if arm_ids[chosen_arm] == movie_ids[i]:
                reward = rewards[i]
                aligned_timestep += 1
                cum_reward += reward
                aligned_ctr.append((cum_reward / aligned_timestep))
                learner.update(user_feature, chosen_arm, reward)
            else:
                unused.append(i)
        print(f"Hit count: {n_rows - len(unused)}")

        # Only the rows that did not match are replayed in the next pass.
        remaining = remaining.iloc[unused]

    return {
        "aligned_ctr": aligned_ctr,
        "aligned_timestamp": aligned_timestep,
        "cum_reward": cum_reward,
    }

In [None]:
# Arm feature matrix: the selected movies without their first column
# (presumably the movieid column — confirm against the CSV layout).
arm_to_use = top_n_movies.iloc[:, 1:].to_numpy()

# Feature counts: number of columns minus the leading id column.
arm_features = len(movies.columns) - 1
user_features = len(users.columns) - 1
d = arm_features + user_features   # passed to every learner as `d`
k = arm_features * user_features   # passed to HybridLinUCB as `k`

alphas = [0., 0.5, 1., 2.]

# Mean logged reward over the selected ratings, drawn as a reference line below.
reward_mean = top_n_ratings["reward"].mean()
print(f"Mean reward: {reward_mean}")

In [None]:
# Replay the selected ratings with LinUCB once per alpha and plot the running
# mean reward over hits; the red line is the mean logged reward.
fig, ax = plt.subplots(figsize=(8, 6))

for alpha in alphas:
    print(f"alpha={alpha}")
    learner = LinUCB(arms=arm_to_use, d=d, alpha=alpha)
    result = run(
        learner=learner,
        data=top_n_ratings,
        arms=top_n_movies,
        users=users,
        nsim=2,
    )
    ax.plot(result['aligned_ctr'], label=f"alpha={alpha}")

ax.axhline(y=reward_mean, color="red")
ax.set_ylim([reward_mean - 0.3, 1.05])
ax.set_title(learner.__class__.__name__)
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Replay the selected ratings with eLinUCB (epsilon=0.3) once per alpha and
# plot the running mean reward over hits; the red line is the mean logged reward.
fig, ax = plt.subplots(figsize=(8, 6))

for alpha in alphas:
    print(f"alpha={alpha}")
    learner = eLinUCB(arms=arm_to_use, d=d, alpha=alpha, epsilon=0.3)
    result = run(
        learner=learner,
        data=top_n_ratings,
        arms=top_n_movies,
        users=users,
        nsim=2,
    )
    ax.plot(result['aligned_ctr'], label=f"alpha={alpha}")

ax.axhline(y=reward_mean, color="red")
ax.set_ylim([reward_mean - 0.3, 1.05])
ax.set_title(learner.__class__.__name__)
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Replay the selected ratings with HybridLinUCB once per alpha and plot the
# running mean reward over hits; the red line is the mean logged reward.
fig, ax = plt.subplots(figsize=(8, 6))

for alpha in alphas:
    print(f"alpha={alpha}")
    learner = HybridLinUCB(arms=arm_to_use, d=d, k=k, alpha=alpha)
    result = run(
        learner=learner,
        data=top_n_ratings,
        arms=top_n_movies,
        users=users,
        nsim=2,
    )
    ax.plot(result['aligned_ctr'], label=f"alpha={alpha}")

ax.axhline(y=reward_mean, color="red")
ax.set_ylim([reward_mean - 0.3, 1.05])
ax.set_title(learner.__class__.__name__)
ax.grid(True)
ax.legend()
plt.show()