In [1]:
!pip install -Uqq fastbook
#!pip install graphviz
import fastbook
from fastbook import *
from fastai.collab import *
from fastai.tabular.all import *  
import torch
torch.cuda.get_device_name(torch.cuda.current_device())
# Note: make sure GPU is enabled

'Tesla T4'

# RecSys Model

This is a FastAI recommender systems model based on collaboritive filtering (“Users who preferred this item also preferred”) and explicit feedback (rating). We need a user-uploaded csv dataset that contains users, items, and ratings to build the model.

If it is implicit feedback (only user-item interactions are considered), just append a column of constants to be the ratings. 

Load our demo 10k movielens data. Columns=['user', 'movie', 'rating', 'timestamp', 'title']

In [2]:
path = untar_data(URLs.ML_100k)
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user','movie','rating','timestamp'])
movies = pd.read_csv(path/'u.item',  delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie','title'), header=None)
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


Through the MLaaS platform, we are taking a well-cleaned dataset that has the features "user" (str), "item" (str), and "rating" (int/float). The user needs to specify the column names; the range of the ratings, eg. 0-5 for movies; and k=? for the top k items to recommend. 

In [3]:
# Define some constants
USER, ITEM, RATING, PREDICTION = 'user', 'title', 'rating', 'pred' # the first three requires user input
Y_RANGE = (0, 5.5) # adding 0.5 to the maximum rating for better performance

# Top k items to recommend
TOP_K = 10

Make sure the IDs are loaded as stirngs to prevent confusion with embedding IDs

In [4]:
ratings[USER] = ratings[USER].astype('str')
ratings[ITEM] = ratings[ITEM].astype('str') 
# For interpretability, we are using movie titles instead of movie IDs as items, which are already strings

Split into train and test sets. The splitting ratio is (.75, .25). This is a random split. Depending on the data, we can also work on stratified and chono split in the future. 

In [5]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(ratings, test_size=0.25, random_state=42)

In [6]:
def get_model(dls, n_factors, layers, max_learning_rate, n_epoch, weight_decay):
  learn = collab_learner(dls, n_factors=n_factors, use_nn=True, y_range=Y_RANGE, layers=layers)
  learn.fit_one_cycle(n_epoch=n_epoch, lr_max=max_learning_rate, wd=weight_decay)
  return learn

In [7]:
dls = CollabDataLoaders.from_df(train, user_name=USER, item_name=ITEM, rating_name=RATING, seed=42, bs=64)
# Train a sample model
model = get_model(dls, n_factors=40, layers=[120, 60, 30], max_learning_rate=5e-3, n_epoch=5, weight_decay=0.1 )

epoch,train_loss,valid_loss,time
0,1.002506,0.98314,00:07
1,0.941453,0.919471,00:07
2,0.881236,0.899451,00:07
3,0.84102,0.878275,00:07
4,0.764742,0.880206,00:07


@source [microsoft recommenders / fastai_movielens.ipynb](https://github.com/microsoft/recommenders/blob/main/examples/00_quick_start/fastai_movielens.ipynb)

In [8]:
def get_test_df(learn, test, item_col=ITEM, user_col=USER, rating_col=RATING):
  ''' 
  Return all user-item combinations where the user is known to the model but the combinations are unknown (pandas.DataFrame). 
  
  Args:
      learn (object): Model.
      test (pandas.DataFrame): Test dataset from train-test split.
      user_col (str): User column name.
      item_col (str): Item column name.
      rating_col (str): Rating column name.
  '''

  # get items and users known to the model
  total_items = learn.dls.classes[item_col][1:]
  total_users = learn.dls.classes[user_col][1:]

  # get all users from the test set and remove any users not in the train set
  test_users = test[user_col].unique()
  test_users = np.intersect1d(test_users, total_users)

  # build the cartesian product of test set users and all items known to the model
  users_items = cartesian_product(np.array(test_users),np.array(total_items))
  users_items = pd.DataFrame(users_items, columns=[user_col, item_col])

  # remove the user/items combinations that are in the train set; we don't want to propose a movie that the user has already watched.
  training_removed = pd.merge(users_items, train.astype('str'), on=[user_col, item_col], how='left')
  training_removed = training_removed[training_removed[rating_col].isna()][[user_col, item_col]]

  return training_removed

def cartesian_product(x, y):
  '''a helper function that returns the cartesian product of two numpy arrays'''
  return np.transpose([np.tile(x, len(y)), np.repeat(y, len(x))])


@source [microsoft recommenders / fastai_utils.py](https://github.com/microsoft/recommenders/blob/main/reco_utils/recommender/fastai/fastai_utils.py)

In [9]:
def score(learn, test_df, item_col=ITEM, user_col=USER, pred_col=PREDICTION, top_k=TOP_K):
  '''
  Return recommendation result (pandas.DataFrame)

  Args:
      learn (object): Model.
      test_df (pandas.DataFrame): Test dataframe from get_test_df().
      user_col (str): User column name.
      item_col (str): Item column name.
      pred_col (str): Prediction column name.
      top_k (int): Number of top items to recommend.
  '''
  # get prediction as a numpy array
  dl = learn.dls.test_dl(test_df)
  pred_tensor,_ = learn.get_preds(dl=dl)
  pred = pred_tensor.numpy().flatten()

  scores = pd.DataFrame(
        {user_col: test_df[user_col], item_col: test_df[item_col], pred_col: pred}
    )
  scores = scores.sort_values([user_col, pred_col], ascending=[True, False])
  if top_k==TOP_K:
      top_scores = scores.groupby(user_col).head(top_k).reset_index(drop=True)
  else:
      top_scores = scores
  return top_scores

Use our sample model to take a look at our top-k prediction pandas dataframe.

In [10]:
test_df = get_test_df(model, test)
pred_df = score(model, test_df)
pred_df.head()

Unnamed: 0,user,title,pred
0,1,Schindler's List (1993),4.614772
1,1,Rear Window (1954),4.556983
2,1,Casablanca (1942),4.549874
3,1,"Close Shave, A (1995)",4.53476
4,1,"Ruling Class, The (1972)",4.504482


## Evaluation, step by step

In [11]:
# Make sure the prediction and true data frames have the same set of users
common_users = set(test[USER]).intersection(set(pred_df[USER]))
rating_true_common = test[test[USER].isin(common_users)]
rating_pred_common = pred_df[pred_df[USER].isin(common_users)]
# n_users = len(common_users)

df_hit = pred_df.copy()
df_hit['rank'] = df_hit.groupby(USER, sort=False).cumcount() + 1
df_hit = pd.merge(df_hit, rating_true_common, on=[USER, ITEM])[
        [USER, ITEM, "rank"]
    ]
df_hit

Unnamed: 0,user,title,rank
0,1,"Empire Strikes Back, The (1980)",8
1,10,Casablanca (1942),1
2,10,"Shawshank Redemption, The (1994)",2
3,10,Star Wars (1977),3
4,101,Star Wars (1977),6
...,...,...,...
712,96,Schindler's List (1993),1
713,96,"Shawshank Redemption, The (1994)",2
714,96,"Godfather, The (1972)",10
715,97,"Silence of the Lambs, The (1991)",5


In [12]:
n_users = len(common_users)
df_hit_count = pd.merge(
    df_hit.groupby(USER, as_index=False)[USER].agg({"hit": "count"}),
    test.groupby(USER, as_index=False)[USER].agg(
        {"actual": "count"}
    ),
    on=USER,
)
df_hit_count.head()

Unnamed: 0,user,hit,actual
0,1,1,55
1,10,3,56
2,101,1,13
3,103,1,12
4,106,2,19


In [13]:
test.groupby(USER, as_index=False)[USER].agg(
        {"actual": "count"})

Unnamed: 0,user,actual
0,1,55
1,10,56
2,100,19
3,101,13
4,102,45
...,...,...
938,95,67
939,96,16
940,97,15
941,98,6


In [14]:
df_hit.groupby(USER, as_index=False)[USER].agg({"hit": "count"})

Unnamed: 0,user,hit
0,1,1
1,10,3
2,101,1
3,103,1
4,106,2
...,...,...
409,942,1
410,943,1
411,96,3
412,97,1


In [15]:
df_hit.shape[0]

717

In [16]:
(df_hit_count["hit"] / TOP_K).sum() / n_users

0.07603393425238603

In [17]:
(df_hit_count["hit"] / df_hit_count["actual"]).sum() / n_users

0.028887843546493586

## Evaluation of Model Performance

@source [microsoft recommenders / python_evaluation.py](https://github.com/microsoft/recommenders/blob/main/reco_utils/evaluation/python_evaluation.py)

In [18]:
def merge_test_pred(test, pred_df):
  '''Filter truth and prediction data frames on common users
      test (pandas.DataFrame): True DataFrame from train-test split
      pred_df (pandas.DataFrame): Predicted DataFrame
    Returns:
      pandas.DataFrame, pandas.DataFrame, int: DataFrame of recommendation hits, sorted by `col_user` and `rank`
      DataFrmae of hit counts vs actual relevant items per user number of unique user ids
  '''
  # Make sure the prediction and true data frames have the same set of users
  common_users = set(test[USER]).intersection(set(pred_df[USER]))
  rating_true_common = test[test[USER].isin(common_users)]
  rating_pred_common = pred_df[pred_df[USER].isin(common_users)]
  n_users = len(common_users)

  df_hit = pred_df.copy()
  df_hit['rank'] = df_hit.groupby(USER, sort=False).cumcount() + 1
  df_hit = pd.merge(df_hit, rating_true_common, on=[USER, ITEM])[
          [USER, ITEM, "rank"]
      ]

  df_hit_count = pd.merge(
    df_hit.groupby(USER, as_index=False)[USER].agg({"hit": "count"}),
    test.groupby(USER, as_index=False)[USER].agg(
        {"actual": "count"}
    ),
    on=USER,
  )
  
  return df_hit, df_hit_count, n_users

def precision_at_k(test, pred_df):
  '''Precision at K.
    Note:
        We use the same formula to calculate precision@k as that in Spark.
        More details can be found at
        http://spark.apache.org/docs/2.1.1/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.precisionAt
        In particular, the maximum achievable precision may be < 1, if the number of items for a
        user in rating_pred is less than k.
    Args:
      test (pandas.DataFrame): True DataFrame from train-test split
      pred_df (pandas.DataFrame): Predicted DataFrame
    Returns:
      float: precision at k (min=0, max=1)
    '''
  df_hit, df_hit_count, n_users = merge_test_pred(test, pred_df)
    
  if df_hit.shape[0] == 0:
    return 0.0

  return (df_hit_count["hit"] / TOP_K).sum() / n_users

def recall_at_k(test, pred_df):
  """Recall at K.
  Args:
      test (pandas.DataFrame): True DataFrame from train-test split
      pred_df (pandas.DataFrame): Predicted DataFrame
  Returns:
      float: recall at k (min=0, max=1). The maximum value is 1 even when fewer than 
      k items exist for a user in rating_true.
  """
  df_hit, df_hit_count, n_users = merge_test_pred(test, pred_df)

  if df_hit.shape[0] == 0:
    return 0.0

  return (df_hit_count["hit"] / df_hit_count["actual"]).sum() / n_users

def F1_Score(test, pred_df):
  precision = precision_at_k(test, pred_df)
  recall = recall_at_k(test, pred_df)

  return 2 * ((precision * recall) / (precision + recall))

In [19]:
#precision_at_k(test, pred_df)

In [20]:
#recall_at_k(test, pred_df)

In [21]:
F1_Score(test, pred_df)

0.04186845558636277

# Auto Tuning

Tunable hyperparameters: 'n_factors', 'layers', 'max_learning_rate', 'n_epoch', 'weight_decay'.

In [22]:
def generate_layers():
  n_layers = np.random.randint(2, 5)
  top_layer = np.random.choice(np.arange(100, 260, 20))
  layers = [top_layer]
  for i in np.arange(n_layers):
    if top_layer >= 60:
      top_layer = int(top_layer / 2)
      layers.append(top_layer)
  return layers

def generate_factors():
  return np.random.randint(35, 65)

def generate_epoch():
  return np.random.randint(4, 8)

def generate_max_lr():
  m = np.log(max(Y_RANGE)/1000)
  return np.random.choice(np.logspace(m,-1,num=50)).item()

def generate_wd():
  return np.random.uniform(0.01, 1)

def generate_random_comb(n_comb):
  n = max(2, n_comb**(1/5))
  parameters = [
  {"id": "n_factors", "values": [generate_factors() for i in np.arange(n)]},
  {"id": "layers", "values": [generate_layers() for i in np.arange(n)]},
  {"id": "n_epoch", "values": [generate_epoch() for i in np.arange(n)]},
  {"id": "max_learning_rate", "values": [generate_max_lr() for i in np.arange(n)]},
  {"id": "weight_decay", "values": [generate_wd() for i in np.arange(n)]}
  ] 
  param_id = [parameter['id'] for parameter in parameters]
  param_values = [parameter['values'] for parameter in parameters]
  value_pairs = list(itertools.product(*param_values))
  combinations =  [dict(zip(param_id, value)) for value in value_pairs]
  return np.random.choice(combinations, size=n_comb, replace=False)

@source [microsoft recommenders / fasti_utils](https://github.com/microsoft/recommenders/blob/main/reco_utils/recommender/fastai/fastai_utils.py)

In [23]:
# This function is not working properly; progress bars are not suppressed
import fastai
import fastprogress
from fastprogress.fastprogress import force_console_behavior
from fastprogress.fastprogress import progress_bar,master_bar

def hide_fastai_progress_bar():
    """Hide fastai progress bar"""
    fastprogress.fastprogress.NO_BAR = True
    fastprogress.fastprogress.WRITER_FN = str
    master_bar, progress_bar = force_console_behavior()
    master_bar, progress_bar = (
        master_bar,
        progress_bar,
    )

In [24]:
def random_search(train, test, n_comb, evaluate=F1_Score):
  '''Args:
      train: Train set from train-test split
      test: Test set from train-test split
      n_comb: Number of combinations of hyperparameters
      evaluate: Evaluation function, default to F1_Score
    Returns:
      The model that performs the best, its hyperparameters, and its evalation score based on the evaluation function
  '''

  dls = CollabDataLoaders.from_df(train, user_name=USER, item_name=ITEM, rating_name=RATING, seed=42, bs=64)
  combinations = generate_random_comb(n_comb)

  hide_fastai_progress_bar()
  
  models = []
  scores = []
  for i in np.arange(len(combinations)):
    combo = combinations[i] 
    learner = get_model(dls, 
                  n_factors=combo['n_factors'], 
                  layers=combo['layers'], 
                  max_learning_rate=combo['max_learning_rate'], 
                  n_epoch=combo['n_epoch'], 
                  weight_decay=combo['weight_decay'])
    models.append(learner)
    
    test_df = get_test_df(learner, test)
    pred_df = score(learner, test_df)
    f1_score = F1_Score(test, pred_df)
    scores.append(f1_score)

  best_param_idx = np.array(scores).argmax()
  
  return models[best_param_idx], combinations[best_param_idx], scores[best_param_idx]


In [25]:
learn, params, f1_score = random_search(train, test, 10)

epoch,train_loss,valid_loss,time
0,1.070942,1.042757,00:08
1,1.005176,1.04625,00:07
2,0.964906,0.936328,00:07
3,0.857576,0.889376,00:08


epoch,train_loss,valid_loss,time
0,1.039809,1.045739,00:08
1,0.996393,1.013684,00:08
2,0.913293,0.935595,00:08
3,0.870046,0.894166,00:07


epoch,train_loss,valid_loss,time
0,1.025604,1.009889,00:08
1,0.971079,0.946757,00:08
2,0.874647,0.897981,00:08
3,0.79793,0.884507,00:08


epoch,train_loss,valid_loss,time
0,1.252633,1.189978,00:08
1,0.928001,0.966802,00:08
2,0.771807,0.955246,00:08
3,0.581241,0.971724,00:08


epoch,train_loss,valid_loss,time
0,1.038777,0.984982,00:07
1,0.969052,0.952445,00:08
2,0.892304,0.891971,00:08
3,0.826514,0.882448,00:07


epoch,train_loss,valid_loss,time
0,1.19503,1.142655,00:08
1,0.959111,0.963414,00:07
2,0.755897,0.95867,00:07
3,0.55452,0.972416,00:08


epoch,train_loss,valid_loss,time
0,1.227055,1.178494,00:07
1,0.893448,0.964283,00:07
2,0.763295,0.952042,00:07
3,0.528098,0.962554,00:07


epoch,train_loss,valid_loss,time
0,1.074532,1.053127,00:08
1,1.016605,0.999673,00:08
2,0.947769,0.947103,00:08
3,0.904851,0.888657,00:08


epoch,train_loss,valid_loss,time
0,1.292522,1.220792,00:08
1,0.918336,0.962576,00:08
2,0.743158,0.95652,00:08
3,0.586185,0.96726,00:08


epoch,train_loss,valid_loss,time
0,1.185246,1.116771,00:07
1,0.933953,0.979543,00:07
2,0.707064,0.960683,00:07
3,0.50417,0.976959,00:08


In [26]:
params

{'layers': [240, 120, 60, 30],
 'max_learning_rate': 0.011388532733495037,
 'n_epoch': 4,
 'n_factors': 41,
 'weight_decay': 0.4646564030462085}

In [27]:
f1_score

0.05399485115338404

Using EmbeddingDotBias and the paramters from the microsoft example. Got higher f_score.

In [28]:
non_nn = collab_learner(dls, n_factors=40, y_range=[0,5.5], wd=1e-1)
non_nn.fit_one_cycle(5, 5e-3)
test_df = get_test_df(non_nn, test)
pred_df = score(non_nn, test_df)
F1_Score(test, pred_df)

epoch,train_loss,valid_loss,time
0,0.987687,0.972858,00:05
1,0.867035,0.881681,00:05
2,0.723627,0.852629,00:05
3,0.602197,0.843186,00:05
4,0.503336,0.844614,00:05


0.07633238366502201

Things to explore next: 
- Does class EmbeddingDotBias consistently outperform class EmbeddingNN (use_nn=True)?
- Write GridSearch method for class EmbeddingNN
- Write GridSearch and/or RandomSearch methods for class EmbeddingDotBias too and compare with the best EmbeddingNN model; take the better of the two
- Content-based and hybrid model when given datasets that contain different kinds of information
- We manipulate data using pandas, which may not be appropriate for larger datasets