In [63]:
from fastai.collab import *
from fastai.tabular.all import *
set_seed(42)  # seed up front so the random tensors and training runs below are intended to be repeatable

In [64]:
# Fetch the MovieLens 100k dataset; `path` is used below as the folder holding `u.data` and `u.item`.
path = untar_data(URLs.ML_100k)

### Preprocessing Our Data

In [65]:
# u.data is tab-separated and has no header row, so the column names are supplied explicitly.
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None, names=['user','movie','rating','timestamp'])
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [66]:
# u.item is pipe-separated and read as latin-1; only the first two columns (movie id, title) are kept.
movies = pd.read_csv(path/'u.item',  delimiter='|', encoding='latin-1', usecols=(0,1), names=('movie','title'), header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [67]:
# No `on=` is given, so pandas joins on the columns both frames share (here: 'movie') using the
# default inner join. The result adds each movie's title to its rating rows and rebinds `ratings`.
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


In [68]:
# Build the collaborative-filtering DataLoaders from the merged frame, using the human-readable
# 'title' column as the item (instead of the numeric movie id) and a batch size of 64.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,782,Starship Troopers (1997),2
1,943,Judge Dredd (1995),3
2,758,Mission: Impossible (1996),4
3,94,Farewell My Concubine (1993),5
4,23,Psycho (1960),4
5,296,Secrets & Lies (1996),5
6,940,"American President, The (1995)",4
7,334,Star Trek VI: The Undiscovered Country (1991),1
8,380,Braveheart (1995),4
9,690,So I Married an Axe Murderer (1993),1


In [69]:
# Vocabulary sizes as seen by the DataLoaders (dls.classes is indexed by column name).
n_users  = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
n_factors = 5  # latent dimension used only for this hand-rolled illustration

# Randomly initialised latent-factor matrices: one row per user / per title, n_factors columns.
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

In [70]:
# One-hot vector of length n_users selecting index 3, cast to float so it can be matrix-multiplied.
one_hot_3 = one_hot(3, n_users).float()

In [71]:
# Multiplying by a one-hot vector picks out a single row: this equals user_factors[3].
user_factors.t() @ one_hot_3

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

### Using the fastai Collab Learner

In [72]:
# Collaborative-filtering learner with 50 latent factors (the model printed below is EmbeddingDotBias).
# y_range=(0, 5.5) is passed to bound the predictions; the upper end sits a little above the top rating of 5.
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))

In [73]:
# Train for 5 epochs with the one-cycle schedule: max learning rate 5e-3, weight decay 0.1.
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.876221,0.937569,00:07
1,0.67471,0.884032,00:07
2,0.516745,0.865782,00:07
3,0.444451,0.852322,00:08
4,0.432781,0.848564,00:07


In [74]:
# Display the model's layers: user/item embedding tables plus their per-row bias tables.
learn.model

EmbeddingDotBias(
  (u_weight): Embedding(944, 50)
  (i_weight): Embedding(1665, 50)
  (u_bias): Embedding(944, 1)
  (i_bias): Embedding(1665, 1)
)

In [75]:
# Item bias table flattened to one scalar per title (squeeze drops the size-1 embedding dimension).
movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending=True)[:5]  # indices of the 5 largest biases
# Map the indices back to titles through the DataLoaders' title vocabulary.
[dls.classes['title'][i] for i in idxs]

['Titanic (1997)',
 "Schindler's List (1993)",
 'Shawshank Redemption, The (1994)',
 'As Good As It Gets (1997)',
 'Boot, Das (1981)']

### Embedding Distance

In [76]:
# NOTE: rebinds `movie_factors` (earlier a random matrix) to the trained item-embedding weights.
movie_factors = learn.model.i_weight.weight
idx = dls.classes['title'].o2i['Star Wars (1977)']
# Despite the name, `distances` holds cosine *similarities* to the Star Wars embedding (higher = closer).
distances = nn.CosineSimilarity(dim=1)(movie_factors, movie_factors[idx][None])
# Position 0 of the descending sort is expected to be Star Wars itself (similarity 1), so position 1
# is taken as the nearest other title. `idx` is reused here for the neighbour's index.
idx = distances.argsort(descending=True)[1]
dls.classes['title'][idx]

'Empire Strikes Back, The (1980)'

### Deep Learning For Collaborative Filtering

In [77]:
# Embedding sizes (vocabulary size, embedding width) that fastai derives for each categorical column.
# Shown for inspection only: `embs` is not passed to the collab_learner call that follows.
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [78]:
# Rebinds `learn`: use_nn=True requests a neural-net model (hidden layers of 100 and 50 units) in place
# of the dot-product one, so any later cell that reads `learn` uses this network.
learn = collab_learner(dls, use_nn=True, y_range=(0, 5.5), layers=[100,50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.973642,0.974152,00:09
1,0.893988,0.916082,00:09
2,0.818448,0.886796,00:09
3,0.752589,0.878665,00:09
4,0.744945,0.873627,00:09


In [91]:
def recommend_movies(user_id, topn=10):
    """Return the `topn` best-scoring titles that `user_id` has not rated yet.

    Relies on the notebook globals `ratings` (user/title rating rows), `dls`
    (for the user and title vocabularies) and `learn` (the trained learner).

    Args:
        user_id: Raw user id as it appears in ``ratings['user']``.
        topn: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples sorted by score, highest first.
        Scores are taken from the model's own forward pass, so whatever output
        scaling the model applies (e.g. ``y_range``) is reflected for every
        model type. An empty list is returned when there is nothing left to
        recommend.
    """
    # A set makes the "already rated?" test O(1) per title.
    seen_titles = set(ratings.loc[ratings['user'] == user_id, 'title'])
    title_vocab = dls.classes['title']

    # '#na#' is the placeholder entry fastai keeps in the vocabulary for unknown
    # values; it is not a real movie and must never be recommended.
    candidates = [t for t in list(title_vocab) if t != '#na#' and t not in seen_titles]
    if not candidates:
        return []

    model = learn.model
    device = next(model.parameters()).device

    # The user index is the same for every candidate, so look it up once.
    user_idx = dls.classes['user'].o2i[user_id]
    # One (user, item) index pair per candidate, scored in a single batch.
    x_cat = torch.tensor(
        [[user_idx, title_vocab.o2i[t]] for t in candidates],
        dtype=torch.long,
        device=device,
    )

    # Score in eval mode (batch-norm/dropout behave deterministically) and without
    # building an autograd graph; restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            scores = model(x_cat).reshape(-1).tolist()
    finally:
        model.train(was_training)

    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:topn]

# Top 10 predicted titles for user id 3, restricted to titles that user has not rated.
recommend_movies(3, topn=10)


[('Close Shave, A (1995)', 4.2141804695129395),
 ('Wallace & Gromit: The Best of Aardman Animation (1996)', 4.165003299713135),
 ('Shawshank Redemption, The (1994)', 3.9987528324127197),
 ('Wrong Trousers, The (1993)', 3.989448308944702),
 ('Grand Day Out, A (1992)', 3.8906309604644775),
 ('Rear Window (1954)', 3.8737099170684814),
 ('To Kill a Mockingbird (1962)', 3.8661587238311768),
 ('Titanic (1997)', 3.8576841354370117),
 ('Henry V (1989)', 3.844143867492676),
 ('Pather Panchali (1955)', 3.788679838180542)]