### -- GLOBAL IMPORTS --

In [1]:
import pandas as pd

# torch import : more compatible with m1
import torch
import torch.nn as nn
# from torch.nn import Module, Embedding

# fastai import : less compatible with m1
from fastai.torch_basics import Module, Embedding
from fastai.data.external import untar_data, URLs
from fastai.collab import CollabDataLoaders
from fastai.learner import Learner
from fastai.losses import MSELossFlat
from fastai.torch_core import one_hot

# super charged version of Python's list
# enhanced functionality and is used extensively throughout the fastai library
# built-in list does not have map to item by default
from fastcore.foundation import L

# get path to MovieLens data
path = untar_data(URLs.ML_100k)

#### -- read csv --

In [2]:
# panda is commonly used to work with structured data in tabular form
# - csv, xlsx

# read in a csv file
ratings = pd.read_csv(
    # read the 'u.data' csv file at the MovieLens data path
    path/'u.data',
    # default is comma separated, but MovieLens is tab separated
    delimiter='\t',
    # csv does NOT have a header row
    header=None,
    # use these as column names
    names=['user', 'movie', 'rating', 'timestamp']
)

# displays the first N (default=5) rows of ratings table
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


#### -- log movie titles --

In [3]:
movies = pd.read_csv(
    path/'u.item',
    delimiter='|',
    encoding='latin-1',
    usecols=(0,1),
    names=('movie', 'title'),
    header=None
)

movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


#### -- log movie rating --

In [4]:
ratings=ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


#### -- dls show_batch() --

In [5]:
dls=CollabDataLoaders.from_df(
    ratings,
    item_name='title',
    bs=64
)

dls.show_batch()

Unnamed: 0,user,title,rating
0,851,Chain Reaction (1996),3
1,851,Raiders of the Lost Ark (1981),5
2,932,Cliffhanger (1993),2
3,343,"Perez Family, The (1995)",4
4,727,Braveheart (1995),4
5,447,Mulholland Falls (1996),3
6,292,Hamlet (1996),4
7,455,Evita (1996),3
8,425,Liar Liar (1997),2
9,17,Toy Story (1995),4


#### -- n_users n_movies n_factors --

In [6]:
n_users = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
n_factors = 5

user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

In [7]:
# we need to manually cast 3 as a tensor because we are using torch one_hot
# not fastai one_hot
one_hot_3 = one_hot(torch.tensor(3), n_users).float()
# print actual index look up
print(user_factors[3])
# should be equal to look up as a matrix operation using one hot encoding
user_factors.t() @ one_hot_3

tensor([-0.0949,  0.0252,  0.8279, -0.2113,  0.3091])


tensor([-0.0949,  0.0252,  0.8279, -0.2113,  0.3091])

### -- DOT PRODUCT -- default

In [8]:
class DotProduct(Module):
    def __init__(self, n_users, n_movies, n_factors):
        # Because we are using torch Module not fastai's
        super().__init__() # Add this line
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return (users * movies).sum(dim=1)

In [9]:
x,y = dls.one_batch()
# @audit : Explain why is x [64, 2] and y [64, 1]
# Guessing that independent (x) is user, movie 
# ... so what is dependent returning (y) ?
print(f"x : [{x.shape}] y : [{y.shape}]")

x : [torch.Size([64, 2])] y : [torch.Size([64, 1])]


In [10]:
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [11]:
# @audit : refactoring fastai => pytorch has DRASTICALLY model perf, why?
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.346614,1.294557,00:02
1,1.088729,1.081342,00:02
2,0.99438,0.978127,00:02
3,0.843148,0.883507,00:02
4,0.808667,0.866061,00:02


In [12]:
def sigmoid_range_mt(x, low, high):
    "Sigmoid function with range `(low, high)`"
    return torch.sigmoid(x) * (high - low) + low

### -- DOT PRODUCT -- sigmoid_range

In [13]:
# @audit : Improve model by forcing prediction between 0 and 5 somehow?

class DotProductM(Module):
    def __init__(
        self, 
        n_users, 
        n_movies, 
        n_factors,
        y_range=(0, 5.5)
    ):
        super().__init__()
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.y_range = y_range
    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        # @audit : WTF ... why does manually copyting the sigmoid_range
        # fastai code work with m1 chip ... but NOT when calling function
        # from fastai library?  What xform is fastai applying that is 
        # preventing m1 from working LOL
        return sigmoid_range_mt((users*movies).sum(dim=1), *self.y_range)

In [14]:
model = DotProductM(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [15]:
# @audit : refactoring fastai => pytorch has DRASTICALLY model perf, why?
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.994168,0.964676,00:02
1,0.850616,0.890451,00:02
2,0.679248,0.857362,00:02
3,0.468255,0.857912,00:02
4,0.36066,0.86169,00:02


### -- DOT PRODUCT -- bias

In [16]:
# @audit : Explain why we are adding bias

class DotProductBias(Module):
    def __init__(
        self,
        n_users,
        n_movies,
        n_factors,
        y_range = (0, 5.5)
    ):
        super().__init__()
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range
    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        # Jupyter cell is crashing here, and the difference is keepdim
        # @audit : Explain why
        # res = (users*movies).sum(dim=1, keepdim=True)
        res = (users*movies).sum(dim=1)
        # print(f"res : presqueeze [{res.shape}]")
        res = res.unsqueeze(1) # equivalent to keepdim=True
        # print(f"res : unsqueeze [{res.shape}]")
        # res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1]) # @audit : crashing here??
        bias = self.user_bias(x[:,0]) + self.movie_bias(x[:,1]) # hmmm this is ok ...
        # res = res + self.user_bias(x[:,0]) # but is this ok? NO this crashes
        # res = res + self.movie_bias(x[:,1]) # howabout this? NO also crashes but not immediately
        # print(f"res shape [{res.shape}] bias shape [{bias.shape}]")
        # res += bias # crashing
        res = res + bias # crashing
        # return sigmoid_range_mt(res, *self.y_range)
        return res # maybe this is OK somehow? NOPE does NOTHING to change outcome


In [17]:
user_factors = Embedding(n_users, n_factors)
user_bias = Embedding(n_users, 1)
users = user_factors(x[:,0])

In [18]:
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [19]:
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.245302,1.228191,00:03
1,1.056693,1.070815,00:02
2,0.934575,0.948646,00:02
3,0.804969,0.860907,00:02
4,0.765749,0.847356,00:02


#### -- fastai cpu variant --

In [20]:
# Do fastai on CPU
class DotProductBiasFAI(Module):
    def __init__(
        self,
        n_users,
        n_movies,
        n_factors,
        y_range = (0, 5.5)
    ):
        super().__init__()
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range
    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        res = (users*movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1]) # @audit : crashing here??
        return sigmoid_range_mt(res, *self.y_range)


In [21]:
model = DotProductBiasFAI(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [22]:
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.950105,0.923197,00:03
1,0.8218,0.848295,00:02
2,0.594166,0.84891,00:02
3,0.395843,0.871276,00:02
4,0.301095,0.877649,00:02


In [23]:
model = DotProductBiasFAI(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [24]:
learn.fit_one_cycle(5, 5e-3, wd=-0.1)

epoch,train_loss,valid_loss,time
0,0.932985,0.915409,00:02
1,0.822885,0.915387,00:03
2,0.627315,1.21255,00:03
3,0.439667,1.444548,00:02
4,0.317416,1.490096,00:02


### -- EMBEDDINGS --

#### -- L --

In [25]:
a = L(range(10))
print(a) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]



[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [26]:
class T(Module):
    def __init__(self):
        self.a = torch.ones(3)

L(T().parameters())

(#0) []

In [27]:
class T(Module):
    def __init__(self):
        self.a = nn.Parameter(torch.ones(3))

L(T().parameters())

(#1) [Parameter containing:
tensor([1., 1., 1.], requires_grad=True)]

In [28]:
class T(Module):
    def __init__(self):
        self.a = nn.Linear(1,3, bias=False)

t = T()
L(t.parameters())

(#1) [Parameter containing:
tensor([[-0.3875],
        [-0.8044],
        [ 0.8442]], requires_grad=True)]

In [29]:
type(t.a.weight)

torch.nn.parameter.Parameter

#### -- create parameter with random init --

In [30]:
def create_params(size):
    return nn.Parameter(torch.zeros(*size).normal_(0, 0.01))

In [31]:
class DotProductBiasEmb(Module):

    def __init__(
            self, 
            n_users,
            n_movies,
            n_factors,
            y_range=(0, 5.5)
        ):
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])
        self.movie_factors = create_params([n_movies, n_factors])
        self.movie_bias = create_params([n_movies])
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors[x[:,0]]
        movies = self.movie_factors[x[:,1]]
        res = (users*movies).sum(dim=1)
        res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]]
        return sigmoid_range_mt(res, *self.y_range)

In [32]:
model = DotProductBiasEmb(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [33]:
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.930217,0.937655,00:03
1,0.854108,0.865882,00:03
2,0.732167,0.822986,00:02
3,0.572907,0.81155,00:02
4,0.478006,0.810909,00:02
