In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import pandas as pd


In [2]:
class MF(nn.Module):
    def __init__(self, num_users, num_items, emb_size=100):
        super(MF, self).__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        # initializing our matrices with a positive number generally will yield better results
        self.user_emb.weight.data.uniform_(0, 0.5)
        self.item_emb.weight.data.uniform_(0, 0.5)
    def forward(self, u, v):
        u = self.user_emb(u)
        v = self.item_emb(v)
        return (u*v).sum(1)  # taking the dot product

In [3]:
model = MF(610, 100836, emb_size=100)
df = pd.read_csv('./ml-latest-small/ratings.csv', sep=',', header=None)
# print(df[1])
print(type(df[1]))

<class 'pandas.core.series.Series'>


In [4]:
train_df, valid_df = train_test_split(df, test_size=0.25)
# resetting indices to avoid indexing errors
train_df = train_df.reset_index(drop=True)
test_df = valid_df.reset_index(drop=True)

In [5]:
def train_epocs(model, epochs=10, lr=0.01, wd=0.0):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    model.train()
    for i in range(epochs):
        usernames = torch.LongTensor(train_df.userId.values)
        movie_titles = torch.LongTensor(train_df.movieId.values)
        ratings = torch.FloatTensor(train_df.rating.values)
        y_hat = model(usernames, movie_titles)
        loss = F.mse_loss(y_hat, ratings)
        optimizer.zero_grad()  # reset gradient
        loss.backward()
        optimizer.step()
        print(loss.item())
    test(model)

In [6]:
def test(model):
    model.eval()
    usernames = torch.LongTensor(train_df.userId.values)
    movie_titles = torch.LongTensor(train_df.movieId.values)
    ratings = torch.FloatTensor(train_df.rating.values)
    y_hat = model(usernames, movie_titles)
    loss = F.mse_loss(y_hat, ratings)
    print("test loss %.3f " % loss.item())

In [8]:
print(df)
# movie_ratings = train_df.movieId.values
# print(movie_rating)
# user = torch.tensor([10])
# games = torch.tensor(game_ratings['TitleId'].unique().tolist())
# predictions = model(user, games).tolist()
# print(predictions)

             0        1       2           3
0       userId  movieId  rating   timestamp
1            1        1     4.0   964982703
2            1        3     4.0   964981247
3            1        6     4.0   964982224
4            1       47     5.0   964983815
...        ...      ...     ...         ...
100832     610   166534     4.0  1493848402
100833     610   168248     5.0  1493850091
100834     610   168250     5.0  1494273047
100835     610   168252     5.0  1493846352
100836     610   170875     3.0  1493846415

[100837 rows x 4 columns]
