In [None]:
import os

import pandas
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

from BiasMFRecommender import BiasMF

In [None]:
class RateDataset(Dataset):
    """Map-style dataset exposing one (user, movie, rating) triple per row.

    Args:
        df: DataFrame with ``user``, ``movie`` and ``rating`` columns. It is
            kept as ``self.df`` without being copied.
    """

    def __init__(self, df):
        self.df = df

    def __getitem__(self, index):
        """Return ``(user, movie, rating)`` for the row at position ``index``."""
        # Use positional access (.iloc) rather than label access: samplers ask
        # for positions 0..len-1, and those only coincide with the index labels
        # when the frame carries a fresh RangeIndex.
        frame = self.df
        return (
            frame.user.iloc[index],
            frame.movie.iloc[index],
            frame.rating.iloc[index],
        )

    def __len__(self):
        """Return the number of rows (ratings) in the wrapped DataFrame."""
        return self.df.shape[0]
    
def get_loss(df, model):
    """Compute the mean squared error of ``model`` over every row of ``df``.

    Args:
        df: DataFrame with ``user``, ``movie`` and ``rating`` columns. User and
            movie ids are shifted down by one before being given to the model.
        model: Callable taking ``(user_indices, item_indices)`` tensors and
            returning one predicted rating per row.

    Returns:
        The MSE between the predictions and ``df.rating`` as a scalar tensor,
        computed with gradient tracking disabled.
    """
    with torch.no_grad():
        criterion = torch.nn.MSELoss()
        # Convert through NumPy so the result does not depend on the
        # DataFrame's index labels (e.g. a shuffled, non-reset split).
        users = torch.tensor(df.user.to_numpy() - 1)
        items = torch.tensor(df.movie.to_numpy() - 1)
        preds = model(users, items)
        # Ratings may be stored as integers; cast them to the prediction dtype
        # so the criterion compares like with like.
        targets = torch.tensor(df.rating.to_numpy(), dtype=preds.dtype)
        return criterion(preds, targets)

In [None]:
COLS = ['user', 'movie', 'rating', 'timestamp']
# Alternative input: the pre-split MovieLens-100k files.
# df_train = pandas.read_csv("./data/ml-100k/u1.base", sep='\t', names=COLS).drop(columns=['timestamp']).astype(int)
# df_test = pandas.read_csv("./data/ml-100k/u1.test", sep='\t', names=COLS).drop(columns=['timestamp']).astype(int)

# Read the '::'-separated ratings file, discard the timestamp column and cast
# the remaining columns to int.
df_1m = (
    pandas.read_csv("./data/ml-1m/ratings.dat", sep='::', names=COLS, engine='python')
    .drop(columns=['timestamp'])
    .astype(int)
)

# Fixed random_state keeps the 80/20 split identical between runs.
df_train, df_test = train_test_split(df_1m, test_size=0.2, random_state=42, shuffle=True)

# drop=True renumbers the rows 0..n-1 without keeping the shuffled labels as an
# extra 'index' column in the split frames.
df_train = df_train.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)
assert len(df_train) + len(df_test) == len(df_1m), "split lost or duplicated rows"

train_data = RateDataset(df_train)
test_data = RateDataset(df_test)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
print(df_train.shape, df_test.shape)

In [None]:
# Model sizes come from the largest user / movie id in the full ratings frame.
# NOTE(review): global_mean is computed over all ratings, test split included.
params = {'num_users': df_1m.user.max(), 'num_items': df_1m.movie.max(), 'global_mean': df_1m.rating.mean(), 'latent_dim': 5}
#device = torch.device('mps')
#model.to(device)
criterion = torch.nn.MSELoss()
num_epoch = 30
history_columns = ['latent_dim', 'train_loss', 'test_loss']
history_records = []

# Create the checkpoint directory up front so torch.save cannot fail on a
# missing folder after a full training run.
os.makedirs('./saved_models', exist_ok=True)

# Train one model per latent dimension and record its per-epoch losses.
for latent_dim in (0, 1, 2, 3, 4, 5):
    print(f'latent_dim: {latent_dim}')
    params['latent_dim'] = latent_dim
    model = BiasMF(params)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    for epoch in range(num_epoch):
        for bid, batch in enumerate(train_loader):
            # Ids in the data start at 1; shift them to 0-based indices.
            u, i, r = batch[0]-1, batch[1]-1, batch[2]
            r = r.float()
            # forward pass
            preds = model(u, i)
            loss = criterion(preds, r)
            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Store plain floats: tensors in the history frame would be written to
        # CSV as their 'tensor(...)' repr instead of as numbers.
        test_loss = float(get_loss(df_test, model))
        train_loss = float(get_loss(df_train, model))
        print(f'Epoch [{epoch + 1}/{num_epoch}], train_loss: {train_loss:.4f}, test_loss: {test_loss:.4f}')
        history_records.append(
            {'latent_dim': latent_dim, 'train_loss': train_loss, 'test_loss': test_loss}
        )
    torch.save(model.state_dict(), f'./saved_models/matrix_movielens_{latent_dim}.pth')

# Build the history frame once from the collected records instead of growing
# it one row at a time.
df_history = pandas.DataFrame(history_records, columns=history_columns)

In [None]:
# Write the per-epoch loss history to a CSV file in the working directory,
# then show the frame as this cell's output.
history_csv_path = 'df_history.csv'
df_history.to_csv(history_csv_path)
df_history

In [None]:
# Rebuild a model with the settings used at training time and load its saved
# weights. The latent dimension selects which checkpoint file is read.
LOAD_LATENT_DIM = 2
# Derive the sizes and the global mean from the full ratings frame, as the
# training cell does: the train split alone may not contain the largest
# user / movie id, which would make the layer shapes differ from the checkpoint.
params = {
    'num_users': df_1m.user.max(),
    'num_items': df_1m.movie.max(),
    'global_mean': df_1m.rating.mean(),
    'latent_dim': LOAD_LATENT_DIM,
}
model = BiasMF(params)
model.load_state_dict(torch.load(f"./saved_models/matrix_movielens_{LOAD_LATENT_DIM}.pth"))

In [None]:
# Report the loaded model's MSE, first on the training split and then on the
# test split.
for split_frame in (df_train, df_test):
    split_loss = get_loss(split_frame, model)
    print(split_loss)

In [None]:
# Spot check for one (user index, item index) pair: print the model's
# prediction, then the sum of that user's bias, that item's bias and mu.
# NOTE(review): presumably meant to compare the full prediction with the
# bias-only part; confirm against the BiasMF implementation.
user_idx, item_idx = 65, 29
with torch.no_grad():
    full_prediction = model(torch.tensor([user_idx]), torch.tensor([item_idx]))
    print(full_prediction)
    user_term = model.user_bias.weight[user_idx]
    item_term = model.item_bias.weight[item_idx]
    print(user_term + item_term + model.mu)

In [None]:
# Display the model object's repr as the cell output.
model

In [None]:
# Display the parameter dict most recently used to build the model.
params