In [1]:
# Neural Collaborative Filtering

In [1]:
import math

import numpy as np
from tqdm import tqdm

dataset_name = 'ml_small'
# dataset_name = 'ml_25m'

# Load the matrix
M = np.load('{}.npy'.format(dataset_name))

# Shuffle and split
np.random.shuffle(M)
nnz_count = M.shape[0]
row_count = int(M[:, 0].max() + 1)
col_count = int(M[:, 1].max() + 1)

training_count = round(nnz_count * 0.8)
test_count = nnz_count - training_count

M_train = M[:training_count, :]
M_test = M[training_count:, :]

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-1 * x))

class NeuralCollaborativeFiltering:
    """
    MLPRegressor with single target. The network is trained with SGD.
    """
    
    def __init__(self, f, row_count, col_count, layers, lr=0.001):
        
        # f is the factor size
        # layers[0] should be 2 * f
        # layers is a tuple of ints (3, 5) means 3 input nodes, 5 hidden nodes
        self.weight_lst = []
        self.bias_lst = []
        self.lr = lr
        self.f = f
        self.layer_count = len(layers)
        
        self.P = np.random.uniform(size=(row_count, f))
        self.Q = np.random.uniform(size=(col_count, f))
        
        left_node_size = layers[0]
        for right_node_size in layers[1:]:
            self.weight_lst.append(np.random.randn(right_node_size, left_node_size))
            self.bias_lst.append(np.random.randn(right_node_size))
            left_node_size = right_node_size
        
        self.weight_lst.append(np.random.randn(1, left_node_size))
        self.bias_lst.append(np.random.randn(1))
    
    def train(self, X, y, epoch_count=1, verbose=False):
        
        self._loss = []
        if verbose:
            y_predict = self.predict(X)
            error = (y_predict - y) ** 2
            error = error.mean()
            self._loss.append(error)
        
        for epoch in tqdm(range(epoch_count)):
            
            # Shuffle the training samples
            ind = np.arange(X.shape[0])
            np.random.shuffle(ind)
            X = X[ind]
            y = y[ind]
            
            self._train_epoch(X, y)
            
            if verbose:
                y_predict = self.predict(X)
                error = (y_predict - y) ** 2
                error = error.mean()
                self._loss.append(error)
    
    def _train_epoch(self, X, y):
        
        a_lst = [None for _ in range(self.layer_count)]   # a_k = sigmoid(z_k) where z_k is the output, W_k * a_{k-1}
        
        for x, y_true in zip(X, y):
            # Feed forward
            #print(x)
            row, col = map(int, x)
            
            a = np.concatenate((mdl.P[row], mdl.Q[col]))
            a_lst[0] = a
            for i in range(self.layer_count - 1):
                W = self.weight_lst[i]
                b = self.bias_lst[i]
                z = W @ a + b
                a = sigmoid(z)
                a_lst[i + 1] = a
            
            y_predict = self.weight_lst[-1] @ a + self.bias_lst[-1]
            
            # Backpropagation
            e = y_predict - y_true
            next_e = e * self.weight_lst[-1]
            
            gradient = a_lst[-1] * e
            
            self.weight_lst[-1] -= self.lr * gradient
            self.bias_lst[-1] -= self.lr * e
            e = next_e
            
            for i in reversed(range(len(self.weight_lst) - 1)):
                next_e = e @ self.weight_lst[i]
                bias_gradient = e * a_lst[i + 1] * (1 - a_lst[i + 1])
                self.bias_lst[i] -= self.lr * bias_gradient.ravel()
                
                gradient = bias_gradient.T @ a_lst[i].reshape(1, -1)
                self.weight_lst[i] -= self.lr * gradient
                e = next_e
                
            self.P[row] -= self.lr * e.ravel()[:self.f]
            self.Q[col] -= self.lr * e.ravel()[self.f:]
    
    def predict(self, X):
        
        # X is (N, 3) or (N, 2)
        
        # Get embeddings
        X = np.hstack((self.P[X[:, 0].astype(int), :], self.Q[X[:, 1].astype(int), :]))
        
        a = X.T
        # hidden layers
        for W, b in zip(self.weight_lst[:-1], self.bias_lst[:-1]):
            z = W @ a + b.reshape(-1, 1)
            a = sigmoid(z)
        
        y_predict = self.weight_lst[-1] @ a + self.bias_lst[-1]
        return y_predict.ravel()

In [3]:
def compute_rmse(y_predict, y_true):
    return np.sqrt(((y_predict - y_true) ** 2).mean())

In [4]:
mdl = NeuralCollaborativeFiltering(f=16, row_count=row_count, col_count=col_count, layers=(32, 16))
y_predict = mdl.predict(M_train)
rmse = compute_rmse(y_predict, M_train[:, 2])
print(rmse)

mdl.train(M_train[:, :2], M_train[:, -1], epoch_count=100)
y_predict = mdl.predict(M_train)
rmse = compute_rmse(y_predict, M_train[:, 2])
print(rmse)

  0%|          | 0/100 [00:00<?, ?it/s]

10.24880807007689


100%|██████████| 100/100 [06:39<00:00,  3.99s/it]

0.7760348107145926





In [5]:
y_predict = mdl.predict(M_test)
rmse = compute_rmse(y_predict, M_test[:, 2])
print(rmse)

0.8822412191214752


In [49]:
M_train.shape

(80669, 3)