In [1]:
import numpy as np

In [57]:
class matrix_factorization():
    def __init__(self, data, number_of_features):
        self.data = data
        self.number_of_features = number_of_features        
        self.user_count = data.shape[0]
        self.item_count = data.shape[1]
        self.user_features = np.random.uniform(low=0.1, high=0.9, size=(self.user_count, self.number_of_features))
        self.item_features = np.random.uniform(low=0.1, high=0.9, size=(self.number_of_features, self.item_count))
    
    def mse(self):
        matrix_product = np.matmul(self.user_features, self.item_features)
        return np.sum((self.data - matrix_product)**2)
    
    def single_gradient(self, user_row, item_col, wrt_user_idx=None, wrt_item_idx=None):        
        
        if wrt_user_idx != None and wrt_item_idx != None:            
            return "To many elements!"
        elif wrt_user_idx == None and wrt_item_idx == None:            
            return "Insufficient elements!"
        
        else:
            u_row = self.user_features[user_row, :]
            i_col = self.item_features[:, item_col]
            reference_values = float(self.data[user_row, item_col])
            prediction = float(np.dot(u_row, i_col))
        
            if wrt_user_idx != None:
                row_element = float(i_col[wrt_user_idx])
                gradient = 2 * (reference_values - prediction) * row_element
            else:
                col_element = float(u_row[wrt_item_idx])
                gradient = 2 * (reference_values - prediction) * col_element               
            return gradient
    
    def user_feature_gradient(self, user_row, wrt_user_idx):
        summation = 0
        for col in range(0, self.item_count):
            summation += self.single_gradient(user_row=user_row, item_col=col, wrt_user_idx=wrt_user_idx)
        return summation / self.item_count
    
    def item_feature_gradient(self, item_col, wrt_item_idx):
        summation = 0
        for row in range(0, self.user_count):            
            summation += self.single_gradient(user_row=row, item_col=item_col, wrt_item_idx=wrt_item_idx)            
        return summation / self.user_count
    
    def update_user_features(self, learning_rate):
        for i in range(0, self.user_count):
            for j in range(self.number_of_features):
                self.user_features[i, j] += learning_rate * self.user_feature_gradient(user_row=i, wrt_user_idx=j)
    
    def update_item_features(self, learning_rate):
        for i in range(0, self.number_of_features):
            for j in range(0, self.item_count):                
                self.item_features[i, j] += learning_rate * self.item_feature_gradient(item_col=j, wrt_item_idx=i)
                
    def train(self, learning_rate=0.1, iterations=1000):
        for i in range(iterations):
            self.update_user_features(learning_rate)
            self.update_item_features(learning_rate)
            if (i % 50 == 0):
                print(self.mse())
        
    

## Treinando o modelo

In [67]:
data = np.array([[5,3,1], [1,3,5], [3,5,1]])
data

array([[5, 3, 1],
       [1, 3, 5],
       [3, 5, 1]])

In [68]:
model = matrix_factorization(data, 2)
model.train()

41.3551541150372
3.5386552903991793
3.5386552492239662
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966
3.538655249223966


## Verificando as features

In [69]:
model.user_features

array([[2.03681438, 0.30768046],
       [0.22276917, 2.35645899],
       [1.79131479, 0.76783969]])

In [70]:
model.item_features

array([[ 2.078815  ,  1.71412819, -0.04191824],
       [ 0.15152044,  1.20245465,  2.07847464]])

## Reconstruíndo a matriz original

In [71]:
np.dot(model.user_features, model.item_features)

array([[4.28078016, 3.86133275, 0.55412635],
       [0.8201476 , 3.21539   , 4.88850218],
       [3.84015545, 3.99383559, 1.52084655]])