<a href="https://colab.research.google.com/github/maitri2905/Matrix_Factorization/blob/main/Matrix_Factorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Colab-specific upload step: `files.upload()` is provided by google.colab.
# The result is later indexed by filename (`uploaded['input1.csv']`) and
# wrapped in io.BytesIO, so it is presumably a mapping of
# filename -> raw file bytes -- TODO confirm against the Colab docs.
from google.colab import files
uploaded = files.upload()

Saving input1.csv to input1.csv


In [57]:
import numpy as np
import pandas as pd
import csv
import io

class Matrix_Factorization():
    """Biased matrix factorization trained with stochastic gradient descent.

    A rating is modelled as
    ``global_bias + user_bias[u] + item_bias[i] + P[u] . Q[i]``
    where ``P`` (users x k) and ``Q`` (items x k) are latent feature
    matrices. Entries equal to 0 in the input matrix are treated as
    "not rated" and are excluded from training and from the error.
    """

    def __init__(self, input_matrix_df, latent_features_dimension, learning_rate, regularization_param, iterations):
        """Store the rating matrix and the hyper-parameters.

        Args:
            input_matrix_df: 2-D user x item rating matrix. Anything
                ``np.asarray`` can turn into a 2-D array is accepted
                (ndarray, nested lists, DataFrame); 0 means "no rating".
            latent_features_dimension: number of latent features (k).
            learning_rate: SGD step size.
            regularization_param: L2 regularization strength.
            iterations: number of passes over the observed ratings.

        Raises:
            ValueError: if the input is not 2-dimensional (shape unpacking).
        """
        # The methods index with [user, item], which only works on arrays;
        # np.asarray is a no-op for ndarrays and converts everything else.
        self.input_matrix_df = np.asarray(input_matrix_df)
        self.users, self.items = self.input_matrix_df.shape
        self.latent_features_dimension = latent_features_dimension
        self.learning_rate = learning_rate
        self.regularization_param = regularization_param
        self.iterations = iterations

    def train(self):
        """Initialise the model and run ``self.iterations`` SGD epochs.

        Prints the error every 10th iteration.

        Returns:
            list of ``(iteration_index, error)`` tuples, one per epoch,
            where ``error`` is the value of ``mean_sqare_error()`` after
            that epoch.
        """
        ratings = self.input_matrix_df
        k = self.latent_features_dimension

        # Small random starting values for both latent feature matrices.
        self.randon_latent_feature_matrices_users = np.random.normal(scale=1. / k, size=(self.users, k))
        self.randon_latent_feature_matrices_items = np.random.normal(scale=1. / k, size=(self.items, k))

        self.bias_users = np.zeros(self.users)
        self.bias_items = np.zeros(self.items)

        # Global bias = mean of the observed (non-zero) ratings. Guard the
        # empty case so an all-zero matrix does not yield NaN + a warning.
        observed = ratings[ratings != 0]
        self.biases = np.mean(observed) if observed.size else 0.0

        # One (user, item, rating) sample per strictly positive entry,
        # in row-major order (same order as a nested user/item loop).
        self.training_sample_list = [
            (int(user), int(item), ratings[user, item])
            for user, item in np.argwhere(ratings > 0)
        ]

        training = []
        for iteration in range(self.iterations):
            # Visit the samples in a fresh random order every epoch.
            np.random.shuffle(self.training_sample_list)
            self.gradient_descent_step()
            mean_sqare_error = self.mean_sqare_error()
            training.append((iteration, mean_sqare_error))
            if (iteration + 1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (iteration + 1, mean_sqare_error))

        return training

    def gradient_descent_step(self):
        """Run one SGD pass over ``self.training_sample_list``.

        Updates the user/item biases and both latent feature matrices
        in place.
        """
        lr = self.learning_rate
        reg = self.regularization_param
        user_factors = self.randon_latent_feature_matrices_users
        item_factors = self.randon_latent_feature_matrices_items

        for user, item, value in self.training_sample_list:
            error = value - self.prediction(user, item)

            self.bias_users[user] += lr * (error - reg * self.bias_users[user])
            self.bias_items[item] += lr * (error - reg * self.bias_items[item])

            # Keep the pre-update user vector: the item gradient must be
            # evaluated at the same point as the user gradient, not at the
            # freshly updated user vector.
            previous_user_vector = user_factors[user, :].copy()
            user_factors[user, :] += lr * (error * item_factors[item, :] - reg * user_factors[user, :])
            item_factors[item, :] += lr * (error * previous_user_vector - reg * item_factors[item, :])

    def prediction(self, i, j):
        """Return the predicted rating of user ``i`` for item ``j``."""
        interaction = self.randon_latent_feature_matrices_users[i, :].dot(self.randon_latent_feature_matrices_items[j, :])
        return self.biases + self.bias_users[i] + self.bias_items[j] + interaction

    def mean_sqare_error(self):
        """Return the training error over the non-zero input entries.

        NOTE: despite the name, this is the square root of the *summed*
        squared error (no division by the number of ratings). That
        behaviour is kept so reported values stay comparable.
        """
        users, items = self.input_matrix_df.nonzero()
        prediction = self.full_matrix()
        residuals = self.input_matrix_df[users, items] - prediction[users, items]
        return np.sqrt(np.sum(residuals ** 2))

    def full_matrix(self):
        """Return the full users x items matrix of predicted ratings."""
        # Column vector of user biases + row vector of item biases
        # broadcast to a (users, items) grid.
        return (
            self.biases
            + self.bias_users[:, np.newaxis]
            + self.bias_items[np.newaxis, :]
            + self.randon_latent_feature_matrices_users.dot(self.randon_latent_feature_matrices_items.T)
        )


# Parse the uploaded CSV bytes and hand the ratings to the model as a plain
# NumPy array, because the model indexes the matrix with [user, item].
# NOTE(review): read_csv is called with default arguments, so the first CSV
# row is consumed as a header -- confirm input1.csv actually has one.
input_matrix_df = np.array(pd.read_csv(io.BytesIO(uploaded['input1.csv'])))

# Fit the factorization and print the reconstructed rating matrix.
matrix_obj = Matrix_Factorization(
    input_matrix_df,
    latent_features_dimension=3,
    learning_rate=0.001,
    regularization_param=0.01,
    iterations=100,
)
train_output = matrix_obj.train()
print(matrix_obj.full_matrix())

Iteration: 10 ; error = 61.3180
Iteration: 20 ; error = 60.2548
Iteration: 30 ; error = 59.3671
Iteration: 40 ; error = 58.5480
Iteration: 50 ; error = 57.7285
Iteration: 60 ; error = 56.8658
Iteration: 70 ; error = 55.9435
Iteration: 80 ; error = 54.9715
Iteration: 90 ; error = 53.9829
Iteration: 100 ; error = 53.0219
[[4.42984434 4.29640653 4.45036755 5.38917334 4.68231571 4.22289067
  3.15594184 4.94078688 4.45872097 3.03467906 5.64515991 4.92499389
  5.14975879 4.914862   4.23287665 5.82062513 3.70145787 4.87218138
  5.53829417 4.09741246 4.33645878 4.99373305 4.65634384 5.12639607
  4.28904574 4.92345923 4.94684901 5.18206324 4.06200879 4.68600512
  4.02375663 2.44175773 4.99044045 5.07953938 4.52854961 4.51098884
  4.43847949 5.0310939  3.60247407 4.17809823 5.25067923 4.88814667
  4.82688684]
 [5.67975302 5.42651795 4.40396654 5.97649426 5.36325557 6.279389
  5.30640775 5.69562366 6.25187544 5.73091203 4.32121983 6.97935022
  4.98331064 5.12151207 4.05717456 6.7241272  3.7511079

# References:







*   https://albertauyeung.github.io/2017/04/23/python-matrix-factorization.html
*   https://ieeexplore.ieee.org/document/5197422
*   https://dl.acm.org/doi/10.1145/1454008.1454049
*   https://towardsdatascience.com/recommendation-system-matrix-factorization-d61978660b4b
*   https://towardsdatascience.com/recommender-systems-in-python-from-scratch-643c8fc4f704





