In [1]:
import numpy as np
import pandas as pd

In [None]:
class FunkSVD:
    def __init__(self, n_factors=20, learning_rate=0.01, n_epochs=20, reg=0.02):
        self.n_factors = n_factors
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.reg = reg

    def fit(self, user_item_matrix):
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape
        self.user_factors = np.random.normal(scale=1./self.n_factors, size=(self.n_users, self.n_factors))
        self.item_factors = np.random.normal(scale=1./self.n_factors, size=(self.n_items, self.n_factors))

        for epoch in range(self.n_epochs):
            for i in range(self.n_users):
                for j in range(self.n_items):
                    if self.user_item_matrix[i, j] > 0:
                        self.gradient_descent(i, j)

    def gradient_descent(self, i, j):
        prediction = self.predict(i, j)
        error = self.user_item_matrix[i, j] - prediction

        self.user_factors[i, :] += self.learning_rate * (error * self.item_factors[j, :] - self.reg * self.user_factors[i, :])
        self.item_factors[j, :] += self.learning_rate * (error * self.user_factors[i, :] - self.reg * self.item_factors[j, :])

    def predict(self, i, j):
        return np.dot(self.user_factors[i, :], self.item_factors[j, :])

    def full_matrix(self):
        return np.dot(self.user_factors, self.item_factors.T)

## Load

In [2]:
features = pd.read_csv("../data/features/user_behaviour.csv")
features.head()

Unnamed: 0,profile_id,age,income,gender_F,gender_M,gender_O,become_member_on_year_2013,become_member_on_year_2014,become_member_on_year_2015,become_member_on_year_2016,...,offer_7,offer_8,offer_9,offer_type_bogo,offer_type_discount,offer_type_informational,channels_email,channels_mobile,channels_social,channels_web
0,0009655768c64bdeb2e877511632db8f,33,72000.0,False,True,False,False,False,False,False,...,6,3,0,8,11,12,9,9,6,7
1,0011e0d4e6b944f998e987f904e8c1e5,40,57000.0,False,False,True,False,False,False,False,...,6,0,4,6,12,12,10,8,4,8
2,0020c2b971eb4e9188eac86d93036a77,59,90000.0,True,False,False,False,False,False,True,...,0,0,0,11,12,6,8,8,8,5
3,0020ccbbb6d84e358d3414a3ff76cffd,24,60000.0,True,False,False,False,False,False,True,...,0,0,0,14,8,6,8,8,6,6
4,003d66b6608740288d6cc97a6903f4f0,26,73000.0,True,False,False,False,False,False,False,...,6,0,2,0,18,12,9,8,6,7


## Preprocessing

## Train

In [None]:
# Assuming user_item_matrix is already created and normalized
svd = FunkSVD(n_factors=20, learning_rate=0.01, n_epochs=20, reg=0.02)
svd.fit(user_item_matrix_normalized.values)

# Get the predicted full matrix
predicted_matrix = svd.full_matrix()

## Test