In [14]:
import numpy as np
import pandas as pd

In [15]:
class FunkSVD:
    def __init__(self, n_factors=20, learning_rate=0.01, n_epochs=20, reg=0.02):
        self.n_factors = n_factors
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.reg = reg

    def fit(self, user_item_matrix):
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape
        self.user_factors = np.random.normal(scale=1./self.n_factors, size=(self.n_users, self.n_factors))
        self.item_factors = np.random.normal(scale=1./self.n_factors, size=(self.n_items, self.n_factors))

        for epoch in range(self.n_epochs):
            for i in range(self.n_users):
                for j in range(self.n_items):
                    if self.user_item_matrix[i, j] > 0:
                        self.gradient_descent(i, j)

    def gradient_descent(self, i, j):
        prediction = self.predict(i, j)
        error = self.user_item_matrix[i, j] - prediction

        self.user_factors[i, :] += self.learning_rate * (error * self.item_factors[j, :] - self.reg * self.user_factors[i, :])
        self.item_factors[j, :] += self.learning_rate * (error * self.user_factors[i, :] - self.reg * self.item_factors[j, :])

    def predict(self, i, j):
        return np.dot(self.user_factors[i, :], self.item_factors[j, :])

    def full_matrix(self):
        return np.dot(self.user_factors, self.item_factors.T)

## Load

In [16]:
features = pd.read_csv("../data/features/user_item.csv")
features.head()

Unnamed: 0,profile_id,offer_0,offer_1,offer_2,offer_3,offer_4,offer_5,offer_6,offer_7,offer_8,offer_9
0,0009655768c64bdeb2e877511632db8f,0,4,0,4,0,3,0,3,0,0
1,00116118485d4dfda04fdbaba9a87b5c,0,0,0,8,0,0,0,0,0,0
2,0011e0d4e6b944f998e987f904e8c1e5,4,0,0,0,0,3,3,3,0,2
3,0020c2b971eb4e9188eac86d93036a77,0,4,4,0,0,3,0,0,0,0
4,0020ccbbb6d84e358d3414a3ff76cffd,4,0,0,4,0,3,3,0,0,0


## Transform

In [17]:
# Create user-item matrix from the offer columns
user_item_matrix = features.pivot_table(
    index='profile_id',
    values=['offer_0', 'offer_1', 'offer_2', 'offer_3', 'offer_4', 
            'offer_5', 'offer_6', 'offer_7', 'offer_8', 'offer_9'],
    aggfunc='sum'
)

# Normalize the matrix using min-max scaling to bring values between 0 and 1
user_item_matrix_normalized = (user_item_matrix - user_item_matrix.min()) / (user_item_matrix.max() - user_item_matrix.min())

# Fill NaN values with 0 as FunkSVD expects a dense matrix
user_item_matrix_normalized = user_item_matrix_normalized.fillna(0)

## Train

In [18]:
# Assuming user_item_matrix is already created and normalized
svd = FunkSVD(n_factors=20, learning_rate=0.01, n_epochs=20, reg=0.02)
svd.fit(user_item_matrix_normalized.values)

## Predict

In [19]:
# Get the predicted full matrix
predicted_matrix = svd.full_matrix()
predicted = pd.DataFrame(predicted_matrix)
predicted.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.002479,0.00232,0.00358,0.000463,-0.000856,0.002637,0.002536,0.005209,0.003526,0.011048
1,0.016446,0.009593,0.014165,0.010352,0.021327,0.011307,0.016792,0.009631,0.011392,0.01722
2,0.005484,0.006654,0.004574,0.002892,0.006652,0.003375,0.009596,0.00219,0.006794,0.011923
3,0.002894,-0.001304,-0.004617,0.001704,0.001132,0.001511,-0.003406,0.00283,-0.003585,0.004234
4,0.003222,0.000506,0.00249,-0.000265,-0.00036,-0.000795,0.002753,0.001147,0.005817,0.008368


## Save

In [20]:
predicted.to_csv("../data/predictions/matrix_factorization.csv", index=False)