In [13]:
from src.utils import TrainTestSplitter
from src.models import  BaseModelAverage
from src.metrics import ml_metrics, predictive_metrics, rank_metrics

from src.utils import read_pickles

from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [16]:
df_movies, df_users, df_ratings = read_pickles("../../data/ml-1m-after_eda/")
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()

df_ratings['UserID'] = user_encoder.fit_transform(df_ratings['UserID'])
df_ratings['MovieID'] = movie_encoder.fit_transform(df_ratings['MovieID'])

rating_matrix = df_ratings.pivot_table(index='UserID', columns='MovieID', values='Rating', fill_value=0)

In [18]:
train, test, indicies_of_zero = TrainTestSplitter.split_by_deleting_reviews(rating_matrix, 0.1)


In [19]:
class AlternatingLeastSquares:
    def __init__(self, num_factors=10, regularization=0.1, iterations=10):
        self.num_factors = num_factors
        self.regularization = regularization
        self.iterations = iterations

    def fit(self, interaction_matrix):
        self.num_users, self.num_items = interaction_matrix.shape
        self.user_factors = np.random.random((self.num_users, self.num_factors))
        self.item_factors = np.random.random((self.num_items, self.num_factors))

        for iteration in range(self.iterations):
            self.user_factors = self._als_step(interaction_matrix, self.user_factors, self.item_factors)
            self.item_factors = self._als_step(interaction_matrix.T, self.item_factors, self.user_factors)


    def _als_step(self, interaction_matrix, update_vecs, fixed_vecs):
        A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_factors) * self.regularization
        b = interaction_matrix.dot(fixed_vecs)
        A_inv = np.linalg.inv(A)
        update_vecs = b.dot(A_inv)
        return update_vecs


    def predict(self, user_id):
        predictions =  self.user_factors.dot(self.item_factors.T)
        print(predictions.shape)
        return predictions[user_id]


In [20]:
als = AlternatingLeastSquares(num_factors=10, regularization=0.1, iterations=10)

als.fit(train)

In [21]:
def find_recommendations(user_id, number_of_recommendations=15):
    predictions = als.predict(user_id)
    high_score_movie_ids = np.argsort(predictions)[::-1][:number_of_recommendations]
    items_to_recommend = df_movies.where(df_movies['MovieID'].isin(high_score_movie_ids)).dropna()
    print(items_to_recommend[['Title', 'Genres']])

## We would recommend following items to user with id 20


In [22]:
find_recommendations(20, 15)

(6040, 3706)
                          Title            Genres
146   Awfully Big Adventure, An           [Drama]
519            Ruby in Paradise           [Drama]
1164               Hear My Song          [Comedy]
2234                  Nashville  [Drama, Musical]
3154       Zed & Two Noughts, A           [Drama]
3321          Shanghai Surprise       [Adventure]
3413             Price of Glory           [Drama]
3580            American Gigolo           [Drama]
