In [1]:
import pickle

import numpy as np
import pandas as pd

import matrix_factorization_utilities

### Recommendations for Cold Start

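1. Calculate the average rating for each movie across all users.
2. Subtract that movie's average rating from each user's rating of it.
3. After factorization, add each movie's average back to the predicted ratings, so that a user with no ratings yet is predicted roughly the movie's average rating.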

### Data Loading

In [2]:
# Read the raw ratings CSV into a DataFrame (columns: user_id, movie_id, value).
raw_df = pd.read_csv(filepath_or_buffer='Input_Data/movie_ratings_data_set.csv')

In [3]:
# Display the dtype of each column of the loaded ratings table.
raw_df.dtypes

user_id     int64
movie_id    int64
value       int64
dtype: object

### Convert to Matrix

In [4]:
# Pivot the ratings into a table with one row per user_id and one column per
# movie_id. If one user rated the same movie more than once, take the largest
# rating score.
# The aggregation is given as the string 'max' rather than np.max: recent pandas
# versions deprecate passing NumPy callables here, and the string selects the
# same built-in maximum.
ratings_df = pd.pivot_table(raw_df, index='user_id', columns='movie_id', aggfunc='max')

### Rating Score Normalization

In [5]:
# Normalize the ratings (center them around their mean).
# normalize_ratings returns the centered ratings together with the means that
# were subtracted; the means are added back after factorization.
# DataFrame.as_matrix() was removed in pandas 1.0, so the table is converted
# with to_numpy(), which yields the same NumPy array.
normalized_ratings, means = matrix_factorization_utilities.normalize_ratings(ratings_df.to_numpy())

### Matrix Factorization

In [6]:
# Factorize the normalized ratings to find the latent features:
# U holds the user features and M the product features.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(
    normalized_ratings,
    num_features=11,
    regularization_amount=1.1,
)

Optimization terminated successfully.
         Current function value: 105.620378
         Iterations: 460
         Function evaluations: 685
         Gradient evaluations: 685


In [7]:
# Find all predicted ratings by multiplying U and M
# (the matrix product of the two factor matrices from the factorization step).
# These predictions are still mean-centered at this point.
predicted_ratings = np.matmul(U, M)

In [8]:
# Add back in the mean ratings for each product to de-normalize the predicted results.
# The product of U and M is recomputed here instead of updating predicted_ratings
# in place, so re-running this cell can never add the means a second time.
predicted_ratings = np.matmul(U, M) + means

In [None]:
# Save features and predicted ratings to files for later use.
# Each file is opened in a `with` block so its handle is flushed and closed as
# soon as the dump finishes, instead of being left open for the kernel's lifetime.
artifacts_to_save = (
    ("user_features.dat", U),
    ("product_features.dat", M),
    ("predicted_ratings.dat", predicted_ratings),
    ("means.dat", means),
)
for artifact_path, artifact in artifacts_to_save:
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)