In [None]:
def create_user_item_matrix(df):
    """
    Reshape a long-format ratings frame into a wide user-item matrix.

    The result has one row per 'CustomerID', one column per 'MovieID', and
    the matching 'Rating' in each cell. User/movie pairs that have no rating
    in `df` are filled with 0.

    Note: this relies on DataFrame.pivot (not pivot_table), so `df` must not
    contain more than one row for the same (CustomerID, MovieID) pair.
    """
    ratings_wide = df.pivot(index='CustomerID', columns='MovieID', values='Rating')
    return ratings_wide.fillna(0)

# Build one dense user-item matrix per data split (train / validation / test).
train_matrix_df, validation_matrix_df, test_matrix_df = (
    create_user_item_matrix(split_df)
    for split_df in (train_data, validation_data, test_data)
)

# Wrap the dense values of each split in a SciPy CSR matrix.
# The row/column labels stay available on the *_matrix_df frames.
train_matrix, validation_matrix, test_matrix = (
    csr_matrix(split_matrix_df.values)
    for split_matrix_df in (train_matrix_df, validation_matrix_df, test_matrix_df)
)

In [None]:
# Truncated SVD of the sparse training matrix; k=50 singular triplets are kept.
U, sigma, Vt = svds(train_matrix, k=50)

# svds returns the singular values as a 1-D array; expand them into a square
# diagonal matrix so they can be used directly in matrix products.
sigma_diag_matrix = np.diag(sigma)

# Attach the training labels to the factor matrices:
# rows of U follow the training users, rows of Vt transposed follow the training movies.
user_factors_df = pd.DataFrame(data=U, index=train_matrix_df.index)
movie_factors_df = pd.DataFrame(data=np.transpose(Vt), index=train_matrix_df.columns)

In [None]:
def predict_ratings(user_factors, sigma_diag_matrix, item_factors, user_item_matrix):
    """
    Reconstruct predicted ratings from SVD factors and align them to a target matrix.

    Args:
        user_factors: (n_users, k) array or DataFrame of user factors. When a
            DataFrame is given, its index supplies the user labels.
        sigma_diag_matrix: (k, k) diagonal matrix of singular values.
        item_factors: (n_items, k) array or DataFrame of item factors (one row
            per item). When a DataFrame is given, its index supplies the item labels.
        user_item_matrix: DataFrame whose index/columns define the users and
            items the returned predictions must cover.

    Returns:
        DataFrame with the same index and columns as `user_item_matrix`.
        Users or items that have no factors get a prediction of 0.

    Notes:
        Labels are taken from the factor inputs whenever they carry them. Only
        when a bare array is passed does the function fall back to the
        module-level `train_matrix_df`, which must then exist and match the
        factor shapes.
    """
    # Prefer labels that travel with the arguments over hidden notebook state.
    if isinstance(user_factors, pd.DataFrame):
        user_labels = user_factors.index
    else:
        user_labels = train_matrix_df.index

    if isinstance(item_factors, pd.DataFrame):
        item_labels = item_factors.index
    else:
        item_labels = train_matrix_df.columns

    # Work on plain arrays so the products are positional, never label-aligned.
    scaled_user_factors = np.dot(np.asarray(user_factors), np.asarray(sigma_diag_matrix))
    predictions = np.dot(scaled_user_factors, np.asarray(item_factors).T)

    full_preds_df = pd.DataFrame(predictions, index=user_labels, columns=item_labels)

    # Restrict/extend to the target users and items; anything unseen becomes 0.
    return full_preds_df.reindex(
        index=user_item_matrix.index,
        columns=user_item_matrix.columns,
        fill_value=0,
    )


In [None]:
# Accuracy metrics (RMSE / MAE) computed over the observed ratings only
def evaluate_accuracy(predictions_df, actual_matrix_df):
    """
    Score predicted ratings against actual ratings.

    Both frames are flattened and compared position by position, so they are
    expected to share the same shape and row/column order. Cells whose actual
    value is 0 are treated as "no rating" and left out of the metrics.

    Returns:
        (rmse, mae) computed over the non-zero actual ratings.
    """
    actual_flat = actual_matrix_df.values.ravel()
    predicted_flat = predictions_df.values.ravel()

    # Integer positions of the cells that hold a real rating.
    rated_positions = actual_flat.nonzero()[0]
    observed = actual_flat[rated_positions]
    estimated = predicted_flat[rated_positions]

    mae = mean_absolute_error(observed, estimated)
    rmse = math.sqrt(mean_squared_error(observed, estimated))
    return rmse, mae

In [None]:
# Score the held-out test split with the factors learned from the training matrix
test_preds_df = predict_ratings(
    user_factors=U,
    sigma_diag_matrix=sigma_diag_matrix,
    item_factors=movie_factors_df,
    user_item_matrix=test_matrix_df,
)
test_rmse, test_mae = evaluate_accuracy(
    predictions_df=test_preds_df,
    actual_matrix_df=test_matrix_df,
)
print(f"Test RMSE: {test_rmse}, Test MAE: {test_mae}")


In [None]:
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    """
    Return the highest-predicted movies a user has not rated yet.

    Args:
        predictions_df: DataFrame of predicted ratings, one row per user
            (index) and one column per movie ID.
        userID: label of the user in `predictions_df.index`.
        movies_df: DataFrame with at least 'MovieID' and 'MovieTitle' columns.
            It may hold several rows per movie (e.g. a ratings table that
            carries titles); only the first row per 'MovieID' is used so a
            title can never appear twice in the result.
        original_ratings_df: DataFrame with 'CustomerID' and 'MovieID'
            columns; every movie listed there for `userID` is excluded.
        num_recommendations: maximum number of rows to return.

    Returns:
        DataFrame with columns ['MovieTitle', 'Predictions'], ordered from the
        highest to the lowest predicted rating.

    Raises:
        KeyError: if `userID` is not present in `predictions_df.index`.
    """
    # One row per movie: (MovieID, predicted rating for this user).
    user_predictions = predictions_df.loc[userID].reset_index()
    user_predictions.columns = ['MovieID', 'Predictions']

    already_rated = original_ratings_df.loc[
        original_ratings_df['CustomerID'] == userID, 'MovieID'
    ]

    # Collapse to a unique catalogue first; otherwise the merge below would
    # emit the same movie once per duplicate row and crowd out the top-N.
    catalogue = movies_df[['MovieID', 'MovieTitle']].drop_duplicates(subset='MovieID')
    candidates = catalogue[~catalogue['MovieID'].isin(already_rated)]

    ranked = (
        candidates
        .merge(user_predictions, on='MovieID')
        .sort_values('Predictions', ascending=False, kind='stable')
    )
    return ranked.iloc[:num_recommendations][['MovieTitle', 'Predictions']]




In [None]:
user_id = 596  # Example user ID

# Compute the validation predictions here, from the current SVD factors, so the
# cell runs on a fresh kernel instead of relying on a `validation_preds_df`
# left behind by some other cell.
validation_preds_df = predict_ratings(U, sigma_diag_matrix, movie_factors_df, validation_matrix_df)

# A movie counts as "already rated" if the user rated it in either split;
# excluding only the validation ratings would recommend back the very titles
# the model was trained on for this user.
rated_so_far = pd.concat(
    [train_data[['CustomerID', 'MovieID']], validation_data[['CustomerID', 'MovieID']]],
    ignore_index=True,
)

recommendations = recommend_movies(validation_preds_df, user_id, training_df, rated_so_far, 5)

print(f"Recommendations for User ID {user_id}:")
for _, row in recommendations.iterrows():
    print(f"{row['MovieTitle']} - Rating Prediction: {row['Predictions']:.2f}")