In [26]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import joblib
import ipywidgets as widgets
from IPython.display import display, clear_output

In [27]:

# Load MovieLens dataset
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
column_names = ['UserId', 'MovieId', 'Rating', 'Timestamp']
data = pd.read_csv(url, sep='\t', names=column_names)

# Create the user-item rating matrix
ratings_matrix = data.pivot_table(index='UserId', columns='MovieId', values='Rating')

# Normalize the matrix by subtracting user means
user_means = ratings_matrix.mean(axis=1)
normalized_matrix = ratings_matrix.subtract(user_means, axis=0)
normalized_matrix.fillna(0, inplace=True)

# Apply SVD to the normalized matrix
svd = TruncatedSVD(n_components=50) # Increased components for better accuracy
decomposed_matrix = svd.fit_transform(normalized_matrix)

# Reconstruct the ratings matrix and denormalize
approx_ratings = svd.inverse_transform(decomposed_matrix) + user_means.values.reshape(-1, 1)


# Function to recommend movies using SVD
def recommend(user_id, num_recommendations=5):
    # Use the correct ratings matrix for predictions
    user_ratings = approx_ratings[user_id - 1]
    # Get movies already rated by the user
    already_rated = ratings_matrix.loc[user_id].notna()
    # Filter out already rated movies and select top recommendations
    top_recommendations = [i + 1 for i in np.argsort(user_ratings)[::-1] if not already_rated[i+1]][:num_recommendations]
    return top_recommendations


In [28]:
# Split dataset for evaluation
train, test = train_test_split(data, test_size=0.2, random_state=42) # Added random_state for reproducibility

# Create user-item matrices for train and test, ensuring consistent columns
train_matrix = train.pivot_table(index='UserId', columns='MovieId', values='Rating').fillna(0)
test_matrix = test.pivot_table(index='UserId', columns='MovieId', values='Rating').fillna(0)

# Align test matrix users with train matrix users
common_users = train_matrix.index.intersection(test_matrix.index)
train_matrix = train_matrix.loc[common_users]
test_matrix = test_matrix.loc[common_users]

# Ensure both matrices have the same columns
all_movies = pd.concat([train_matrix, test_matrix], axis=0).columns
train_matrix = train_matrix.reindex(columns=all_movies, fill_value=0)
test_matrix = test_matrix.reindex(columns=all_movies, fill_value=0)


# Evaluate RMSE
predicted_ratings = svd.inverse_transform(svd.transform(train_matrix))
rmse = np.sqrt(mean_squared_error(test_matrix, predicted_ratings))
print("RMSE between actual and predicted ratings: ", rmse)


RMSE between actual and predicted ratings:  0.5404720665404639


In [29]:

# Q-learning (Improvements: more realistic reward, better exploration)

# Initialize Q-table
num_users = len(ratings_matrix.index)
num_items = len(ratings_matrix.columns)
Q = np.zeros((num_users, num_items))


# Q-learning hyperparameters (adjust these)
alpha = 0.2  # Increased learning rate
gamma = 0.95 # Increased discount factor
epsilon = 0.5  # Higher initial exploration
# Reward function based on actual rating differences
def update_Q(user_id, movie_id, rating):
    max_future_q = np.max(Q[user_id - 1])
    Q[user_id - 1, movie_id - 1] += alpha * (rating + gamma * max_future_q - Q[user_id - 1, movie_id - 1])


def simulate_user_feedback(user_id, movie_id):
    try:
        return ratings_matrix.loc[user_id, movie_id]
    except KeyError:
        return 0  # Treat unrated movies as 0 initially

# Increased number of training episodes
for episode in range(1000):
    for user_id in range(1, num_users + 1):
        if np.random.rand() < epsilon:
            movie_id = np.random.choice(ratings_matrix.columns)
        else:
            movie_id = ratings_matrix.columns[np.argmax(Q[user_id - 1])]
        # Handle cases where movie_id is out of bounds
        if movie_id in ratings_matrix.columns:
            reward = simulate_user_feedback(user_id, movie_id)
            update_Q(user_id, movie_id, reward)
    epsilon = max(0.01, epsilon * 0.999) # Decay epsilon slower


In [31]:
def evaluate_qlearning(Q, test_data, ratings_matrix):
    """Evaluates the performance of the Q-learning model.

    Args:
        Q: The Q-table.
        test_data: The test dataset.
        ratings_matrix: The user-item ratings matrix.

    Returns:
        The average reward obtained by the Q-learning agent on the test data.
    """
    total_reward = 0
    for _, row in test_data.iterrows():
        user_id = row['UserId']
        movie_id = row['MovieId']
        # Use the Q-table to get the predicted rating (or reward) for the user-movie pair
        predicted_rating = Q[user_id - 1, movie_id - 1]
        total_reward += predicted_rating

    average_reward = total_reward / len(test_data)
    return average_reward



In [32]:
# Save Model (include necessary data)
model_to_save = {
    'svd': svd,
    'Q': Q,
    'approx_ratings': approx_ratings,
    'ratings_matrix': ratings_matrix,
    'user_means': user_means
}
joblib.dump(model_to_save, 'movie_recommendation_model.joblib')
print("Model saved as movie_recommendation_model.joblib")



Model saved as movie_recommendation_model.joblib


In [33]:
# Frontend (improvements: movie title mapping, input validation)

loaded_model = joblib.load('movie_recommendation_model.joblib')
svd = loaded_model['svd']
Q = loaded_model['Q']
approx_ratings = loaded_model['approx_ratings']
ratings_matrix = loaded_model['ratings_matrix']
user_means = loaded_model['user_means']

# Get movie titles from the original data
movie_titles = pd.read_csv('http://files.grouplens.org/datasets/movielens/ml-100k/u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1])
movie_titles.columns = ['MovieId', 'Title']

user_id_input = widgets.IntText(description="User ID:")
recommend_button = widgets.Button(description="Get Recommendations")
output_area = widgets.Output()



In [34]:
def on_recommend_button_clicked(b):
    with output_area:
        clear_output()
        user_id = user_id_input.value
        if 1 <= user_id <= len(ratings_matrix):  # Check if the user ID is valid
            svd_recommendations = recommend(user_id, 5)
            q_learning_recommendations = np.argsort(Q[user_id - 1])[::-1][:5]
            print("Recommended Movies (SVD):")
            for movie_id in svd_recommendations:
                movie_title = movie_titles[movie_titles['MovieId'] == movie_id]['Title'].values
                if len(movie_title) > 0:  #Handle cases where movie_id is not found in movie_titles
                    print(f"- {movie_title[0]}")
                else:
                   print(f"- Movie ID: {movie_id} (Title not found)")

            print("\nRecommended Movies (Q-Learning):")
            for movie_id in q_learning_recommendations:
                movie_title = movie_titles[movie_titles['MovieId'] == movie_id + 1]['Title'].values  #Add 1 because movie ids are indexed at 1
                if len(movie_title) > 0:
                    print(f"- {movie_title[0]}")
                else:
                    print(f"- Movie ID: {movie_id + 1} (Title not found)")
        else:
            print(f"Invalid User ID. Please enter a valid ID between 1 and {len(ratings_matrix)}")



In [35]:
recommend_button.on_click(on_recommend_button_clicked)
display(user_id_input, recommend_button, output_area)

IntText(value=0, description='User ID:')

Button(description='Get Recommendations', style=ButtonStyle())

Output()

In [None]:
# prompt: Why the both systems are recommending totally different movies for the same user?
# "Recommended Movies (SVD):
# - Rear Window (1954)
# - Chinatown (1974)
# - Manchurian Candidate, The (1962)
# - Singin' in the Rain (1952)
# - Event Horizon (1997)
# Recommended Movies (Q-Learning):
# - Power 98 (1995)
# - SubUrbia (1997)
# - Wonderland (1997)
# - Palmetto (1998)
# - Midnight Dancers (Sibak) (1994)"

The discrepancy in movie recommendations between the SVD and Q-learning models stems from fundamental differences in how they operate:

1. **SVD (Singular Value Decomposition):**  SVD is a collaborative filtering method. It focuses on identifying latent factors in user-item interactions.  It finds patterns in the ratings matrix to predict how a user *would* rate movies they haven't seen, based on how similar users have rated them.  SVD excels at finding movies similar to what the user has already liked, based on the broader patterns in the data.  Its recommendations are primarily driven by *similarity* in user preferences across the entire dataset.

2. **Q-learning:** Q-learning is a reinforcement learning method.  Instead of directly analyzing existing ratings, it learns through simulated user interactions. The provided code simulates these interactions by randomly assigning rewards (ratings).  The quality of the Q-learning model's recommendations heavily relies on the effectiveness of this simulated feedback.  In the given code the reward function is simply assigning a random rating which doesn't provide much information to the agent.  It doesn't capture user preferences as accurately and is prone to suggesting movies that might not align with actual preferences.  The Q-learning recommendations are based on the *learned behavior* of the simulated agent which is not properly trained.

**Why the difference in this case?**
* **Poor Q-learning setup:** The provided code uses a very simple simulated reward function, which is not a good representation of true user preference.  Because of the random rewards the Q-learning model doesn't learn meaningful relationships between users and movies. The `simulate_user_feedback` function assigns a random rating rather than a more sophisticated simulation or learning function based on actual ratings.  This randomness leads to less consistent and meaningful recommendations.

* **SVD leveraging existing data:** SVD, on the other hand, is leveraging the available user rating data in the MovieLens dataset.  It will find and recommend movies that match general viewing patterns from similar users.

**How to improve Q-learning:**
* **Realistic Rewards:** A much better reward function would use actual user ratings from the training data or build a more comprehensive simulation model of user preferences. The reward should reflect the genuine 'goodness' of a movie choice relative to that particular user's profile.
* **Explore More (with Decaying Epsilon):** The `epsilon` parameter controls exploration.  While the code has epsilon decay, it might be beneficial to start with a higher `epsilon` and decay slower to ensure more exploration in the initial training phase.
* **More Episodes:** The number of episodes used for Q-learning is relatively low. Increase the number of training episodes to allow the Q-table to better converge.
* **More sophisticated state representation**:  Use additional information about users and movies as input into the Q-learning agent.

**Key takeaway:**  SVD is making recommendations based on the collective wisdom of the entire dataset.  Q-learning needs to learn from feedback that reflects the actual user preferences in order to be more effective. As the code stands, Q-learning is not learning well due to the poor reward structure.
