Case Study Notebook: Recommendation Systems
1. Problem and Objective:
   - Dataset: MovieLens 100K dataset (user-movie ratings)
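   - Goal: Implement and compare several recommendation techniques on this dataset: content-based filtering, collaborative filtering (SVD), a hybrid of the two, a neural model (NCF), a time-aware variant, and a cold-start strategy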

2. Data Loading and Preprocessing:

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load data
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# u.item has 24 pipe-separated fields per row: movie id, title, release date,
# video release date, IMDb URL, then 19 binary genre flags. All 24 fields are
# named so that each label lines up with its data column; only the id, title
# and genre flags are kept, which leaves the genre flags at positions 2 onward
# (the layout the recommenders rely on through `movies.iloc[:, 2:]`).
GENRE_COLUMNS = [f'genre_{i}' for i in range(19)]
ITEM_COLUMNS = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url'] + GENRE_COLUMNS
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    names=ITEM_COLUMNS,
    usecols=['movie_id', 'title'] + GENRE_COLUMNS,
)
# Guard the positional assumption used downstream.
assert list(movies.columns) == ['movie_id', 'title'] + GENRE_COLUMNS

# Split data
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

print(f"Users: {ratings['user_id'].nunique()}, Movies: {ratings['movie_id'].nunique()}, Ratings: {len(ratings)}")


3. Recommendation Techniques:

a. Content-Based Filtering:

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def content_based_recommender(user_id, n=10):
    """Recommend movies whose genre flags best match the movies a user has rated.

    The user profile is the unweighted mean of the genre columns
    (``movies.iloc[:, 2:]``) over every movie the user has rated. Candidates
    are ranked by cosine similarity between their genre flags and that profile.

    Args:
        user_id: Value matched against ``ratings['user_id']``.
        n: Maximum number of movie ids to return.

    Returns:
        A list of up to ``n`` movie ids, most similar first. Movies the user
        has already rated are excluded. The list is empty when none of the
        user's ratings match a row in ``movies``.
    """
    rated_ids = ratings.loc[ratings['user_id'] == user_id, 'movie_id']
    already_rated = movies['movie_id'].isin(rated_ids)
    if not already_rated.any():
        # Averaging zero rows would give an all-NaN profile, which cannot be
        # scored; there is nothing to base a recommendation on.
        return []

    genre_matrix = movies.iloc[:, 2:]
    user_profile = genre_matrix[already_rated].mean()

    # The similarity matrix has one row per movie and a single column. Flatten
    # it to one score per movie so that argsort ranks the movies; sorting along
    # the last axis of the 2-D result would only sort each one-element row.
    scores = cosine_similarity(genre_matrix, user_profile.to_numpy().reshape(1, -1)).ravel()
    ranked = movies.iloc[scores.argsort()[::-1]]

    unseen = ranked[~ranked['movie_id'].isin(rated_ids)]
    return unseen['movie_id'].head(n).tolist()

# Smoke test: five content-based picks for user 1
print(content_based_recommender(user_id=1, n=5))

b. Collaborative Filtering:

In [None]:
from surprise import SVD, Dataset, Reader

reader = Reader(rating_scale=(1, 5))
# Fit on the training split only. test_data is scored with this model in the
# evaluation section, so fitting on all of `ratings` would let the model see
# the held-out rows before they are used to measure RMSE.
data = Dataset.load_from_df(train_data[['user_id', 'movie_id', 'rating']], reader)

# Fixed seed so the factor initialisation, and therefore the reported numbers,
# are reproducible across kernel restarts.
svd = SVD(random_state=42)
trainset = data.build_full_trainset()
svd.fit(trainset)

def collaborative_filtering_recommender(user_id, n=10):
    """Return the unrated movies with the highest SVD-estimated rating for a user.

    Candidates are every movie id present in ``ratings`` that this user has
    not rated. Each candidate is scored with ``svd.predict(...).est``.

    Args:
        user_id: Value matched against ``ratings['user_id']``.
        n: Maximum number of movie ids to return.

    Returns:
        A list of up to ``n`` movie ids, highest estimate first.
    """
    seen = ratings.loc[ratings['user_id'] == user_id, 'movie_id']
    candidates = np.setdiff1d(ratings['movie_id'].unique(), seen)

    estimates = []
    for candidate in candidates:
        estimates.append(svd.predict(user_id, candidate).est)

    # Ascending argsort, reversed, gives indices from best to worst estimate.
    best_first = np.argsort(estimates)[::-1]
    return candidates[best_first[:n]].tolist()

# Smoke test: five collaborative-filtering picks for user 1
print(collaborative_filtering_recommender(user_id=1, n=5))

c. Hybrid Method:

In [None]:
def hybrid_recommender(user_id, n=10):
    """Interleave content-based and collaborative-filtering recommendations.

    Items are taken alternately from the two ranked lists, content-based
    first, one rank at a time. A movie that appears in both lists is emitted
    only once, at its first occurrence. When one list runs out, the remaining
    slots are filled from the other.

    Args:
        user_id: Passed through to both underlying recommenders.
        n: Maximum number of movie ids to return.

    Returns:
        A list of up to ``n`` distinct movie ids. The list is empty when
        ``n`` is not positive.
    """
    if n <= 0:
        return []

    content_based_recs = content_based_recommender(user_id, n=n)
    collab_filtering_recs = collaborative_filtering_recommender(user_id, n=n)

    hybrid_recs = []
    picked = set()
    deepest_rank = max(len(content_based_recs), len(collab_filtering_recs))
    for rank in range(deepest_rank):
        # Content-based goes first at each rank, so the alternation is
        # content, collaborative, content, ... while both lists have items.
        for source in (content_based_recs, collab_filtering_recs):
            if rank >= len(source):
                continue
            movie_id = source[rank]
            if movie_id in picked:
                continue
            picked.add(movie_id)
            hybrid_recs.append(movie_id)
            if len(hybrid_recs) == n:
                return hybrid_recs

    return hybrid_recs

# Smoke test: five hybrid picks for user 1
print(hybrid_recommender(user_id=1, n=5))

d. Deep Learning for Recommendations:

In [None]:
import torch
import torch.nn as nn

class NCF(nn.Module):
    """Neural collaborative filtering scorer: two embedding tables feeding an MLP.

    A user embedding and an item embedding are concatenated and passed through
    two ReLU-activated linear layers (64 then 32 units) and a final linear
    layer that emits one unbounded score per (user, item) pair.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_size: Width of each embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_size=100):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        # The MLP input is the user and item vectors side by side.
        self.fc1 = nn.Linear(2 * embedding_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, user, item):
        """Score each (user, item) pair.

        Args:
            user: Integer index tensor into the user embedding table.
            item: Integer index tensor into the item embedding table.

        Returns:
            The output of the final linear layer, with a last dimension of 1.
        """
        pair = torch.cat((self.user_embedding(user), self.item_embedding(item)), dim=1)
        hidden = self.fc1(pair).relu()
        hidden = self.fc2(hidden).relu()
        return self.output(hidden)

# Initialize and train the model (training code omitted for brevity; until it is added, deep_learning_recommender below is an unimplemented placeholder)

def deep_learning_recommender(user_id, n=10):
    """Placeholder for recommendations from a trained NCF model.

    Not implemented: the arguments are ignored and every call returns None.
    """
    # Implementation using trained NCF model
    return None

# Test deep learning recommender
# print(deep_learning_recommender(1, n=5))

e. Context-Aware Recommendations:

In [None]:
def time_based_recommender(user_id, timestamp, n=10):
    """Recommend movies similar in genre to the user's most recent ratings.

    Takes the (up to) five movies the user rated most recently at or before
    ``timestamp``. For each one it finds the two unseen movies whose genre
    flags (``movies.iloc[:, 2:]``) have the highest cosine similarity to it.
    The per-movie picks are concatenated in recency order and de-duplicated.

    Args:
        user_id: Value matched against ``ratings['user_id']``.
        timestamp: Only ratings with ``ratings['timestamp'] <= timestamp`` are
            treated as the user's history.
        n: Maximum number of movie ids to return.

    Returns:
        A list of up to ``n`` distinct movie ids. At most ten candidates are
        generated (five recent movies, two picks each). The list is empty when
        the user has no ratings at or before ``timestamp``.
    """
    history = ratings[(ratings['user_id'] == user_id) & (ratings['timestamp'] <= timestamp)]
    recent_movies = history.sort_values('timestamp', ascending=False)['movie_id'].head(5).tolist()

    genre_matrix = movies.iloc[:, 2:].to_numpy(dtype=float)
    catalogue_ids = movies['movie_id'].to_numpy()
    # Movies rated at or before `timestamp` are never recommended back; this
    # also removes each anchor movie from its own neighbour list.
    unseen = ~np.isin(catalogue_ids, history['movie_id'].to_numpy())

    similar_movies = []
    for movie in recent_movies:
        anchor_rows = genre_matrix[catalogue_ids == movie]
        if len(anchor_rows) == 0:
            # The rated movie has no row in `movies`, so it has no genre flags.
            continue
        # Similarity is item-to-item: the anchor is a movie, so it is compared
        # against movie genre vectors rather than looked up as a user.
        scores = cosine_similarity(anchor_rows[:1], genre_matrix)[0]
        ranked = np.argsort(-scores, kind='stable')
        ranked = ranked[unseen[ranked]]
        similar_movies.extend(catalogue_ids[ranked[:2]].tolist())

    return list(dict.fromkeys(similar_movies))[:n]

# Smoke test: five time-aware picks for user 1, using the full rating history
latest_timestamp = ratings['timestamp'].max()
print(time_based_recommender(user_id=1, timestamp=latest_timestamp, n=5))

4. Evaluation Metrics:

In [None]:

from sklearn.metrics import mean_squared_error
from math import sqrt

def rmse(predictions, targets):
    """Return the root-mean-squared error between predictions and targets.

    Both arguments are forwarded unchanged to ``mean_squared_error``; the
    result is the square root of the value it returns.
    """
    mse = mean_squared_error(predictions, targets)
    return sqrt(mse)

def precision_at_k(recommended_items, relevant_items, k):
    """Fraction of the top-k recommendations that are relevant.

    Args:
        recommended_items: Ranked, sliceable sequence of recommended item ids.
        relevant_items: Iterable of item ids considered relevant.
        k: Cut-off rank; must be a positive integer.

    Returns:
        ``(number of distinct relevant items among the first k recommendations) / k``.
        The denominator is always ``k``, even when fewer than ``k``
        recommendations are supplied.

    Raises:
        ValueError: If ``k`` is zero or negative. A zero cut-off has no defined
            precision, and a negative one would slice from the end of the list
            and produce a negative ratio.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    top_k = set(recommended_items[:k])
    hits = top_k.intersection(relevant_items)
    return len(hits) / k

# Evaluate models (example for collaborative filtering)
# Each held-out row becomes a (user_id, movie_id, rating) triple for svd.test().
test_set = list(map(tuple, test_data[['user_id', 'movie_id', 'rating']].to_numpy()))
predictions = svd.test(test_set)
rmse_score = rmse(
    [prediction.est for prediction in predictions],
    [triple[2] for triple in test_set],
)
print(f"RMSE for Collaborative Filtering: {rmse_score}")

5. Cold Start Problem:

In [None]:
def cold_start_recommender(new_user_genres, n=10):
    """Recommend movies for a user with no ratings, from stated genre preferences.

    Movies are ranked by cosine similarity between ``new_user_genres`` and
    each movie's genre flags (``movies.iloc[:, 2:]``).

    Args:
        new_user_genres: One preference value per genre column, in the same
            order as the genre columns of ``movies``.
        n: Maximum number of movie ids to return. Defaults to 10.

    Returns:
        A list of up to ``n`` movie ids, most similar first.
    """
    # One row (the new user) against every movie: shape (1, n_movies), so row 0
    # of the argsort is the ascending ranking of all movies.
    genre_similarity = cosine_similarity([new_user_genres], movies.iloc[:, 2:])
    similar_movies = movies.iloc[genre_similarity.argsort()[0][::-1]]
    return similar_movies['movie_id'].head(n).tolist()

# Smoke test for the cold-start recommender.
# Example genre preferences: one value per genre column, two of them set.
new_user_genres = [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print(cold_start_recommender(new_user_genres=new_user_genres))

In [None]:

6. Final Analysis:
   - Compare the performance of different recommendation techniques
   - Discuss the trade-offs between accuracy, diversity, and computational efficiency
   - Analyze the effectiveness of the cold start strategy
   - Consider ethical implications and potential biases in the recommendation systems
