# EXP NO 3 - Implement User Profile Learning using Python to design a recommender system

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
file_path = "/content/movies.csv"
df = pd.read_csv(file_path)

# Seed NumPy's global RNG so the synthetic ratings below are reproducible.
np.random.seed(42)
sample_users = [1, 2, 3, 4, 5]
sample_movies = df.sample(n=10, random_state=42)  # 10 randomly chosen movies

# Every sample user rates every sampled movie with a random integer in 1..5.
# Users form the outer loop and movies the inner loop, so ratings are drawn
# from the RNG in (user, movie) order.
movie_pairs = list(zip(sample_movies['movieId'], sample_movies['genres']))
ratings_data = [
    [user, movie_id, np.random.randint(1, 6), genres]
    for user in sample_users
    for movie_id, genres in movie_pairs
]

# Long-format ratings table: one row per (user, movie) pair, with the movie's
# genre string carried along as its content features.
ratings_df = pd.DataFrame(
    ratings_data,
    columns=['user_id', 'item_id', 'rating', 'item_features'],
)

# Step 1: Build User-Item Interaction Matrix
# Use one canonical item order (first appearance in ratings_df) for both the
# rating-matrix columns and the TF-IDF rows. pivot_table sorts its columns by
# item_id, so without the reindex rating column j and similarity row j would
# refer to different movies and every downstream dot product would be mixing
# unrelated items.
unique_items = ratings_df.drop_duplicates(subset='item_id')
item_order = unique_items['item_id'].tolist()
user_item_matrix = ratings_df.pivot_table(
    index='user_id', columns='item_id', values='rating', fill_value=0
).reindex(columns=item_order, fill_value=0)

# Step 2: Content-Based Filtering - Build Item Profiles
# Genre strings are pipe-separated (e.g. "Comedy|Drama"), so each run of
# non-'|' characters is treated as one token. The default word tokenizer would
# split hyphenated genres such as "Sci-Fi" into two unrelated terms.
vectorizer = TfidfVectorizer(token_pattern=r"[^|]+")
tfidf_matrix = vectorizer.fit_transform(unique_items['item_features'])

# Compute item similarity matrix (row/column j corresponds to item_order[j])
item_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 3: Generate User Profiles
def build_user_profiles(user_item_matrix, item_similarity):
    """Build one rating-weighted similarity profile per user.

    Args:
        user_item_matrix: DataFrame indexed by user id; each row holds that
            user's ratings, one column per item.
        item_similarity: Item-item similarity matrix whose rows line up with
            the columns of ``user_item_matrix``.

    Returns:
        dict mapping each user id in the matrix index to a NumPy vector: the
        user's ratings dotted with ``item_similarity``, divided by the sum of
        the ratings.
    """
    def _profile(ratings):
        # The tiny epsilon keeps the division finite for an all-zero row.
        return np.dot(ratings, item_similarity) / (np.sum(ratings) + 1e-10)

    return {
        uid: _profile(user_item_matrix.loc[uid].values)
        for uid in user_item_matrix.index
    }

# Compute a profile vector for every user in the interaction matrix,
# keyed by user id.
user_profiles = build_user_profiles(user_item_matrix, item_similarity)

# Step 4: Make Recommendations
def recommend_items(user_id, user_profiles, item_similarity, item_ids, top_n=3):
    """Return the ids of the ``top_n`` highest-scoring items for a user.

    Args:
        user_id: Key identifying the user in ``user_profiles``.
        user_profiles: Mapping of user id to that user's profile vector.
        item_similarity: Item-item similarity matrix; the profile is dotted
            with it to obtain one score per item.
        item_ids: Item identifiers, where position ``j`` labels score ``j``.
        top_n: Maximum number of item ids to return.

    Returns:
        List of item ids sorted from highest to lowest score. Items with equal
        scores keep their relative order from ``item_ids``.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_profiles``.
        ValueError: If ``item_ids`` does not contain exactly one id per score.
    """
    user_profile = user_profiles[user_id]
    scores = np.dot(user_profile, item_similarity)

    # zip() would silently drop the surplus entries of the longer sequence and
    # pair scores with the wrong items, so reject mismatched inputs up front.
    if len(item_ids) != len(scores):
        raise ValueError(
            f"item_ids has {len(item_ids)} entries but {len(scores)} scores were computed"
        )

    item_scores = dict(zip(item_ids, scores))
    recommended_items = sorted(item_scores, key=item_scores.get, reverse=True)[:top_n]
    return recommended_items

# Recommend items for a user
# item_ids lists each rated movie once, in order of first appearance in ratings_df.
item_ids = ratings_df.drop_duplicates(subset='item_id')['item_id'].values
recommendations = recommend_items(1, user_profiles, item_similarity, item_ids)

# Display recommended movies
# isin() on its own returns the matching rows in dataset order, which throws
# away the ranking. Re-sort the rows by their position in `recommendations`
# so the best match is printed first.
rank_by_id = {movie_id: rank for rank, movie_id in enumerate(recommendations)}
matches = df[df['movieId'].isin(recommendations)]
rank_order = np.argsort(matches['movieId'].map(rank_by_id).to_numpy(), kind='stable')
recommended_movies = matches.iloc[rank_order][['movieId', 'title', 'genres']]
print("Recommended Movies for User 1:")
print(recommended_movies)


Recommended Movies for User 1:
      movieId                              title        genres
1111     1446               Kolya (Kolja) (1996)  Comedy|Drama
5353     8920           Country Girl, The (1954)         Drama
6213    45635  Notorious Bettie Page, The (2005)         Drama
