In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the course catalogue from disk.
file_path = "/content/EdX.csv"
df = pd.read_csv(file_path)

# Synthetic ratings: seed NumPy's global generator so the ratings drawn below
# are reproducible from run to run.
np.random.seed(42)
sample_users = [1, 2, 3, 4, 5]
# Draw 10 catalogue rows; random_state pins which rows are picked.
sample_courses = df.sample(n=10, random_state=42)

# Pair each sampled course name with its description once, up front.
course_pairs = list(zip(sample_courses['Name'], sample_courses['Course Description']))

# Every sample user rates every sampled course with a random integer in 1..5
# (randint's upper bound is exclusive). Iteration is user-major so the random
# draws happen in a fixed, repeatable order.
ratings_data = [
    [user, course_name, np.random.randint(1, 6), description]
    for user in sample_users
    for course_name, description in course_pairs
]

# Long-format ratings table: one row per (user, course) pair, carrying the
# course description along as the item's text features.
ratings_df = pd.DataFrame(
    ratings_data,
    columns=['user_id', 'item_id', 'rating', 'item_features'],
)

# One row per distinct item, in first-appearance order. This single ordering
# is used for both the TF-IDF rows and the interaction-matrix columns so the
# two structures can be multiplied together position by position.
item_profiles = ratings_df.drop_duplicates(subset='item_id')
item_order = item_profiles['item_id'].to_numpy()

# Step 1: Build User-Item Interaction Matrix
# pivot_table returns its columns sorted by item_id, which generally differs
# from first-appearance order; reindex so column j is the same item as
# row/column j of item_similarity. Unrated items are stored as 0.
user_item_matrix = ratings_df.pivot_table(
    index='user_id', columns='item_id', values='rating', fill_value=0
).reindex(columns=item_order, fill_value=0)

# Step 2: Content-Based Filtering - Build Item Profiles
# Missing descriptions are replaced with an empty string because the
# vectorizer only accepts text documents.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(item_profiles['item_features'].fillna(''))

# Compute item similarity matrix (items x items, ordered like item_order)
item_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 3: Generate User Profiles
def build_user_profiles(user_item_matrix, item_similarity):
    """Build a rating-weighted similarity profile for every user.

    Args:
        user_item_matrix: DataFrame whose index holds user ids and whose
            row values are that user's ratings, one per item.
        item_similarity: Array of item-to-item similarities. Its rows must
            be in the same item order as the columns of ``user_item_matrix``.

    Returns:
        dict mapping each user id to an array: the user's ratings multiplied
        into ``item_similarity`` and divided by the sum of the ratings.
    """
    # Tiny offset so a user whose ratings sum to zero does not divide by zero.
    epsilon = 1e-10

    def _profile_for(user_id):
        ratings = user_item_matrix.loc[user_id].values
        return np.dot(ratings, item_similarity) / (np.sum(ratings) + epsilon)

    return {user_id: _profile_for(user_id) for user_id in user_item_matrix.index}

# Compute one profile vector per user in the interaction matrix.
user_profiles = build_user_profiles(
    user_item_matrix=user_item_matrix,
    item_similarity=item_similarity,
)

# Step 4: Make Recommendations
def recommend_items(user_id, user_profiles, item_similarity, item_ids, top_n=3):
    """Return the ids of the ``top_n`` highest-scoring items for a user.

    Args:
        user_id: Key into ``user_profiles``.
        user_profiles: Mapping of user id to profile vector.
        item_similarity: Array of item-to-item similarities.
        item_ids: Item identifiers, in the same order as the columns of
            ``item_similarity``.
        top_n: Number of item ids to return.

    Returns:
        list of item ids ordered from highest to lowest score.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_profiles``.
    """
    # Score every item by projecting the user's profile through the
    # similarity matrix.
    projected = np.dot(user_profiles[user_id], item_similarity)

    # Key the scores by item id, then rank the (id, score) pairs. The sort is
    # stable, so items with equal scores keep their original relative order.
    score_by_item = dict(zip(item_ids, projected))
    ranked = sorted(score_by_item.items(), key=lambda pair: pair[1], reverse=True)

    return [item for item, _score in ranked[:top_n]]

# Recommend items for a user
# The user id is named once so the call and the printed heading cannot drift
# apart.
target_user = 1
# Distinct item ids in first-appearance order, the same order used to build
# the TF-IDF rows behind item_similarity.
item_ids = ratings_df.drop_duplicates(subset='item_id')['item_id'].values
recommendations = recommend_items(target_user, user_profiles, item_similarity, item_ids)

# Display recommended courses
# isin() on its own returns matches in catalogue order, which hides the
# ranking; sort the matched rows by their position in `recommendations` so the
# best match is printed first. The sort is stable, so catalogue rows sharing a
# name stay in their original relative order.
rank_of = {name: position for position, name in enumerate(recommendations)}
display_columns = ['Name', 'University', 'Difficulty Level', 'Link']
recommended_courses = (
    df.loc[df['Name'].isin(recommendations), display_columns]
    .sort_values('Name', key=lambda names: names.map(rank_of), kind='stable')
)
print(f"Recommended Courses for User {target_user}:")
print(recommended_courses)

Recommended Courses for User 1:
                                                  Name  \
145                 Success: Practical Thinking Skills   
340  First Nights - Monteverdi’s L’Orfeo and the Bi...   
453        Critical thinking: reasoned decision making   

                               University Difficulty Level  \
145  The Hong Kong Polytechnic University         Beginner   
340                    Harvard University         Beginner   
453              Tecnológico de Monterrey         Beginner   

                                                  Link  
145  https://www.edx.org/course/practical-thinking-...  
340  https://www.edx.org/course/first-nights-montev...  
453  https://www.edx.org/course/critical-thinking-r...  
