In [6]:
import pandas as pd

# تحميل البيانات المنظفة
data = pd.read_csv("processed_movielens_100k.csv")

# -------------------------
# STEP 2: Prepare TF-IDF
# -------------------------
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['genres'])

# -------------------------
# STEP 3: Build Ratings Matrix and SVD
# -------------------------
from sklearn.decomposition import TruncatedSVD

ratings_matrix = data.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
svd = TruncatedSVD(n_components=20, random_state=42)
latent_matrix = svd.fit_transform(ratings_matrix)
predicted_ratings = pd.DataFrame(svd.inverse_transform(latent_matrix), index=ratings_matrix.index, columns=ratings_matrix.columns)

# -------------------------
# STEP 4: Hybrid Recommender
# -------------------------
def hybrid_recommend(user_id, input_genre, data, tfidf, tfidf_matrix, predicted_ratings, alpha=0.7, top_n=10):
    input_vector = tfidf.transform([input_genre.replace('|', ' ').lower()])
    content_similarities = cosine_similarity(input_vector, tfidf_matrix).flatten()
    
    user_svd_scores = predicted_ratings.loc[user_id]
    watched = data[data['userId'] == user_id]['movieId'].unique()
    
    hybrid_scores = []
    for idx in range(len(data)):
        movie_id = data.iloc[idx]['movieId']
        if movie_id in watched:
            continue
        
        content_score = content_similarities[idx]
        svd_score = user_svd_scores.get(movie_id, 0)
        final_score = alpha * svd_score + (1 - alpha) * content_score
        hybrid_scores.append((idx, final_score))
    
    # ترتيب النتائج
    hybrid_scores.sort(key=lambda x: x[1], reverse=True)
    top_indices = [idx for idx, _ in hybrid_scores[:top_n]]
    
    recommendations = data.iloc[top_indices][['title', 'genres']].drop_duplicates()
    return recommendations

# -------------------------
# STEP 5: Try it!
# -------------------------
# مثال تجربة
hybrid_recommend(user_id=10, input_genre="action adventure sci-fi", 
                 data=data, tfidf=tfidf, tfidf_matrix=tfidf_matrix, 
                 predicted_ratings=predicted_ratings, alpha=0.7, top_n=5)

Unnamed: 0,title,genres
909,Pirates of the Caribbean: The Curse of the Bla...,Action|Adventure|Comedy|Fantasy
