In [1]:
# Smoke test: importing pymongo fails loudly if the driver is missing from this
# kernel's environment; the message below is only printed when the import succeeds.
import pymongo
print("installed")

installed


In [9]:
from pymongo import MongoClient
import random
import string

# MongoDB connection: a local server on the default port. "your_db" is the
# database name used by every later cell through `posts_col` / `users_col`.
client = MongoClient("mongodb://localhost:27017/")
db = client["your_db"]
posts_col = db["posts"]
users_col = db["users"]

# Clear previous data.
# WARNING: the empty filter `{}` matches every document, so each run of this
# cell deletes everything currently stored in both collections.
posts_col.delete_many({})
users_col.delete_many({})

# Tags pool: the fixed vocabulary from which post tags and user preferred tags are drawn.
tags_pool = ["travel", "sports", "fashion", "technology", "health", "education", "entertainment", "news", "music", "finance"]

# Helper producing filler text made of random lowercase "words"
def random_text(word_count=5):
    """Return `word_count` random words joined by single spaces.

    Every word consists of 4 to 8 letters picked (with replacement) from
    a-z. A `word_count` of 0 yields the empty string.
    """
    words = []
    for _ in range(word_count):
        # Draw the length first, then the letters, one word at a time.
        length = random.randint(4, 8)
        letters = random.choices(string.ascii_lowercase, k=length)
        words.append(''.join(letters))
    return ' '.join(words)

# Creating 200 sample posts: a 4-word title, a 10-word description and
# 1-3 distinct tags drawn from the tag pool.
sample_posts = []
for _ in range(200):
    title = random_text(4).title()
    description = random_text(10)
    tags = random.sample(tags_pool, k=random.randint(1, 3))
    sample_posts.append({"title": title, "description": description, "tags": tags})

posts_insert_result = posts_col.insert_many(sample_posts)
# Report what the driver actually acknowledged instead of a hard-coded number.
print(f"{len(posts_insert_result.inserted_ids)} posts inserted.")

# Creating 100 users with liked posts and preferences
all_posts = list(posts_col.find())
sample_users = []
for i in range(100):
    user_id = f"user{i:03d}"
    preferred_tags = random.sample(tags_pool, k=2)
    # random.sample draws without replacement, so a user can never "like" the
    # same post twice (independent random.choice calls could repeat a post).
    liked_posts = [
        str(post["_id"])
        for post in random.sample(all_posts, k=random.randint(1, 5))
    ]
    sample_users.append({
        "user_id": user_id,
        "preferred_tags": preferred_tags,
        "liked_posts": liked_posts
    })

# One batched write instead of 100 separate insert_one round-trips.
users_insert_result = users_col.insert_many(sample_users)
print(f"{len(users_insert_result.inserted_ids)} users inserted.")

200 posts inserted.
100 users inserted.


In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_for_user(user_id, top_n=5):
    """Print up to ``top_n`` post recommendations for the given user.

    Every post is represented by the TF-IDF vector of its title and
    description. The user's profile is the mean vector of the posts they
    liked; when none of the liked ids match a stored post, the mean over
    posts sharing at least one of the user's preferred tags is used instead.
    Posts are ranked by cosine similarity to that profile, skipping posts the
    user already liked. If neither likes nor preferred tags match any post,
    randomly chosen posts are printed.

    Relies on the module-level ``users_col`` and ``posts_col`` collections.

    Args:
        user_id: Value of the ``user_id`` field identifying the user document.
        top_n: Maximum number of posts to print; negative values are treated as 0.

    Returns:
        None. Recommendations (or a status message) are printed.
    """
    user = users_col.find_one({"user_id": user_id})
    if not user:
        print("❌ User not found.")
        return

    # A negative value would otherwise slice from the wrong end of the ranking.
    top_n = max(top_n, 0)

    # `or []` tolerates fields stored as null; sets make membership tests O(1).
    liked_post_ids = set(user.get("liked_posts") or [])
    preferred_tags = set(user.get("preferred_tags") or [])

    all_posts = list(posts_col.find({}, {"_id": 1, "title": 1, "description": 1, "tags": 1}))
    if not all_posts:
        # TfidfVectorizer cannot be fitted on an empty corpus, so stop early.
        print("No posts available to recommend.")
        return

    post_ids = [str(post["_id"]) for post in all_posts]

    # Posts that define the user's taste: liked posts first, tag matches as fallback.
    profile_indexes = [i for i, pid in enumerate(post_ids) if pid in liked_post_ids]
    if not profile_indexes:
        profile_indexes = [
            i for i, post in enumerate(all_posts)
            if not preferred_tags.isdisjoint(post.get("tags") or [])
        ]

    if not profile_indexes:
        print(" No preference matches. Showing random content.")
        # Actually show the random posts that the message announces.
        shown_indexes = np.random.permutation(len(all_posts))[:top_n]
    else:
        post_texts = [f"{post.get('title', '')} {post.get('description', '')}" for post in all_posts]
        tfidf_matrix = TfidfVectorizer().fit_transform(post_texts)

        # The sparse mean comes back as a matrix; flatten it to a (1, n_features) array.
        liked_vector = np.asarray(tfidf_matrix[profile_indexes].mean(axis=0)).reshape(1, -1)
        similarities = cosine_similarity(tfidf_matrix, liked_vector).ravel()

        # Most similar first, leaving out anything the user has already liked.
        ranked_indexes = [
            i for i in np.argsort(similarities)[::-1]
            if post_ids[i] not in liked_post_ids
        ]
        shown_indexes = ranked_indexes[:top_n]
        print(f"\n🔮 Recommendations for user: {user_id}\n")

    for idx in shown_indexes:
        post = all_posts[idx]
        # .get mirrors the text-building step, so a post without a title cannot raise KeyError.
        print(f"- {post.get('title', '')} (Tags: {post.get('tags', [])})")