To create a topic-based post recommendation system using Generative AI, you can follow the outlined approach:

User Topic Profiling:
Objective: Create a user profile based on their interests, derived from their posts.

Tokenization and Embedding:

Tokenize and embed user posts using a Generative AI model such as GPT-4/3.5 (the example code below uses the openly available GPT-2 model from Hugging Face Transformers).
Extract contextual embeddings for each word in the post.
Topic Extraction:

Utilize the contextual embeddings to identify key topics within each user post.
Limit each user profile to 20 topics in total, with at most 10 topics extracted from any single user post.
Post Topic Profiling:
Objective: Assign topics to each post.

Tokenization and Embedding:

Tokenize and embed each post using the same Generative AI model.
Extract contextual embeddings for each word in the post.
Topic Extraction:

Use contextual embeddings to identify key topics within each post.
Limit the number of topics to 20, matching the user profile limit.
Recommendation Engine:
Objective: Match posts with the top 10 most relevant users based on their interests.


In [None]:
# Load training and testing datasets
import json  # required by json.load below; not imported in any earlier cell
from itertools import islice


def _print_sample(data, limit=2):
    """Print the first ``limit`` entries of a loaded JSON dataset.

    A list is printed as a slice. Anything else is treated as a mapping and
    its first ``limit`` items are printed as ``key: value`` lines.
    """
    if isinstance(data, list):
        print(data[:limit])
        return
    # islice avoids materialising the whole mapping just to show a preview.
    for key, value in islice(data.items(), limit):
        print(f"{key}: {value}")


# JSON text is UTF-8 by specification; being explicit avoids decode errors on
# platforms whose default encoding is something else.
with open("content/Reddit_data_train.json", 'r', encoding="utf-8") as f_train, \
        open("content/Reddit_data_test.json", 'r', encoding="utf-8") as f_test:
    train_data = json.load(f_train)
    test_data = json.load(f_test)

# Explore the structure of the datasets
print("Training Data Sample:")
_print_sample(train_data)

print("\nTesting Data Sample:")
_print_sample(test_data)

In [None]:
import tensorflow as tf
import numpy as np

# Step 1: User Topic Profiling
# Hand-written stand-in data: user id -> topics that user is interested in.
user_profiles = {
    "user1": [
        "Blockchain",
        "Cryptocurrency",
        "Technology",
    ],
    "user2": [
        "Finance",
        "Investing",
        "Stocks",
    ],
    # ... more user profiles ...
}

# Step 2: Post Topic Profiling
# Hand-written stand-in data: post id -> topics assigned to that post.
post_topics = {
    "post1": [
        "Blockchain",
        "Cryptocurrency",
        "Technology",
    ],
    "post2": [
        "Finance",
        "Investing",
        "Stocks",
    ],
    # ... more post topics ...
}

# Step 3: Recommendation Engine
def recommend_posts(user_profile, post_topics, top_n=5):
    """Rank posts by how many topics they share with a user's profile.

    Args:
        user_profile: Iterable of topic strings the user is interested in.
        post_topics: Mapping of post id -> iterable of topic strings.
        top_n: Maximum number of posts to return (defaults to 5).

    Returns:
        A list of ``(post_id, score)`` tuples, highest score first, where
        ``score`` is the number of distinct shared topics. Posts with equal
        scores keep the iteration order of ``post_topics``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Build the interest set once instead of once per post.
    interests = set(user_profile)
    relevance_scores = {
        post: len(interests.intersection(topics))
        for post, topics in post_topics.items()
    }

    # sorted() is stable, so ties preserve the original post order.
    ranked = sorted(relevance_scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top_n]

# Step 4: Evaluation
def evaluate_recommendations(predictions, ground_truth):
    """Return the Jaccard similarity between predicted and expected items.

    Items are compared as-is, so both arguments must contain the same kind
    of value (e.g. bare post ids, not ``(post_id, score)`` tuples).

    Args:
        predictions: Iterable of hashable predicted items.
        ground_truth: Iterable of hashable expected items.

    Returns:
        ``|predictions & ground_truth| / |predictions | ground_truth|`` as a
        float, or ``0.0`` when both inputs are empty.
    """
    predicted = set(predictions)
    expected = set(ground_truth)
    union = predicted | expected
    if not union:
        # Both inputs empty: avoid dividing by zero.
        return 0.0
    return len(predicted & expected) / len(union)

# Step 5: Deliverables
class RecommendationSystem:
    """Facade that pairs stored user profiles with stored post topics."""

    def __init__(self, user_profiles, post_topics):
        """Keep references to the profile and post-topic mappings."""
        self.user_profiles, self.post_topics = user_profiles, post_topics

    def recommend_posts_for_user(self, user_id):
        """Return ranked posts for ``user_id``.

        Yields an empty list when the user is unknown or has an empty
        profile; otherwise delegates to ``recommend_posts``.
        """
        interests = self.user_profiles.get(user_id, [])
        if not interests:
            return []
        return recommend_posts(interests, self.post_topics)


In [None]:

# Step 6: Run the System (Simplified)
if __name__ == "__main__":
    # Example usage
    system = RecommendationSystem(user_profiles, post_topics)
    user_id = "user1"
    recommendations = system.recommend_posts_for_user(user_id)
    print("Recommendations for {}: {}".format(user_id, recommendations))

    # Example Evaluation
    ground_truth = ["post1", "post2"]
    # `recommendations` holds (post_id, score) pairs while `ground_truth`
    # holds bare post ids; compare ids with ids, otherwise the intersection
    # is always empty and the similarity is always 0.
    recommended_post_ids = [post_id for post_id, _score in recommendations]
    jaccard_similarity = evaluate_recommendations(recommended_post_ids, ground_truth)
    print("Jaccard Similarity:", jaccard_similarity)


In [None]:
pip install transformers


In [None]:
from transformers import GPT2Tokenizer, TFGPT2Model

# Step 7: Advanced Topic Extraction
class TopicExtractor:
    """Pick candidate topic words from text using GPT-2 contextual embeddings.

    GPT-2's ``last_hidden_state`` is a matrix of float embeddings, not token
    ids, so it cannot be passed to the tokenizer's decode methods. Instead,
    the input tokens are regrouped into words and each word is ranked by how
    closely its embedding matches the mean embedding of the whole text. This
    is a simple salience heuristic, not a trained topic model.
    """

    def __init__(self):
        self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        self.model = TFGPT2Model.from_pretrained("gpt2")

    def extract_topics(self, text, max_topics=10):
        """Return up to ``max_topics`` distinct words from ``text``.

        Args:
            text: The post text to analyse.
            max_topics: Maximum number of words to return (defaults to 10).

        Returns:
            A list of distinct word strings, most salient first. Empty or
            whitespace-only text yields an empty list.
        """
        import string  # local import: only needed for punctuation stripping

        if not text or not text.strip():
            # Nothing to embed; also avoids calling the model on zero tokens.
            return []

        # Truncate to the model's maximum length so long posts do not overflow
        # GPT-2's position embeddings.
        inputs = self.tokenizer(text, return_tensors="tf", truncation=True)
        input_ids = inputs["input_ids"]

        # Get model output
        outputs = self.model(input_ids)
        hidden_states = outputs["last_hidden_state"][0].numpy()
        token_ids = input_ids[0].numpy().tolist()

        document_vector = hidden_states.mean(axis=0)

        scored = []
        for word, rows in self._group_tokens_into_words(token_ids):
            label = word.strip().strip(string.punctuation)
            if not label:
                continue  # pure whitespace / punctuation carries no topic
            word_vector = hidden_states[rows].mean(axis=0)
            scored.append((self._cosine(word_vector, document_vector), label))

        # Stable sort: equally scored words keep their order in the text.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        # dict.fromkeys drops repeated words while preserving rank order.
        ranked_labels = list(dict.fromkeys(label for _score, label in scored))
        return ranked_labels[:max_topics]

    def _group_tokens_into_words(self, token_ids):
        """Merge sub-word tokens into ``[word_text, [row, ...]]`` entries.

        A decoded piece that starts with whitespace (or the very first piece)
        opens a new word; any other piece continues the previous word.
        """
        words = []
        for row, token_id in enumerate(token_ids):
            piece = self.tokenizer.decode([token_id])
            if words and piece and not piece[0].isspace():
                words[-1][0] += piece
                words[-1][1].append(row)
            else:
                words.append([piece, [row]])
        return words

    @staticmethod
    def _cosine(left, right):
        """Return the cosine similarity of two 1-D vectors (0.0 if either is zero)."""
        denominator = float((left @ left) ** 0.5 * (right @ right) ** 0.5)
        return float(left @ right) / denominator if denominator else 0.0

# Step 8: Enhance User Profiling with Topic Extraction
from collections import Counter

topic_extractor = TopicExtractor()

# Replace each profile with the topics extracted from its entries. Only the
# values of existing keys are reassigned, which is safe while iterating.
for user, posts in user_profiles.items():
    topic_counts = Counter()
    for post in posts:
        topic_counts.update(topic_extractor.extract_topics(post))
    # Keep the 10 most frequent topics. most_common() orders ties by first
    # appearance, so the result is deterministic, unlike slicing a set.
    user_profiles[user] = [topic for topic, _count in topic_counts.most_common(10)]



User-Post Matching:

Compare the identified topics of a post with the topics in user profiles.
Calculate a relevance score based on topic matches.
Top User Selection:

Identify the top 10 users with the highest relevance scores for each post.
Evaluation:
Metrics: Use Normalized Discounted Cumulative Gain (NDCG) and Jaccard similarity to evaluate the effectiveness of the recommendations.

NDCG:

Evaluate the ranking of recommended users for each test post against the ground truth data.
Measure the quality of the recommendations using NDCG.
Jaccard Similarity:

Calculate Jaccard similarity between recommended users and users who interacted with the test post.
Deliverables:

In [None]:
# Step 9: Enhance Recommendation Engine
class RecommendationEngine:
    """Recommend posts whose extracted topics overlap a user's interests.

    Topic extraction is delegated to the module-level ``topic_extractor``.
    """

    def __init__(self, user_profiles, posts):
        """Store the inputs.

        Args:
            user_profiles: Mapping of user id -> iterable of topic strings.
            posts: Mapping of post id -> post text.
        """
        self.user_profiles = user_profiles
        self.posts = posts

    def recommend_posts(self, user_id):
        """Return ids of posts sharing at least one topic with the user.

        Args:
            user_id: Key into ``user_profiles``.

        Returns:
            A list of post ids in the iteration order of ``posts``. Unknown
            users and users with no interests get an empty list.
        """
        # Build the set once rather than once per post. Unknown users yield an
        # empty result, matching RecommendationSystem.recommend_posts_for_user.
        user_interests = set(self.user_profiles.get(user_id, ()))
        if not user_interests:
            # No interests can never overlap; skip the costly extraction.
            return []

        # Find posts with shared topics
        return [
            post_id
            for post_id, post_content in self.posts.items()
            if user_interests.intersection(topic_extractor.extract_topics(post_content))
        ]


In [None]:

# Step 10: Use the Enhanced Recommendation Engine
# NOTE(review): RecommendationEngine iterates `posts.items()`, so `posts` must
# be a mapping of post id -> post text. In the visible cells the only binding
# of `posts` is the Step 8 loop variable (a list) — TODO confirm where the
# real posts mapping is built.
recommendation_engine = RecommendationEngine(user_profiles, posts)

# Example: Recommend posts for a specific user
# NOTE(review): "S1NCL41R" is not a key of the sample `user_profiles` defined
# in Step 1; presumably it comes from the Reddit dataset — verify.
user_id_to_recommend = "S1NCL41R"
recommended_posts = recommendation_engine.recommend_posts(user_id_to_recommend)
print(f"Recommended posts for user {user_id_to_recommend}: {recommended_posts}")
# Step 11: Evaluation Metrics
def evaluate_recommendations(ground_truth, recommendations):
    """Score per-post user recommendations with Jaccard similarity and NDCG.

    Args:
        ground_truth: Mapping of post id -> iterable of records, each a
            mapping with a ``"user"`` key naming a user who interacted with
            the post.
        recommendations: Mapping of post id -> ranked list of recommended
            user ids (best first). Posts missing from this mapping are
            scored as having no recommendations.

    Returns:
        ``(avg_jaccard_similarity, avg_ndcg)`` averaged over the posts in
        ``ground_truth``; ``(0.0, 0.0)`` when ``ground_truth`` is empty.
    """
    import math  # local import: the file does not import math at top level

    # Calculate Jaccard similarity
    def jaccard_similarity(set1, set2):
        union = set1 | set2
        return len(set1 & set2) / len(union) if union else 0.0

    # Calculate NDCG with binary relevance
    def ndcg(relevance_order, num_relevant):
        dcg = sum(
            relevance / math.log2(rank + 2)
            for rank, relevance in enumerate(relevance_order)
        )
        # The ideal ranking places every relevant user first, so it can have
        # at most min(#relevant, #ranked) hits, not one per ranked slot.
        ideal_hits = min(num_relevant, len(relevance_order))
        idcg = sum(1.0 / math.log2(rank + 2) for rank in range(ideal_hits))
        return dcg / idcg if idcg else 0.0

    if not ground_truth:
        # Nothing to average over; avoid dividing by zero.
        return 0.0, 0.0

    total_jaccard_similarity = 0.0
    total_ndcg = 0.0

    for post_id, ground_truth_data in ground_truth.items():
        ground_truth_users = {data["user"] for data in ground_truth_data}
        # De-duplicate while keeping rank order so a repeated user is not
        # credited twice (which could push NDCG above 1).
        ranked_users = list(dict.fromkeys(recommendations.get(post_id, ())))

        # Calculate Jaccard similarity for each post
        total_jaccard_similarity += jaccard_similarity(ground_truth_users, set(ranked_users))

        # Create a relevance order for NDCG
        relevance_order = [1 if user_id in ground_truth_users else 0 for user_id in ranked_users]
        # Calculate NDCG for each post
        total_ndcg += ndcg(relevance_order, len(ground_truth_users))

    # Average over all posts
    avg_jaccard_similarity = total_jaccard_similarity / len(ground_truth)
    avg_ndcg = total_ndcg / len(ground_truth)

    return avg_jaccard_similarity, avg_ndcg




In [None]:
# Step 12: Evaluate the Recommendation System
# NOTE(review): the Step 11 evaluate_recommendations iterates
# `ground_truth.items()` and looks up `recommendations` by post id, so both
# arguments must be dicts keyed by post id. The only bindings of these names
# in the visible cells are the list examples from Step 6 — TODO confirm where
# the dict-shaped inputs are built.
avg_jaccard_similarity, avg_ndcg = evaluate_recommendations(ground_truth, recommendations)
print(f"Average Jaccard Similarity: {avg_jaccard_similarity}")
print(f"Average NDCG: {avg_ndcg}")