In [None]:
# =====================================
# Step 5: Semantic Search Recommender
# =====================================

# 1. Install and import dependencies
!pip install sentence-transformers pandas

import pandas as pd
from sentence_transformers import SentenceTransformer, util
import matplotlib.pyplot as plt


Recommendation using Knowledge Base 2


In [None]:
# from google.colab import files
import pandas as pd

# Prompt to upload files (you can select multiple)
# uploaded = files.upload()

# Load the two input tables from the working directory. Change the file
# names here if your uploads are named differently.
kb = pd.read_csv(filepath_or_buffer="recommendation_knowledge_base.csv")
topic_labels = pd.read_excel(io="reddit_topics_labeled.xlsx")

In [None]:
# -----------------------------------------------------
# 1. Load Knowledge Base (Updated for New CSV)
# -----------------------------------------------------
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import matplotlib.pyplot as plt

# Load the knowledge base and normalise the two text columns used for matching.
# Rows without a topic are dropped *before* the string cast: astype(str) would
# otherwise turn NaN into a literal "nan" topic that could be matched later.
kb = (
    pd.read_csv("recommendation_knowledge_base.csv")
    .dropna(subset=['topic_label'])
    .assign(
        llm_parent=lambda frame: frame['llm_parent'].fillna("Unknown").astype(str),
        topic_label=lambda frame: frame['topic_label'].astype(str),
    )
)
print(f"✅ Knowledge base loaded: {kb.shape[0]} rows")

# -----------------------------------------------------
# 2. Load topic labels (BERTopic output) — keep this part intact
# -----------------------------------------------------
topic_labels = pd.read_excel("reddit_topics_labeled.xlsx")
print("Columns in topic label file:", topic_labels.columns.tolist())

# Automatically detect the topic-ID column: the first header containing "topic".
# str(c) keeps the search working when a header is not a string.
topic_col_candidates = [c for c in topic_labels.columns if 'topic' in str(c).lower()]
if not topic_col_candidates:
    raise ValueError(
        "No column containing 'topic' found in the topic label file; "
        f"available columns: {topic_labels.columns.tolist()}"
    )
topic_col = topic_col_candidates[0]

# Detect the human-readable name column. The topic column itself is excluded,
# otherwise a header such as "topic_label" would be chosen as its own name
# column and every topic would simply map to itself.
name_col_candidates = [
    c for c in topic_labels.columns
    if c != topic_col
    and any(k in str(c).lower() for k in ['name', 'represent', 'label', 'desc'])
]
# With no separate name column, fall back to the topic column (identity map).
name_col = name_col_candidates[0] if name_col_candidates else topic_col

topic_map = topic_labels[[topic_col, name_col]].dropna().copy()
topic_map.columns = ['Topic', 'Name']
# Keys are cast to str so they are compared as text with kb['topic_label'].
topic_map['Topic'] = topic_map['Topic'].astype(str)
topic_dict = dict(zip(topic_map['Topic'], topic_map['Name']))

# Map topic names to knowledge base; labels with no match keep their own value.
kb['topic_name'] = kb['topic_label'].map(topic_dict).fillna(kb['topic_label'])

# -----------------------------------------------------
# 3. Load the same model used in BERTopic
# -----------------------------------------------------
# Sentence encoder used for both the topic labels and the user queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# One embedding per distinct topic label, in first-appearance order, so that
# row i of `topic_embeddings` corresponds to `topics[i]`.
# NOTE(review): if topic_label holds numeric IDs rather than descriptive text,
# these embeddings carry little meaning — confirm against the CSV.
topics = kb['topic_label'].drop_duplicates().tolist()
topic_embeddings = model.encode(
    topics,
    show_progress_bar=True,
    convert_to_tensor=True,
)

# -----------------------------------------------------
# 4. Define recommender (using new columns)
# -----------------------------------------------------
def recommend_llm(user_query, kb, model, topics, topic_embeddings):
    """Print and plot the best-rated LLMs for the topic closest to a query.

    The query is embedded with ``model`` and compared by cosine similarity
    with ``topic_embeddings``; the entry of ``topics`` with the highest
    similarity is selected. Rows of ``kb`` carrying that ``topic_label`` are
    listed in descending ``positive_experience_score`` order and drawn as a
    horizontal bar chart. Rows without a score are left out.

    Parameters
    ----------
    user_query : str
        Free-text description of the use case.
    kb : pandas.DataFrame
        Knowledge base with the columns ``topic_label``, ``topic_name``,
        ``llm_parent`` and ``positive_experience_score``.
    model : object
        Encoder exposing ``encode(text, convert_to_tensor=True)``.
    topics : list
        Topic labels, in the same order as ``topic_embeddings``.
    topic_embeddings : array-like
        Embeddings of ``topics``, passed to ``util.cos_sim``.

    Returns
    -------
    None
        Results are printed and plotted; nothing is returned.
    """
    llm_col = 'llm_parent'
    score_col = 'positive_experience_score'

    # Similarity of the query against every topic; row 0 is the single query.
    query_emb = model.encode(user_query, convert_to_tensor=True)
    sims = util.cos_sim(query_emb, topic_embeddings)[0]

    # Best topic match
    best_idx = sims.argmax().item()
    best_topic = topics[best_idx]
    best_score = sims[best_idx].item()

    # Filter once. The display name is only read when rows exist, so a topic
    # missing from `kb` reaches the "no data" message below instead of
    # failing on `.iloc[0]`.
    matches = kb[kb['topic_label'] == best_topic]
    topic_name = matches['topic_name'].iloc[0] if not matches.empty else best_topic

    print("\n" + "="*70)
    print(f"💬 USER QUERY: {user_query}")
    print(f"🔍 Matched Topic → {topic_name}  (ID: {best_topic},  Similarity: {best_score:.2f})")

    # Rows without a score cannot be ranked or plotted, so they are dropped
    # rather than shown as "nan% positive".
    ranked = (
        matches
        .dropna(subset=[score_col])
        .sort_values(score_col, ascending=False)
        .assign(**{llm_col: lambda frame: frame[llm_col].fillna("Unknown").astype(str)})
    )

    if ranked.empty:
        print("⚠️  No LLM data found for this topic.")
        return

    print(f"\n📊  Top LLM recommendations for **{topic_name}**:\n")
    for rank, (llm, score) in enumerate(zip(ranked[llm_col], ranked[score_col]), start=1):
        print(f"  {rank}. {llm} — {score:.1f}% positive")

    # Visualization: best LLM at the top of the chart
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.barh(ranked[llm_col], ranked[score_col], color="#4C72B0")
    ax.set_xlabel('Positive Experience (%)')
    ax.set_title(f"Best LLMs for '{topic_name}'")
    ax.invert_yaxis()
    fig.tight_layout()
    plt.show()

# -----------------------------------------------------
# 5. Interactive Loop
# -----------------------------------------------------
# Prompt for use cases until the user types 'exit'.
while True:
    try:
        # Surrounding whitespace is stripped so " exit " still quits.
        user_input = input("\n💡 Enter your use case (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted at the prompt:
        # leave the loop cleanly instead of surfacing a traceback.
        print("\n👋 Exiting Recommender. Goodbye!")
        break

    if user_input.lower() == 'exit':
        print("👋 Exiting Recommender. Goodbye!")
        break
    if not user_input:
        # Nothing was typed; ask again rather than matching an empty query.
        continue
    recommend_llm(user_input, kb, model, topics, topic_embeddings)
