# Meno Topic Feedback System with Enhanced Visualizations

This notebook demonstrates how to use Meno's topic feedback system along with the enhanced visualization components to improve topic modeling results interactively.

In [None]:
import os
import sys
import warnings

# Make the parent of the working directory importable BEFORE importing meno.
# sys.path is only consulted the first time a package is imported, so doing
# this after `import meno` would have no effect on which meno gets loaded.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display, HTML
from sklearn.datasets import fetch_20newsgroups

import meno

# Silence library warnings for the rest of the notebook (set after the first
# meno import so import-time warnings remain visible, as before).
warnings.filterwarnings('ignore')

# Import feedback components
from meno import SimpleFeedback, TopicFeedbackManager
from meno import plot_feedback_impact, plot_topic_feedback_distribution

## 1. Load Sample Data

We'll use a subset of the 20 Newsgroups dataset for this example.

In [None]:
# Fetch the training split of three newsgroups with headers, footers and
# quoted replies stripped from each post.
print("Loading 20 newsgroups dataset...")
newsgroups = fetch_20newsgroups(
    subset='train',
    categories=['alt.atheism', 'comp.graphics', 'sci.space'],
    remove=('headers', 'footers', 'quotes'),
    random_state=42
)

# Draw a fixed-seed sample (without replacement) so the example is small
# and reproducible.
sample_size = 300
indices = np.random.RandomState(42).choice(len(newsgroups.data), sample_size, replace=False)

# Collect the sampled texts and their category ids in the same order.
documents, true_labels = [], []
for idx in indices:
    documents.append(newsgroups.data[idx])
    true_labels.append(newsgroups.target[idx])

# Report what was loaded
print(f"Loaded {len(documents)} documents from {len(newsgroups.target_names)} categories")
print(f"Categories: {newsgroups.target_names}")

## 2. Create and Fit a Topic Model

We'll use a simple TF-IDF-based topic model for quick iteration.

In [None]:
# Fit a TF-IDF topic model on the sampled documents.
print("Creating and fitting topic model...")
# Five topics for a three-category sample: the topic count intentionally does
# not match the number of true categories.
model = meno.TFIDFTopicModel(n_topics=5)
model.fit(documents)

# Per-document topic assignments, flattened to a plain list
doc_topics = model.get_document_topics()
topics = doc_topics["topic"].tolist()

# How many documents landed in each topic, ordered by topic id
print("\nTopic distribution:")
topic_counts = pd.Series(topics).value_counts().sort_index()
for topic_id, n_docs in topic_counts.items():
    print(f"  Topic {topic_id}: {n_docs} documents")

# The eight highest-ranked words for every topic
print("\nTop words per topic:")
for topic_id in range(model.n_topics):
    top_words = ', '.join(model.get_topic_words(topic_id, n_words=8))
    print(f"  Topic {topic_id}: {top_words}")

## 3. Set up Topic Feedback Manager

Now we'll set up the topic feedback manager to collect feedback on topic assignments.

In [None]:
# Create wordcloud HTML for topics
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from io import BytesIO
import base64

def get_wordcloud_html(topic_model, topic_id, width=300, height=200, n_words=30):
    """Render a topic's top words as a word cloud inside an HTML ``<img>`` tag.

    Parameters
    ----------
    topic_model : object
        Model exposing ``get_topic_words(topic_id, n_words=...)``. The words
        are consumed in the order returned; earlier words get larger weights.
    topic_id : int
        Topic identifier forwarded to ``get_topic_words``.
    width, height : int, default 300 and 200
        Word cloud canvas size in pixels; also written into the ``<img>``
        ``width``/``height`` attributes.
    n_words : int, default 30
        Number of words requested from the model and the maximum number drawn.

    Returns
    -------
    str
        An ``<img>`` tag whose ``src`` is a base64-encoded PNG data URI.
    """
    words = topic_model.get_topic_words(topic_id, n_words=n_words)
    # Only the words are used here, so synthesise rank-based weights:
    # the first word gets n_words, the next n_words - 1, and so on.
    word_freqs = {word: n_words - rank for rank, word in enumerate(words)}

    # Create wordcloud
    wc = WordCloud(width=width, height=height, background_color='white',
                   max_words=n_words, prefer_horizontal=0.9, relative_scaling=0.5)
    wc.generate_from_frequencies(word_freqs)

    # Draw on an explicit figure (100 dpi, so figsize is pixels / 100) and
    # always close it, even if rendering or saving fails, so no figure leaks.
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    try:
        ax.imshow(wc, interpolation='bilinear')
        ax.axis('off')

        # Save to an in-memory buffer
        buf = BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

    # Convert to base64 for HTML
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    return f'<img src="data:image/png;base64,{image_base64}" width="{width}" height="{height}">'

topic_ids = range(model.n_topics)

# One embedded word cloud image per topic
topic_wordclouds = [get_wordcloud_html(model, topic_id) for topic_id in topic_ids]

# One short description per topic, built from its five top words
topic_descriptions = [
    f"Key words: {', '.join(model.get_topic_words(topic_id, n_words=5))}"
    for topic_id in topic_ids
]

# Wrap the fitted model in a feedback manager
feedback_manager = meno.TopicFeedbackManager(model)

# Build the first review session, handing over the custom visualizations
feedback_system = feedback_manager.setup_feedback(
    n_samples=15,  # Start with 15 documents for review
    uncertainty_ratio=0.7,  # 70% uncertain, 30% diverse
    uncertainty_method="entropy",
    topic_descriptions=topic_descriptions,
    topic_wordclouds=topic_wordclouds
)

print("Feedback system ready. Let's start the review process!")

## 4. Collect Feedback (Session 1)

Now we'll start a feedback session to review and correct topic assignments.

In [None]:
# Display topic information for reference
# (the descriptions and word clouds were passed to setup_feedback() earlier)
feedback_system.display_topics()

# Start review session
# NOTE(review): presumably this renders an interactive review UI and the
# feedback is entered by hand, so later results depend on what the reviewer
# selects here — confirm against the meno documentation.
feedback_system.start_review()

## 5. Apply Feedback and View Results

After providing feedback, we'll apply the changes and see the impact on the topic model.

In [None]:
# Display summary of feedback
feedback_system.display_summary()

# Snapshot the topic assignments BEFORE any feedback is applied. Later cells
# compare `original_topics` against the final assignments, so the baseline
# must be taken ahead of apply_updates(); taking it afterwards would leave
# this session's corrections out of the "changed" counts.
# NOTE(review): assumes apply_updates() is what changes the assignments
# returned by model.get_document_topics() — confirm.
original_topics = model.get_document_topics()["topic"].tolist()

# Apply the feedback to update the model
updated_topics = feedback_system.apply_updates()

## 6. Visualize Feedback Impact (Session 1)

Let's visualize the impact of our first feedback session using the new visualization tools.

In [None]:
# Create a comprehensive feedback impact visualization
# The plot is built from the feedback manager.
fig = plot_feedback_impact(feedback_manager)
# Make the returned figure pyplot's current figure, then render it.
# NOTE(review): use of `fig.number` implies a matplotlib Figure — confirm.
plt.figure(fig.number)
plt.show()

## 7. Collect More Feedback (Session 2)

Let's do another round of feedback, focusing on different documents.

In [None]:
# Start a new feedback session
# Reuses the same manager, topic descriptions and word clouds as session 1,
# and rebinds `feedback_system` to the new session.
# Note: `uncertainty_method` is not passed here (session 1 used "entropy"),
# so whatever default setup_feedback() defines applies.
feedback_system = feedback_manager.setup_feedback(
    n_samples=10,  # Fewer samples this time
    uncertainty_ratio=0.5,  # 50-50 split between uncertain and diverse
    topic_descriptions=topic_descriptions,
    topic_wordclouds=topic_wordclouds
)

# Quickly show topics and start review
feedback_system.display_topics()
feedback_system.start_review()

## 8. Apply Second Round of Feedback

In [None]:
# Display summary and apply feedback
# `updated_topics` is rebound to this session's result, replacing the value
# from the first session.
feedback_system.display_summary()
updated_topics = feedback_system.apply_updates()

# Get current topics after feedback
# Read back from the model so later cells can compare these assignments
# with `original_topics`.
current_topics = model.get_document_topics()["topic"].tolist()

## 9. Visualize Cumulative Feedback Impact

Now let's see the impact of both feedback sessions.

In [None]:
# Create a comprehensive feedback impact visualization
# Same call as after session 1, now with an explicit figure size.
fig = plot_feedback_impact(feedback_manager, figsize=(14, 10))
# Make the returned figure pyplot's current figure, then render it.
plt.figure(fig.number)
plt.show()

## 10. Topic-Specific Feedback Visualization

Let's also use the specialized topic feedback distribution visualization to see which topics were most affected.

In [None]:
# Visualize topic-specific changes
# Compares the assignments in `original_topics` with those in
# `current_topics` for the same documents.
fig = plot_topic_feedback_distribution(
    model,
    documents,
    original_topics,
    current_topics,
    figsize=(12, 8),
    show_wordclouds=True
)
# Make the returned figure pyplot's current figure, then render it.
plt.figure(fig.number)
plt.show()

## 11. Evaluate Final Model

Let's evaluate how well our topic model now aligns with the true categories.

In [None]:
# One row per document: topic before/after feedback plus the true category
doc_df = pd.DataFrame({
    "original_topic": original_topics,
    "final_topic": current_topics,
    "true_category": true_labels,
    "true_category_name": [newsgroups.target_names[label] for label in true_labels],
})
# Flag documents whose topic differs between the two snapshots
doc_df["changed"] = doc_df["original_topic"] != doc_df["final_topic"]

# Row-normalised cross-tabulation: for each final topic, the percentage of
# its documents that belong to each true category
print("Distribution of true categories across topics:")
topic_category_table = 100 * pd.crosstab(
    index=doc_df["final_topic"],
    columns=doc_df["true_category_name"],
    normalize="index",
)

display(topic_category_table.round(1))

# Overall count and share of documents whose topic changed
print(f"\nTotal documents changed: {doc_df['changed'].sum()} ({doc_df['changed'].mean():.1%})")

# Per true category: number of documents, number changed, percent changed
print("\nChanges by true category:")
changed_by_category = doc_df.groupby("true_category_name")["changed"]
category_changes = pd.DataFrame({
    "Total": changed_by_category.count(),
    "Changed": changed_by_category.sum(),
    "Percent Changed": changed_by_category.mean() * 100,
})
display(category_changes.round(1))

## 12. Conclusion

In this notebook, we've demonstrated how to:

1. Set up a topic feedback system with Meno
2. Collect and apply feedback to improve topic assignments
3. Visualize the impact of feedback using the new visualization components
4. Evaluate the improvements made through interactive feedback

The combination of interactive feedback and informative visualizations makes it easier to understand and improve topic models.