# Quick Start Guide

This notebook demonstrates the basic usage of NBTM for topic modeling.

## Contents
1. Available models
2. Sample data
3. Create and train the model
4. View results
5. Evaluation
6. Visualization
7. Save and load the model

In [None]:
# Import necessary modules
import os

from nbtm.models import GibbsLDA, create_model, get_available_models
from nbtm.data import Corpus, TextPreprocessor
from nbtm.evaluation import compute_coherence, compute_topic_diversity
from nbtm.visualization import plot_topic_words, plot_topic_wordcloud

## 1. Available Models

Check what models are available in NBTM.

In [None]:
# Show each available model name alongside its description
print("Available models:")
available_models = get_available_models()
for model_name in available_models:
    print(f"  - {model_name}: {available_models[model_name]}")

## 2. Sample Data

We'll create sample documents for demonstration. In practice, you would load your own corpus.

In [None]:
# Sample documents, written one per line as space-separated tokens and
# split into token lists below (so `documents` is already tokenized).
raw_documents = (
    "machine learning algorithm data model training",
    "deep learning neural network layer training",
    "python programming code function class module",
    "data analysis statistics probability distribution",
    "neural network deep learning optimization gradient",
    "bayesian inference prior posterior probability distribution",
    "topic model document word distribution lda",
    "machine learning classification regression prediction",
    "programming python library package development",
    "statistics hypothesis testing confidence interval",
    "natural language processing text word embedding",
    "optimization gradient descent loss function convergence",
)
documents = [text.split(" ") for text in raw_documents]

num_documents = len(documents)
first_document = documents[0]
print(f"Number of documents: {num_documents}")
print(f"Sample document: {first_document}")

## 3. Create and Train Model

Create a Gibbs Sampling LDA model and train it on our documents.

In [None]:
# Create model: collect the settings in one place, then build the
# "lda_gibbs" model from them.
lda_settings = {
    "num_topics": 3,
    "alpha": 0.1,
    "beta": 0.01,
    "random_state": 42,
}
model = create_model("lda_gibbs", **lda_settings)

print(model)

In [None]:
# Train model
# Fit on the tokenized sample documents for 500 iterations.
model.fit(documents, num_iterations=500)

# Plain string literal: this message has no placeholders, so it needs no f-prefix.
print("Training complete!")
print(f"Final log-likelihood: {model.log_likelihood():.2f}")

## 4. View Results

Examine the learned topics and document-topic distributions.

In [None]:
# Print a header and separator rule, then let the model print its topics
print("Learned Topics:", "=" * 50, sep="\n")
model.print_topics(top_n=5)

In [None]:
# Fetch the top words of every topic and print one line per topic
topics = model.get_all_topic_words(top_n=5)

for topic_index, word_entries in enumerate(topics):
    # Each entry unpacks into (word, probability); only the word is printed.
    top_words = ", ".join(word for word, _prob in word_entries)
    print(f"Topic {topic_index}: {top_words}")

In [None]:
# Retrieve the document-topic distributions and show their shape plus the
# entry for the first document
doc_topics = model.get_document_topics()

matrix_shape = doc_topics.shape
first_doc_distribution = doc_topics[0]
print(f"Document-topic matrix shape: {matrix_shape}")
print(f"\nFirst document topic distribution: {first_doc_distribution}")

## 5. Evaluation

Evaluate the model using coherence and diversity metrics.

In [None]:
# Topic coherence, using the "umass" measure over the sample documents
coherence_measure = "umass"
coherence = compute_coherence(model, documents, measure=coherence_measure)
print(f"Topic Coherence (UMass): {coherence:.4f}")

# Topic diversity, based on the top 10 words of each topic
diversity_top_n = 10
diversity = compute_topic_diversity(model, top_n=diversity_top_n)
print(f"Topic Diversity: {diversity:.4f}")

## 6. Visualization

Visualize the learned topics.

In [None]:
# Draw the top 5 words of each topic and display the resulting figure
topic_words_fig = plot_topic_words(model, top_n=5)
topic_words_fig.show()

In [None]:
# Draw a word cloud for topic 0 and display the resulting figure
wordcloud_fig = plot_topic_wordcloud(model, topic_id=0)
wordcloud_fig.show()

## 7. Save and Load Model

Save the trained model for later use, then load it back to confirm that it can be restored.

In [None]:
# Save model
# Single source of truth for the model file location, used by save and load.
MODEL_PATH = "outputs/quickstart_model.pkl"

# Create the output directory first so saving also works on a fresh checkout
# where "outputs/" does not exist yet (a no-op when it is already there).
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
model.save(MODEL_PATH)
print("Model saved!")

# Load model
# NOTE(review): the .pkl extension suggests pickle-based persistence — only
# load model files that come from a source you trust.
loaded_model = GibbsLDA.load(MODEL_PATH)
print(f"Model loaded: {loaded_model}")

## Next Steps

- See `02_model_comparison.ipynb` for comparing different models
- See `03_full_tutorial.ipynb` for a complete workflow with real data