# LLM Visualization Exploration

Interactive notebook for exploring LLM concepts through visualizations.

In [None]:
import sys
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt
from src.tokenization import tokenize, get_token_frequencies
from src.embeddings import create_similarity_matrix
from src.sampling import temperature_entropy_analysis

%matplotlib inline

## 1. Tokenization Example

In [None]:
# Tokenize one sample sentence, then report the resulting tokens and their count.
text = "Large language models generate text token by token."
tokens = tokenize(text)
# A single print call with a newline separator emits the same two output lines.
print(f"Tokens: {tokens}", f"Token count: {len(tokens)}", sep="\n")

## 2. Temperature vs Entropy

In [None]:
# Plot the entropies returned by temperature_entropy_analysis (called with
# vocab_size=30) against the matching temperatures.
temps, entropies = temperature_entropy_analysis(vocab_size=30)

# Object-oriented Matplotlib API: one figure holding a single Axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(temps, entropies, marker='o', linewidth=2)
ax.set_xlabel('Temperature')
ax.set_ylabel('Entropy (bits)')
ax.set_title('Temperature vs Output Entropy')
ax.grid(True, alpha=0.3)  # faint grid so the curve stays the visual focus
plt.show()

## 3. Word Similarity Matrix

In [None]:
# Build a similarity matrix for a small word list and draw it as a labelled heatmap.
words = ['model', 'neural', 'network', 'token', 'embedding', 'attention']
sim_matrix = create_similarity_matrix(words, dimensions=32)

fig, ax = plt.subplots(figsize=(8, 6))
# Colour scale is pinned to [-1, 1] rather than auto-scaled to the data.
heatmap = ax.imshow(sim_matrix, cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(heatmap, ax=ax, label='Cosine Similarity')

# One tick per word on both axes; x labels are slanted so they don't overlap.
tick_positions = range(len(words))
ax.set_xticks(tick_positions)
ax.set_xticklabels(words, rotation=45, ha='right')
ax.set_yticks(tick_positions)
ax.set_yticklabels(words)

ax.set_title('Word Similarity Heatmap')
fig.tight_layout()
plt.show()

## 4. Try Your Own Text

In [None]:
# Experiment with your own text here!
# Replace the placeholder string below, then re-run this cell.
custom_text = "..."
custom_tokens = tokenize(custom_text)
frequencies = get_token_frequencies(custom_tokens, top_k=10)

# Plot frequencies
# zip(*frequencies) splits the entries into parallel label/count sequences;
# this presumes each entry is a (token, count) pair -- confirm against
# get_token_frequencies. An empty result falls back to an empty chart.
# The names are deliberately distinct from `words` in section 3 so running this
# cell does not overwrite that notebook-global list.
token_labels, token_counts = zip(*frequencies) if frequencies else ([], [])
plt.figure(figsize=(10, 5))
plt.bar(token_labels, token_counts)
plt.xlabel('Tokens')
plt.ylabel('Frequency')
plt.title('Token Frequency Distribution')
plt.xticks(rotation=45, ha='right')  # slant labels so long tokens don't overlap
plt.tight_layout()
plt.show()