In [1]:
import os
import sys
sys.path.append(os.path.abspath('..'))

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from transformer_lens import HookedTransformer
from transformer_lens import utils as tutils
from transformer_lens.evals import make_pile_data_loader, evaluate_on_dataset

import numpy as np


from functools import partial
from datasets import load_dataset
from tqdm import tqdm
import json

from typing import List, Callable, Union, Optional, Literal

import einops

from sae_lens import SAE

from steering.evals_utils import evaluate_completions, multi_criterion_evaluation
from steering.utils import normalise_decoder, text_to_sae_feats, top_activations
from steering.patch import generate, scores_2d, patch_resid


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

from IPython.display import HTML, display
import html


import numpy as np
torch.set_grad_enabled(False)

In [None]:
os.environ['GEMMA_2_SAE_WEIGHTS_ROOT'] = '/workspace/gemmasaes/'

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model: HookedTransformer = HookedTransformer.from_pretrained("google/gemma-2-9b-it", device=device, dtype=torch.float16)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Loaded pretrained model google/gemma-2-9b-it into HookedTransformer


In [None]:
chat = [[
    {"role": "user", "content": "Write a Hello world program"},
]]
tokens = model.tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_tensors='pt')
tokens

tensor([[    2,   106,  1645,   108,  5559,   476, 25957,  2134,  2733,   107,
           108,   106,  2516,   108]])

In [None]:
SEQ_LEN = 128 - 4 + 1 # 4 for the special tokens at the start, 1 because we'll cut bos.

# Load in the data (it's a Dataset object)
data = load_dataset("NeelNanda/c4-code-20k", split="train")
# print(data)
# data = load_dataset("HuggingFaceFW/fineweb", name="CC-MAIN-2024-10", split="train", streaming=True)
# assert isinstance(data, Dataset)

In [None]:
# Tokenize the data (using a utils function) and shuffle it
tokenized_data = tutils.tokenize_and_concatenate(data, model.tokenizer, max_length=SEQ_LEN) # type: ignore
tokenized_data = tokenized_data.shuffle(42)

# Get the tokens as a tensor
all_tokens = tokenized_data["tokens"]
assert isinstance(all_tokens, torch.Tensor)

print(all_tokens.shape)

torch.Size([222989, 125])


In [None]:
sae_layer = 20
hp = f"blocks.{sae_layer}.hook_resid_post"
sae, _, _ = SAE.from_pretrained(
    release = "gemma-2-it-saes-old",
    sae_id = f"20/post_mlp_residual/16384/0_00045",
    device = 'cpu'
)
sae.to(device)

SAE(
  (activation_fn): ReLU()
  (hook_sae_input): HookPoint()
  (hook_sae_acts_pre): HookPoint()
  (hook_sae_acts_post): HookPoint()
  (hook_sae_output): HookPoint()
  (hook_sae_recons): HookPoint()
  (hook_sae_error): HookPoint()
)

# Feature Visualization

In [None]:
with open("../baselines/top_k_values_20.json", "r") as f:
    top_k_values = json.load(f)

with open("../baselines/top_k_indices_20.json", "r") as f:
    top_k_indices = json.load(f)


# load tokens
ready_tokens = torch.load("../baselines/ready_tokens_20.pt")

def acts_for_feature(tokens, feature_idx: int, batch_size: int = 4):
    n_samples, seq_len = tokens.shape
    all_acts = []
    for i in tqdm(range(0, n_samples, batch_size)):
        batch = tokens[i:i+batch_size]
        _, acts = model.run_with_cache(batch, names_filter=hp, stop_at_layer=sae_layer+1)
        acts = acts[hp]
        batch_sae_acts = []
        for sample in acts:
            sae_acts = sae.encode(sample)
            batch_sae_acts.append(sae_acts[:, feature_idx])
        
        batch_sae_acts = torch.stack(batch_sae_acts, dim=0)
        all_acts.append(batch_sae_acts)
    
    return torch.cat(all_acts, dim=0)

def display_highlighted_tokens(tokens, values, tokenizer):
    # Ensure tensors are on CPU
    tokens = tokens.cpu()
    values = values.cpu().to(torch.float32)
    
    original_values = values.clone()
    
    # Normalize values to range [0, 1] for coloring
    values = (values - values.min()) / (values.max() - values.min())
    
    # print(values)
    
    # Function to convert value to RGB color
    def value_to_color(value):
        # print(value)
        # print(value.item())
        # Using a gradient from white to red
        color_value = int(255 * (1 - value.item()))
        # print(color_value)
        return f"rgb(255, {color_value}, {color_value})"
    
    html_output = ""
    
    for batch_idx in range(tokens.shape[0]):
        # Use the original values for max activation
        max_activation = original_values[batch_idx].max().item()
        html_output += f"<p>Max Activation: {max_activation:.4f}:</p>"
        html_output += "<p>"
        
        for token, value in zip(tokens[batch_idx], values[batch_idx]):
            word = tokenizer.decode([token.item()])
            # Escape special characters to prevent HTML conflicts
            word_escaped = html.escape(word)
            color = value_to_color(value)
            html_output += f'<span style="background-color: {color};">{word_escaped}</span>'
        
        html_output += "</p>"
    
    display(HTML(html_output))

def visualize_feature(ft_id: int):
    samples = ready_tokens[top_k_indices[ft_id]]
    
    acts = acts_for_feature(samples, ft_id)
    # print(samples.shape)
    # print(acts.shape)
    # print(samples[0, :])
    # print(model.to_string(samples[0, :20]))
    # print(model.to_string(samples[0, 3:20]))
    display_highlighted_tokens(samples[:, 3:], acts[:, 3:], model.tokenizer)

visualize_feature(101)

  ready_tokens = torch.load("../baselines/ready_tokens_20.pt")
100%|██████████| 3/3 [00:00<00:00,  7.08it/s]


# Generate

In [None]:
text = "Tell me a story"
toks = model.tokenizer.apply_chat_template([{"role": "user", "content": text}], return_tensors='pt', add_generation_prompt=True)
print(toks)
toks = toks.expand(10, -1)
print(toks)


tensor([[    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108]])
tensor([[    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [    2,   106,  1645,   108, 27445,   682,   476,  3904,   107,   108,
           106,  2516,   108],
        [

In [None]:
def patch_resid(resid, hook, steering, scale=1):
    resid[:, :, :] = resid[:, :, :] + steering * scale
    return resid


@torch.no_grad()
def generate_it(
    model: HookedTransformer,
    hooks: list[tuple[str, Callable]], # includes the steering hook.
    prompt = "",
    n_samples=4,
    batch_size=4,
    max_new_tokens=35,
    top_k=50,
    top_p=0.3,
):
    tokens = model.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], return_tensors='pt', add_generation_prompt=True)
    
    # tokens = model.to_tokens(prompt, prepend_bos=True)
    prompt_batch = tokens.expand(batch_size, -1)

    results = []
    num_batches = (n_samples + batch_size - 1) // batch_size  # Calculate number of batches

    with model.hooks(fwd_hooks=hooks):
        for _ in range(num_batches):
            batch_results = model.generate(
                prompt_batch,
                # prepend_bos=True,
                # use_past_kv_cache=True,
                max_new_tokens=max_new_tokens,
                verbose=False,
                top_k=top_k,
                top_p=top_p,
                stop_at_eos=True,
            )
            batch_results = batch_results[:, 1:]  # cut bos
            str_results = model.to_string(batch_results)
            results.extend(str_results)
    return results[:n_samples]

# Cosine Similarity

In [None]:
import torch.nn.functional as F

# Normalize the embeddings along the embedding dimension
normalized_embeddings = F.normalize(sae.W_dec, p=2, dim=1)

# Calculate the cosine similarity matrix
cos_sim_matrix = torch.mm(normalized_embeddings, normalized_embeddings.t()).cuda()

def find_similar_features(target_idx, top_k=10, return_rows=True):
    """
    Finds the top-k most similar rows (or columns) to a target row (or column) in a cosine similarity matrix.

    Args:
        cos_sim_matrix (torch.Tensor): The cosine similarity matrix.
        target_idx (int): The index of the target row (or column) to compare against.
        top_k (int, optional): The number of most similar rows (or columns) to return. Default is 10.
        return_rows (bool, optional): If True, return the most similar rows. If False, return the most similar columns. Default is True.

    Returns:
        torch.Tensor: A tensor containing the indices of the top-k most similar rows (or columns).
        torch.Tensor: A tensor containing the similarity scores of the top-k most similar rows (or columns).
    """
    target_vector = cos_sim_matrix[target_idx] if return_rows else cos_sim_matrix[:, target_idx]
    similarities = target_vector if return_rows else target_vector.T

    return similarities

    topk_similarities, topk_indices = torch.topk(similarities, k=top_k, largest=True, sorted=True)

    if not return_rows:
        topk_indices = topk_indices.T

    return topk_indices, topk_similarities

In [None]:
similarities = find_similar_features(101, top_k=sae.W_dec.shape[0])

# Co-activating 

I want to see the features that often co-occur in the dataset

TODO: Compute with chat tempate

In [None]:
def get_activations(text, max_length=8192):
    # toks = model.tokenizer.apply_chat_template([{"role": "user", "content": text}], return_tensors='pt')[0]
    # toks = toks.to(device)
    
    # # Truncate if longer than max_length
    # if toks.shape[0] > max_length:
    #     toks = toks[:max_length]

    # print(toks)
    # print(model.to_string(toks))

    _, acts = model.run_with_cache(text, names_filter=hp)
 
    acts = acts[hp]
    print(acts.shape)
   
    all_sae_acts = []
    for batch in acts:
        sae_acts = sae.encode(batch)
        all_sae_acts.append(sae_acts)

    return torch.stack(all_sae_acts, dim=0)

In [None]:
get_activations("Describe what science is")

torch.Size([1, 5, 3584])


tensor([[[ 0.0000, 45.7500,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]],
       device='cuda:0', dtype=torch.bfloat16)

In [None]:
def process_multiple_examples(dataset, num_examples=1000):
    all_activations = []
    for i, text in enumerate(dataset[:num_examples]):
        activations = get_activations(text)
        # Remove the batch dimension (which is always 1) and add to list
        all_activations.append(activations.squeeze(0))
        print(f"Processed example {i+1}/{num_examples}, shape: {activations.shape}")

    # Concatenate all activations along the first dimension
    combined_activations = torch.cat(all_activations, dim=0)
    print(f"Combined shape: {combined_activations.shape}")
    return combined_activations

# Process multiple examples
num_examples = 100  # Adjust this number as needed
all_activations = process_multiple_examples(data["text"], num_examples)

torch.Size([1, 8192, 3584])
Processed example 1/100, shape: torch.Size([1, 8192, 16384])
torch.Size([1, 80, 3584])
Processed example 2/100, shape: torch.Size([1, 80, 16384])
torch.Size([1, 1366, 3584])
Processed example 3/100, shape: torch.Size([1, 1366, 16384])
torch.Size([1, 594, 3584])
Processed example 4/100, shape: torch.Size([1, 594, 16384])
torch.Size([1, 594, 3584])
Processed example 5/100, shape: torch.Size([1, 594, 16384])
torch.Size([1, 686, 3584])
Processed example 6/100, shape: torch.Size([1, 686, 16384])
torch.Size([1, 479, 3584])
Processed example 7/100, shape: torch.Size([1, 479, 16384])
torch.Size([1, 213, 3584])
Processed example 8/100, shape: torch.Size([1, 213, 16384])
torch.Size([1, 4898, 3584])
Processed example 9/100, shape: torch.Size([1, 4898, 16384])
torch.Size([1, 244, 3584])
Processed example 10/100, shape: torch.Size([1, 244, 16384])
torch.Size([1, 407, 3584])
Processed example 11/100, shape: torch.Size([1, 407, 16384])
torch.Size([1, 690, 3584])
Processed 

In [None]:
def compute_cooccurrence_matrix(tensor, top_k=10):
    # Flatten the first two dimensions
    flattened = tensor.view(-1, tensor.shape[-1])
    print(flattened.shape)
    
    # Convert to binary (1 for non-zero, 0 for zero)
    binary = (flattened != 0).float()
    print(binary.shape)
    
    # Compute co-occurrence matrix
    cooccurrence = torch.mm(binary.t(), binary)
    
    # # Set diagonal to 0 to ignore self-cooccurrence
    # cooccurrence.fill_diagonal_(0)
    return cooccurrence
    
    # Get top k co-occurring pairs
    values, indices = torch.topk(cooccurrence.view(-1), k=top_k)
    
    # Convert flat indices to 2D indices
    rows, cols = np.unravel_index(indices.cpu().numpy(), cooccurrence.shape)
    
    return list(zip(rows, cols, values.cpu().numpy()))

cooccurrence_matrix = compute_cooccurrence_matrix(all_activations)

# cooccurrence_matrix[101]

# # Assuming your tensor is named 'all_activations'
# top_cooccurrences = find_cooccurring_features(all_activations)

# for i, (feat1, feat2, count) in enumerate(top_cooccurrences, 1):
#     print(f"{i}. Features {feat1} and {feat2} co-occur {count:.0f} times")

torch.Size([115548, 16384])
torch.Size([115548, 16384])


In [None]:
cooccurrence_matrix.diag()

tensor([183., 147.,  58.,  ...,   7., 310., 750.], device='cuda:0')

In [None]:
cooccurrence_matrix.shape

torch.Size([16384, 16384])

# Feature Effects

In [None]:
@torch.no_grad()
def get_feature_freqs(texts: list[str], model: HookedTransformer, sae: SAE, hook_point: str):
    all_sae_acts = torch.zeros(sae.cfg.d_sae, device=sae.W_enc.device)
    count = 0

    for text in tqdm(texts):
        _, acts = model.run_with_cache(text, names_filter=hook_point)
        acts = acts[hook_point]

        for batch in acts:
            sae_acts = sae.encode(batch)
            all_sae_acts += sae_acts.sum(dim=0)
            count += acts.shape[0]
    return all_sae_acts / count


In [None]:
def compute_feature_effects(feature, text="Tell me a story"):
    
    # generate_it(model, [(hp, partial(patch_resid, steering=poems, scale=150))], prompt=text)
    unsteered_texts = generate_it(
        model, 
        [],        
        max_new_tokens=35,
        prompt=text,
        batch_size=64,
        n_samples=1024
    )
    steered_texts = generate_it(
        model, 
        [(hp, partial(patch_resid, steering=feature, scale=150))],
        max_new_tokens=35,
        prompt=text,
        batch_size=64,
        n_samples=1024
    )
    # unsteered_texts = generate(model,
    #     hooks=[],
    #     max_new_tokens=35,
    #     prompt=text,
    #     batch_size=64,
    #     n_samples=1024,
    # )
    # steered_texts = generate(model,
    #     hooks=[(hp, partial(patch_resid, steering=feature, scale=150))],
    #     max_new_tokens=35,
    #     prompt=text,
    #     batch_size=64,
    #     n_samples=1024,
    # )
    unsteered_freqs = get_feature_freqs(unsteered_texts, model, sae, hp)
    steered_freqs = get_feature_freqs(steered_texts, model, sae, hp)
    return steered_freqs - unsteered_freqs

# All statistics

In [None]:
feature = 101 # poetry
feature_direction = sae.W_dec[feature]

## Co-occurrence matrix
cooccuring_features = cooccurrence_matrix[feature]

## Cosine
cosine_similarities = find_similar_features(feature, top_k=sae.W_dec.shape[0])

In [None]:
## Feature Effects
feature_effects = compute_feature_effects(feature_direction, text="Generate some random text about any topic")

100%|██████████| 1024/1024 [01:56<00:00,  8.82it/s]
100%|██████████| 1024/1024 [01:53<00:00,  9.03it/s]


In [None]:
print(cooccuring_features.shape)
print(cooccuring_features.topk(10).indices)
print(cooccuring_features.topk(10).values)
print(cosine_similarities.shape)
print(cosine_similarities.topk(10).indices)
print(cosine_similarities.topk(10).values)
print(feature_effects.shape)
print(feature_effects.topk(10).indices)
print(feature_effects.topk(10).values)

torch.Size([16384])
tensor([12169,  2599,  1878, 13095,  8962, 14587, 12414, 13215,  6802,   923],
       device='cuda:0')
tensor([32., 30., 21., 21., 20., 20., 18., 15., 13., 12.], device='cuda:0')
torch.Size([16384])
tensor([  101, 15057,  8975, 12145,  1178, 11722,  6698,  7868,   923,  8843],
       device='cuda:0')
tensor([1.0000, 0.2354, 0.2354, 0.2344, 0.2266, 0.2070, 0.1914, 0.1885, 0.1865,
        0.1826], device='cuda:0', dtype=torch.bfloat16)
torch.Size([16384])
tensor([ 5611, 10871,   990,  1178,  1401,  9702,  5576,  7603,  7644, 12013],
       device='cuda:0')
tensor([481.7472, 231.2720, 210.0735, 197.9990, 170.6366, 160.7101, 156.5237,
        151.3899, 146.5709, 123.4059], device='cuda:0')


In [None]:
visualize_feature(13095)

100%|██████████| 3/3 [00:00<00:00, 10.53it/s]


This implementation provides:
1. Weighted Overlap Score: Gives more weight to matches at higher ranks.
2. Rank-Biased Overlap (RBO): Compares rankings while allowing for incompleteness and giving more weight to higher ranks.
3. Normalized Discounted Cumulative Gain (nDCG): Measures the quality of ranking relative to the ideal ranking.
4. Spearman and Kendall correlations range from -1 to 1, with 1 indicating perfect positive correlation, -1 perfect negative correlation, and 0 no correlation.

In [None]:
import numpy as np
from scipy.stats import spearmanr, kendalltau
from pprint import pprint

def weighted_overlap_score(list1, list2, k=100):
    overlap = set(list1[:k]) & set(list2[:k])
    score = sum((1 / (i + 1)) for i, item in enumerate(list1[:k]) if item in overlap)
    return score / sum(1 / (i + 1) for i in range(k))

def rank_biased_overlap(list1, list2, p=0.9, k=None):
    if k is None:
        k = max(len(list1), len(list2))
    score = 0.0
    overlap = 0
    for i in range(k):
        if i < len(list1) and i < len(list2):
            overlap += int(list1[i] == list2[i])
        score += (p ** i) * (overlap / (i + 1))
    return score * (1 - p)

def ndcg(list1, list2, k=100):
    def dcg(list_):
        return np.sum(1 / np.log2(np.arange(2, min(k, len(list_)) + 2)))
    
    # Create a boolean mask for items in list1 that are also in list2[:k]
    mask = np.isin(list1, list2[:k])
    # Get the ranks of these items in list1
    ranks = np.where(mask)[0] + 1
    # Calculate DCG
    dcg_value = np.sum(1 / np.log2(ranks + 1))
    # Calculate IDCG
    idcg = dcg(np.arange(min(k, len(list1))))
    return dcg_value / idcg

def compare_rankings(ranking1, ranking2, k=100):
    r1 = np.array(ranking1)
    r2 = np.array(ranking2)
    
    wos = weighted_overlap_score(r1, r2, k)
    rbo = rank_biased_overlap(r1, r2, k=k)
    ndcg_score = ndcg(r1, r2, k)
    
    # Spearman and Kendall on full lists
    spearman, _ = spearmanr(r1, r2)
    kendall, _ = kendalltau(r1, r2)
    
    return {
        f"weighted_overlap@{k}": wos,
        f"rbo@{k}": rbo,
        f"ndcg@{k}": ndcg_score,
        "spearman": spearman,
        "kendall": kendall
    }

# Example usage
cooccurrence_top = cooccuring_features.topk(16384).indices.cpu().numpy()
cosine_top = cosine_similarities.topk(16384).indices.cpu().numpy()
effects_top = feature_effects.topk(16384).indices.cpu().numpy()

print("Cooccurrence vs Cosine:")
pprint(compare_rankings(cooccurrence_top, cosine_top))

print("\nCooccurrence vs Feature Effects:")
pprint(compare_rankings(cooccurrence_top, effects_top))

print("\nCosine vs Feature Effects:")
pprint(compare_rankings(cosine_top, effects_top))

print("Cooccurrence vs Cosine:")
pprint(compare_rankings(cooccurrence_top, cosine_top, 20))

print("\nCooccurrence vs Feature Effects:")
pprint(compare_rankings(cooccurrence_top, effects_top, 20))

print("\nCosine vs Feature Effects:")
pprint(compare_rankings(cosine_top, effects_top, 20))

Cooccurrence vs Cosine:
{'kendall': -0.0077192726454251345,
 'ndcg@100': 0.5036677635816204,
 'rbo@100': 0.0,
 'spearman': -0.01198233116981976,
 'weighted_overlap@100': 0.08428161978770628}

Cooccurrence vs Feature Effects:
{'kendall': 0.12704527940548127,
 'ndcg@100': 0.5294704992629292,
 'rbo@100': 0.0,
 'spearman': 0.1302298560780596,
 'weighted_overlap@100': 0.2947682661461735}

Cosine vs Feature Effects:
{'kendall': 0.004221339383812488,
 'ndcg@100': 0.5044086759653866,
 'rbo@100': 0.0006549556951818894,
 'spearman': 0.006286590117951959,
 'weighted_overlap@100': 0.2568755714287549}
Cooccurrence vs Cosine:
{'kendall': -0.0077192726454251345,
 'ndcg@20': 0.36283564978829835,
 'rbo@20': 0.0,
 'spearman': -0.01198233116981976,
 'weighted_overlap@20': 0.027795229652440172}

Cooccurrence vs Feature Effects:
{'kendall': 0.12704527940548127,
 'ndcg@20': 0.3486626420229472,
 'rbo@20': 0.0,
 'spearman': 0.1302298560780596,
 'weighted_overlap@20': 0.08603285368612434}

Cosine vs Feature Ef

100%|██████████| 3/3 [00:00<00:00, 10.16it/s]


In [None]:
# text = "Describe what science is"
# print("\nNext\n".join(generate_it(model, [], prompt=text)))
feature = 990 # beautiful nature

print("\n\nFeature 990, Scale 200:")
feature = 11722 # painting
print("\nNext\n".join(generate_it(model, [(hp, partial(patch_resid, steering=sae.W_dec[feature], scale=200))], prompt=text)))


print("\n\nFeature 990, Scale 300:")
print("\nNext\n".join(generate_it(model, [(hp, partial(patch_resid, steering=sae.W_dec[feature], scale=300))], prompt=text)))

print("\n\nFeature 990, Scale 400:")
print("\nNext\n".join(generate_it(model, [(hp, partial(patch_resid, steering=sae.W_dec[feature], scale=400))], prompt=text)))

# print("\n\nFeature 7603:")
# feature = 7603 # sunsets
# print("\nNext\n".join(generate_it(model, [(hp, partial(patch_resid, steering=sae.W_dec[feature], scale=300))], prompt=text)))




Feature 990, Scale 200:
<start_of_turn>user
Tell me a story<end_of_turn>
<start_of_turn>model
The old woman sat on a wooden bench beneath a sprawling willow, its weeping branches draping down around her like a curtain. The air was still, thick with the smell of
Next
<start_of_turn>user
Tell me a story<end_of_turn>
<start_of_turn>model
The old woman sat hunched over a crackling fireplace, the flames dancing in the warm glow. Her gnarled hands, thick with wrinkles, depicted a scene of bustling market life
Next
<start_of_turn>user
Tell me a story<end_of_turn>
<start_of_turn>model
The old woman sat on her porch, her wrinkles mirroring the texture of her aged hands. Rain lashed against the canvas, capturing the fury of the storm brewing outside. Her eyes
Next
<start_of_turn>user
Tell me a story<end_of_turn>
<start_of_turn>model
The rain fell with a ferocity unusual for the typically sunny skies of the Parisian cafe scene. A lone figure, perched beneath a canvas awning depicting a sultry T