# American Pizza Project — LLooM Theme Induction Experimentation, Cloudless (Using Locally Hosted AI Tools)

This notebook explores how LLooM can be applied to the American Pizza Project (APP) using locally hosted AI tools:
1) Set up imports, configure the local API endpoint and placeholder API key, and load the dataset
2) Preprocess / slice the dataset -> Experiment with filtering the input data to include only particular questions!
3) Induce themes with LLooM -> Experiment with various seed theme terms!
4) Review the results and create data visualizations


In [None]:
# Imports
import os, asyncio, tiktoken
import pandas as pd
import text_lloom.workbench as wb
from text_lloom.llm import Model, EmbedModel
from openai import OpenAI  # client lib ONLY to talk to Ollama's OpenAI-style API

# Point OpenAI-style clients at the local Ollama server (no cloud)
os.environ.update(
    {
        "OPENAI_BASE_URL": "http://localhost:11434/v1",
        "OPENAI_API_KEY": "ollama",  # placeholder: any non-empty string
    }
)

# Load data
# Defaults to the original author's local path; set the PIZZA_INTERVIEWS_PATH
# environment variable to load the workbook from another location without
# editing this cell.
data_path = os.environ.get(
    "PIZZA_INTERVIEWS_PATH",
    "/Users/ltraum/Documents/GitHub/AmericanPizzaProject/data/pizza_interviews.xlsx",
)
df = pd.read_excel(data_path)

# Participant-level demographic fields that accompany the response text
demo_cols = list((
    "participant_id",
    "age",
    "city_of_residence",
    "state_of_residence",
    "region_of_residence",
    "income",
    "pizza_consumption",
    "food_restrictions",
))

# All response columns: q1_response through q5_response
response_cols = [f"q{number}_response" for number in range(1, 6)]

# Add a column that isolates the text of interest; toggle which frame is analysed below

def _join_responses(row) -> str:
    """Concatenate one row's non-empty answers into a single space-separated string.

    Null cells and blank / whitespace-only answers are skipped. Every value is
    converted with ``str`` *before* the blank check, so a non-string cell
    (for example a number) is kept instead of raising ``AttributeError``
    on ``.strip()``.
    """
    answers = (str(value) for value in row if pd.notnull(value))
    return " ".join(answer for answer in answers if answer.strip() != "")


df["all_responses"] = df[response_cols].apply(_join_responses, axis=1)

# Both candidate inputs expose the analysed text under a common "text" column.
q_all_df = df[demo_cols + ["all_responses"]].rename(columns={"all_responses": "text"})  # every response column combined
q_4_df = df[demo_cols + ["q4_response"]].rename(columns={"q4_response": "text"})  # question 4 only

# --- Tokenizer & counter (picked explicitly here; LLooM won't guess)
# NOTE(review): cl100k_base is presumably only an approximation of the local
# model's own tokenizer — confirm the counts are close enough for budgeting.
enc = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    """Return the number of tokens ``enc`` produces for ``text``.

    Falsy input (``None`` or an empty string) counts as zero tokens.
    """
    if not text:
        return 0
    return len(enc.encode(text))

# --- LLM setup: talk to Ollama via OpenAI-compatible client
def setup_llm_fn(_api_key_unused):
    """Create the OpenAI-style client used to reach the local Ollama endpoint.

    The single argument is accepted but ignored; a fixed placeholder key and
    base URL are always used.
    """
    ollama_client = OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    )
    return ollama_client

# --- How to call the LLM (Gemma3:27b) for LLooM
async def call_llm_fn(model, prompt: str):
    """Send one user prompt to the local gemma3:27b model and return its reply.

    Args:
        model: object exposing ``client`` (an OpenAI-style client) and ``args``
            (a dict that may carry ``temperature`` and ``max_tokens``).
        prompt: text sent as a single user message.

    Returns:
        ``(text, (0, 0))`` — the content of the first choice plus a placeholder
        token-usage pair (usage is not read from the response).
    """
    def _request():
        return model.client.chat.completions.create(
            model="gemma3:27b",                     # your local model tag
            messages=[{"role": "user", "content": prompt}],
            temperature=model.args.get("temperature", 0.2),
            max_tokens=model.args.get("max_tokens", 1200),
        )

    # The client call is synchronous; running it directly inside this coroutine
    # would block the event loop for the whole generation. Hand it to the
    # default thread pool so other awaiting tasks keep running.
    resp = await asyncio.get_running_loop().run_in_executor(None, _request)
    text = resp.choices[0].message.content
    # Token usage is not collected here; return a placeholder
    return text, (0, 0)

# --- Build LLooM with fully custom models (no tokenizer auto-detect anywhere)

def _make_gemma_model():
    """Return a new ``Model`` spec for the local gemma3:27b chat model.

    Called once per pipeline role so each role gets its own instance.
    """
    return Model(
        setup_fn=setup_llm_fn,
        fn=call_llm_fn,
        name="gemma3:27b",
        context_window=131_072,     # from `ollama show gemma3:27b`
        cost=(0, 0),                # local = free
        rate_limit=(5, 10),
        count_tokens_fn=count_tokens,
    )


def _setup_embed_client(_api_key_unused):
    """Create the OpenAI-style client used for embedding requests (argument ignored)."""
    return OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")


def _embed_with_nomic(model, texts):
    """Embed ``texts`` with ``nomic-embed-text`` and return ``(embeddings, (0, 0))``.

    A single string yields one embedding; any other input yields a list with
    one embedding per returned item. The second element is a placeholder.
    """
    response = model.client.embeddings.create(model="nomic-embed-text", input=texts)
    if isinstance(texts, str):
        vectors = response.data[0].embedding
    else:
        vectors = [item.embedding for item in response.data]
    return vectors, (0, 0)


l = wb.lloom(
    df=q_4_df,
    id_col="participant_id",
    text_col="text",

    # Generation models (Gemma3:27b local), one fresh spec per role
    distill_model=_make_gemma_model(),
    synth_model=_make_gemma_model(),
    score_model=_make_gemma_model(),

    # Clustering embeddings via Ollama's embeddings endpoint
    # (If you already pulled `nomic-embed-text`, you can also keep OpenAIEmbedModel.
    #  The custom path is used here to stay consistent and avoid any hidden defaults.)
    cluster_model=EmbedModel(
        setup_fn=_setup_embed_client,
        fn=_embed_with_nomic,
        name="nomic-embed-text",
        cost=0,
        batch_size=256,
    ),
)

# --- Run it (async)
async def extract_lloom_concepts(l, max_concepts=5, seed=""):
    """Run ``l.gen_auto`` and return its result together with ``l.export_df()``.

    Args:
        l: LLooM workbench instance (anything exposing an awaitable
            ``gen_auto`` and an ``export_df`` method).
        max_concepts: forwarded to ``gen_auto``.
        seed: forwarded to ``gen_auto``.

    Returns:
        ``(score_df, export_df)`` — the awaited ``gen_auto`` result, then the
        export produced afterwards.
    """
    scores = await l.gen_auto(max_concepts=max_concepts, seed=seed, debug=False)
    return scores, l.export_df()



In [None]:
# Generate and score up to 5 concepts seeded with "family".
# Uses top-level `await`, so this must run in a notebook / IPython cell.
score_df, export_df = await extract_lloom_concepts(l, max_concepts=5, seed="family")

Cost estimates not available for distill model `gemma3:27b`
Cost estimates not available for cluster model `nomic-embed-text`
Cost estimates not available for synth model `gemma3:27b`


[48;5;117mDistill-filter[0m
✅ Done    


[48;5;117mDistill-summarize[0m
✅ Done    


[48;5;117mCluster[0m
✅ Done    


[48;5;117mSynthesize[0m
⠹ Loading 



✅ Done    
✅ Done with concept generation!


[1mActive concepts[0m (n=5):
- [1mKid-Focused Fun[0m: Does the text highlight activities or experiences that are enjoyable for children within a family context?
- [1mGenerational Knowledge[0m: Does the text describe information, skills, or beliefs passed down from older to younger family members?
- [1mSimple Togetherness[0m: Does the text suggest that uncomplicated experiences foster stronger family bonds?
- [1mUnique Methods[0m: Does the text describe a special or unusual way of making or enjoying pizza that could be a family talking point?
- [1mFamily Dynamics[0m: Does the text describe interactions or challenges within a family?
Cost estimates not available for score model `gemma3:27b`
  0%|          | 0/5 [00:00<?, ?it/s]ERROR json_load on: ```json
{
    "pattern_results": [
        {
            "example_id": "49",
            "rationale": "The text focuses on personal eating habits and doesn't mention any activities or expe

In [None]:
# main outputs tables: show the first five rows of each result frame
for heading, frame in (
    ("Score DataFrame:", score_df),
    ("Exported Concepts/Themes:", export_df),
):
    print(heading)
    display(frame.head(5))

Score DataFrame:


Unnamed: 0,doc_id,text,concept_id,concept_name,concept_prompt,score,rationale,highlight,concept_seed
0,2,"Pizza boxes are part of the ritual, especially...",bddb223c-a4f6-411b-94ee-ab4de6232a74,Kid-Focused Fun,Does the text highlight activities or experien...,1.0,The text explicitly mentions activities enjoye...,Kids love eating from the box - feels special ...,family
1,3,"Call me old-fashioned, but I prefer actual din...",bddb223c-a4f6-411b-94ee-ab4de6232a74,Kid-Focused Fun,Does the text highlight activities or experien...,0.0,The text focuses on personal preference regard...,"Call me old-fashioned, but I prefer actual din...",family
2,4,Boxes are essential for sharing and keeping th...,bddb223c-a4f6-411b-94ee-ab4de6232a74,Kid-Focused Fun,Does the text highlight activities or experien...,0.5,The text describes casual sharing and experien...,Boxes are essential for sharing and keeping th...,family
3,5,kids think eating from boxes is fun.\nPizza is...,bddb223c-a4f6-411b-94ee-ab4de6232a74,Kid-Focused Fun,Does the text highlight activities or experien...,1.0,The text directly mentions children finding en...,kids think eating from boxes is fun.,family
4,9,"I remember when pizza came wrapped in paper, n...",bddb223c-a4f6-411b-94ee-ab4de6232a74,Kid-Focused Fun,Does the text highlight activities or experien...,0.75,The text describes a personal experience with ...,I always eat pizza with my hands - that's how ...,family


Exported Concepts/Themes:


Unnamed: 0,concept,criteria,summary,rep_examples,prevalence,n_matches,highlights
0,Family Dynamics,Does the text describe interactions or challen...,"We navigate small, loving disagreements—like s...","[Good manners matter, even with casual food.\n...",0.06,3,[My kids eat straight from the box which drive...
1,Generational Knowledge,"Does the text describe information, skills, or...","We pass down simple, cherished traditions—like...",[I learned 60 years ago\nthat's how I learned ...,0.02,1,[I teach my kids to eat pizza with their hands...
2,Kid-Focused Fun,Does the text highlight activities or experien...,Kids find joy in simple things—like eating dir...,[kids think eating from boxes is fun.\nPizza i...,0.06,3,[My wife sometimes puts it on plates for the k...
3,Simple Togetherness,Does the text suggest that uncomplicated exper...,"Keep things easy, embrace familiar routines, a...","[Boxes are fine, whatever keeps it warm.\nI ea...",0.06,3,[Don't see the point in complicating something...
4,Unique Methods,Does the text describe a special or unusual wa...,We embrace unconventional approaches—eating di...,[I eat with my hands like everyone else.\nthou...,0.06,3,[My kids eat straight from the box which drive...


In [None]:
# report per-theme results for future app
# Themes are numbered by position (1..n) rather than by index label, so the
# numbering stays sequential even if export_df's index is not a 0-based range.
for theme_number, (_index_label, row) in enumerate(export_df.iterrows(), start=1):
    print(f"\nTheme {theme_number}: {row['concept']}")
    print(f"Criteria: {row['criteria']}")
    print(f"Summary: {row['summary']}")
    print(f"Prevalence: {row['prevalence']*100:.1f}% of participants")
    print("Representative Examples:")
    for ex in row['rep_examples']:
        print("-", ex)
    print("-" * 40)

# Save results to CSV for future dashboard use
export_df.to_csv("lloom_themes_summary.csv", index=False)
score_df.to_csv("lloom_theme_scores.csv", index=False)


Theme 1: Family Dynamics
Criteria: Does the text describe interactions or challenges within a family?
Summary: We navigate small, loving disagreements—like snack presentation—creating family rituals around simple joys, such as pizza night.
Prevalence: 6.0% of participants
Representative Examples:
- Good manners matter, even with casual food.
I always transfer pizza to proper plates - eating from boxes feels messy and informal.
I prefer sitting at the table with real dinnerware.
- My kids eat straight from the box which drives my wife crazy
My wife crazy
but I think it's part of pizza's casual appeal
----------------------------------------

Theme 2: Generational Knowledge
Criteria: Does the text describe information, skills, or beliefs passed down from older to younger family members?
Summary: We pass down simple, cherished traditions—like how to enjoy pizza—ensuring valuable knowledge and customs continue with the next generation.
Prevalence: 2.0% of participants
Representative Examp

In [None]:
# Show the LLooM matrix widget, sliced by the region_of_residence column.
# The commented-out call below is the variant without a slice column.
#l.vis()
l.vis(slice_col="region_of_residence")

MatrixWidget(data='[{"id":"All","value":3,"example":"All","_my_score":0,"concept":"Kid-Focused Fun","n":3},{"i…

### Optional demographic data filtering step

In [None]:
# Preprocess / slice data
# This filtering step needs work

def _as_filter_values(values):
    """Normalize one filter argument into a list of allowed values.

    Returns an empty list when the argument should not filter at all
    (``None``, an empty collection, or a falsy scalar such as ``""``).
    """
    if values is None:
        return []
    # A lone string (or other non-iterable scalar) means "match this one value";
    # Series.isin rejects bare strings, so wrap it in a list.
    if isinstance(values, (str, bytes)) or not hasattr(values, "__iter__"):
        return [values] if values else []
    # Materialize any iterable (list, tuple, set, Series, array) so emptiness
    # can be checked without relying on the object's own truthiness.
    return list(values)


def filter_demographics(
    df, regions=None, ages=None, income=None, diet=None
):
    """Return the rows of ``df`` that match every supplied demographic filter.

    Args:
        df: DataFrame containing the columns ``region_of_residence``, ``age``,
            ``income`` and ``food_restrictions`` (only columns with an active
            filter are accessed).
        regions: allowed ``region_of_residence`` values.
        ages: allowed ``age`` values (matched exactly against the column).
        income: allowed ``income`` values.
        diet: allowed ``food_restrictions`` values.

        Each filter may be a single value or any iterable of values; ``None``
        or an empty collection leaves that column unfiltered.

    Returns:
        A new DataFrame with a fresh 0-based index; ``df`` itself is not modified.

    Raises:
        KeyError: if a column needed by an active filter is missing from ``df``.
    """
    criteria = (
        ("region_of_residence", regions),
        ("age", ages),
        ("income", income),
        ("food_restrictions", diet),
    )
    df_filtered = df.copy()
    for column, requested in criteria:
        allowed = _as_filter_values(requested)
        if allowed:
            df_filtered = df_filtered[df_filtered[column].isin(allowed)]
    return df_filtered.reset_index(drop=True)

# Example: filter to Northeast participants in the '18-40' age group
# filtered = filter_demographics(df, regions=['Northeast'], ages=['18-40'])
# filtered.head()