In [None]:
import pandas as pd
from IPython.display import display, HTML
from langchain_openai import ChatOpenAI
from tqdm.auto import tqdm
import dotenv
import os

dotenv.load_dotenv()

In [65]:
llm = ChatOpenAI(
    base_url=os.getenv("OPENAI_BASE_URL"),
    api_key=os.getenv("OPENAI_API_KEY"),
    model="google/gemini-2.5-flash", 
    temperature=0.0,
)

def count_tokens(text_list: list[str]) -> int:
    total_tokens = 0
    for text in text_list:
        total_tokens += llm.get_num_tokens(text)
    return total_tokens

In [66]:
df = pd.read_csv("../result/viz_df.csv")
df["cluster_label"] = df["cluster_label"].astype(str)
clusters = df.groupby("cluster_label")

In [67]:
df.head(4)

Unnamed: 0,id,slang,description,umap_x,umap_y,cluster,cluster_prob,cluster_label
0,de7bed5e-22b7-516e-abdd-b6702d14f2c2,W,Shorthand for win,-1.586983,6.195272,29,1.0,Cluster 29
1,d0f78763-dafa-50e6-98cf-80e37b9ff0ac,L,Shorthand for loss/losing,5.152213,6.530412,7,0.534892,Cluster 7
2,acc7c7d5-9aaa-5d7d-9176-c8963f4ed424,L+ratio,Response to a comment or action on the interne...,5.337737,6.624958,7,0.494982,Cluster 7
3,99ca01c1-e02d-5c30-93f2-843624ab74bd,Dank,excellent or of very high quality,2.778757,6.843239,26,0.970819,Cluster 26


In [68]:
cluster_prompt = """
# title: Thematic Analysis of Slang Terms
## Your Task:
You will analyze slang terms and their descriptions from a single semantic cluster.

Use internal step-by-step reasoning (Chain of Thought) but DO NOT reveal it.
Output only the final concise summary.

Provide the following sections:

1. Theme Title (3–5 words)
2. Theme Description (1–2 sentences)
3. Usage Contexts (1 sentence)
4. Linguistic Traits (1 sentence)
5. Lexical Analysis (word formation, semantic shift, register — 1–2 sentences)
6. Distinctiveness (what makes this cluster unique — 1 sentence)

### Example:
<Example Input>
slay: used to praise someone for doing something impressively
ate: means someone performed extremely well
serve: used to express someone delivering strong style or attitude
mother: used online to praise a female celebrity or creator
queen: praise term for a powerful or admirable woman
<Example Output/>

<Example Output>
Theme Title: Performance & Empowerment Praise
Theme Description: This cluster includes expressive praise terms used to celebrate impressive actions or strong personal style. The tone is highly positive and tied to digital culture.
Usage Contexts: Common in social media comments, fan communities, and casual praise among peers.
Linguistic Traits: Hyperbolic tone, short expressive words, and strong positive emotion.
Lexical Analysis: Includes monosyllabic or simple forms (“slay,” “ate”), metaphorical semantic shifts (“mother”), and identity-marking honorifics (“queen”). Many terms come from AAVE and LGBTQ+ ballroom culture.
Distinctiveness: Focuses on amplified celebration and empowerment tied to performance and style.
<Example Output/>

Follow the same structure for the cluster below.

## Cluster Label:
{cluster_label}

## Cluster Text:
{cluster_text}
"""


### 0) Test the judgement model

In [69]:
# test

cluster_summaries = {}
label = "Cluster 0"
group = clusters.get_group(label)

combined_text = "\n".join([
    f"{row['slang']}: {row['description']}"
    for _, row in group.iterrows()
])

summary = llm.invoke(
    cluster_prompt.format(
        cluster_label=label,
        cluster_text=combined_text
    )
)

cluster_summaries[label] = summary


In [70]:
from pprint import pprint

print("Input:")
pprint(combined_text)

print("\nSummary:")
pprint(summary.content)

Input:
('ILY: I love you\n'
 'ILU: ILU: I Love You\n'
 'ILY :  I love you\n'
 '<3: sideways heart\n'
 '</3: broken heart\n'
 '<3333: a bigger heart\n'
 '143: I love you\n'
 '1432: I love you too\n'
 '459: I love you \n'
 '831: I love you\n'
 'AML: All my love\n'
 'ICFILWU: I could fall in love with you\n'
 'ILU: I love you\n'
 'ILUM: I love you man\n'
 'ILYSM: I love you so much\n'
 'ILY: I love you\n'
 'ImL:  I love you\n'
 'ISLY: I still love you\n'
 'IWALU: I will always love you\n'
 'IYQ: I like you\n'
 'LY: Love ya\n'
 'LYLAS: Love you like a sis\n'
 'LYLC: Love you like crazy\n'
 'LYSM: Love you so much')

Summary:
('Theme Title: Abbreviated Expressions of Love\n'
 'Theme Description: This cluster encompasses various shorthand forms, '
 'acronyms, and numerical codes used to convey feelings of love, affection, '
 'and emotional connection. These terms facilitate quick and intimate '
 'communication of romantic or platonic love.\n'
 'Usage Contexts: Primarily used in informal digi

### 1) Run thematic analysis on each cluster, gather summarize.

In [71]:
cluster_summaries = {}

for label, group in tqdm(clusters, desc="Processing Clusters"):
    # Combine slang + description for the cluster
    combined_text = "\n".join([
        f"{row['slang']}: {row['description']}"
        for _, row in group.iterrows()
    ])

    summary = llm.invoke(
        cluster_prompt.format(
            cluster_label=label,
            cluster_text=combined_text
        )
    )

    cluster_summaries[label] = summary


Processing Clusters:   0%|          | 0/31 [00:00<?, ?it/s]

In [72]:
all_summary_text = "\n\n".join(
    [f"{label}:\n{summary}" for label, summary in cluster_summaries.items()]
)

Show cluster summarize texts

In [73]:
df_summaries = pd.DataFrame([
    {"cluster_label": label, "summary": summary.content}  # first 150 chars
    for label, summary in cluster_summaries.items()
])

In [76]:
df_summaries = df_summaries.sort_values("cluster_label").reset_index(drop=True)

display(HTML(df_summaries.to_html().replace(
    '<table', '<table style="font-size:12px; font-family:Arial; line-height:1.5;"'
)))

Unnamed: 0,cluster_label,summary
0,Cluster 0,"Theme Title: Affectionate Digital Shorthand\nTheme Description: This cluster comprises various abbreviations and symbols used to express love and affection in digital communication. It reflects a desire for quick, intimate expressions of care.\nUsage Contexts: Primarily found in text messages, instant messaging, and social media comments between close individuals.\nLinguistic Traits: Predominantly initialisms, numerical codes, and emoticons, often conveying strong positive emotion.\nLexical Analysis: Features extensive use of acronyms and initialisms (ILY, LYSM), numerical substitutions (143), and symbolic representations (<3). These forms prioritize brevity and informal register.\nDistinctiveness: Characterized by its high density of direct ""I love you"" variations and numerical/symbolic codes for expressing affection."
1,Cluster 1,"Theme Title: Exclamations & Online Shorthand\nTheme Description: This cluster comprises acronyms and initialisms primarily used for expressing surprise, frustration, or affirmation, often in digital communication. It reflects a need for quick, informal expression.\nUsage Contexts: Predominantly found in online chats, text messages, and social media.\nLinguistic Traits: Heavy reliance on initialisms and acronyms, often representing common exclamatory phrases.\nLexical Analysis: Word formation is dominated by acronyms and initialisms, with some phonetic spellings (""OHHEMMGEE"") and abbreviations of common phrases (""On god""). Semantic shifts are minimal, largely retaining the original phrase's meaning.\nDistinctiveness: Characterized by its high density of initialisms and abbreviations for expressing immediate emotional reactions or brief status updates."
2,Cluster 10,"Theme Title: Acronyms, Initialisms & Digital Shorthand\nTheme Description: This cluster primarily consists of abbreviations and initialisms used for rapid communication, often in digital contexts. It also includes a few full slang terms.\nUsage Contexts: Predominantly found in online chats, text messages, gaming, and social media.\nLinguistic Traits: Heavy reliance on acronyms and initialisms, often phonetic spellings, and some portmanteaus.\nLexical Analysis: Word formation is dominated by initialisms and acronyms, with some phonetic spellings (""prolly,"" ""pwn"") and semantic shifts (""pick me girl""). The register is highly informal and context-dependent.\nDistinctiveness: Characterized by its extensive use of abbreviations for efficiency, particularly in digital communication, and a focus on parental surveillance."
3,Cluster 11,"Theme Title: Abbreviated Online Communication\nTheme Description: This cluster comprises acronyms and initialisms predominantly used for quick communication in digital environments. They facilitate brevity and convey specific questions, statements, or emotional responses.\nUsage Contexts: Primarily found in online chat, forums, gaming, and text messaging.\nLinguistic Traits: Heavy reliance on initialisms and phonetic abbreviations, often representing full phrases or concepts.\nLexical Analysis: Dominated by acronyms and initialisms (FAQ, QFT), with some phonetic spellings (QIK, Q_Q) and borrowings (QPSA?). Semantic shifts are minimal, focusing on abbreviation for efficiency.\nDistinctiveness: Characterized by its high density of 'Q' initialisms and its primary function of streamlining digital dialogue."
4,Cluster 12,"Theme Title: Acronyms and Parental References\nTheme Description: This cluster primarily consists of acronyms and initialisms, alongside terms related to parental figures or family roles. It reflects a mix of digital shorthand and familial associations.\nUsage Contexts: Predominantly found in informal digital communication, text messages, and online forums.\nLinguistic Traits: High prevalence of initialisms and abbreviations, often representing common phrases or roles.\nLexical Analysis: Word formation relies heavily on acronyms and initialisms (e.g., ""MBS,"" ""MYOB""), with some terms being direct abbreviations of common phrases (""MU,"" ""MUSM""). Semantic shifts are minimal, with most terms retaining their literal or abbreviated meaning.\nDistinctiveness: The cluster is characterized by its dual focus on brevity through acronyms and the recurring theme of parental or familial relationships."
5,Cluster 13,"Theme Title: Abbreviated Personal Statements\nTheme Description: This cluster comprises initialisms and acronyms used to express personal opinions, observations, or self-identification in a concise manner. They facilitate quick communication, especially in digital contexts.\nUsage Contexts: Predominantly found in online forums, chat rooms, text messages, and social media.\nLinguistic Traits: High frequency of initialisms, often starting with ""I"" for first-person perspective.\nLexical Analysis: Primarily formed through initialism (e.g., IMO, IC) and some phonetic abbreviations (Ion). Many terms are fixed phrases reduced to their first letters, indicating a shift towards efficiency in digital communication.\nDistinctiveness: Characterized by its focus on personal perspective and identity, communicated through highly condensed, often standardized, abbreviations."
6,Cluster 14,"Theme Title: Initialisms & Negative Stereotypes\nTheme Description: This cluster primarily consists of initialisms and abbreviations for common phrases, alongside a prominent negative stereotype. It reflects a mix of informal communication shortcuts and a specific cultural critique.\nUsage Contexts: Predominantly found in informal digital communication, text messages, and online forums.\nLinguistic Traits: Heavy reliance on initialisms, phonetic spellings, and abbreviated forms for brevity.\nLexical Analysis: Features numerous acronyms and initialisms (KISS, KYS, KMA), phonetic spellings (KEWL, MKAY), and a significant semantic shift for ""Karen"" to denote a specific behavioral stereotype. Register is overwhelmingly informal.\nDistinctiveness: The unique blend of practical communication shortcuts with a highly charged, culturally specific pejorative term."
7,Cluster 15,"Theme Title: Acronyms of Indifference & Ignorance\nTheme Description: This cluster comprises acronyms and initialisms primarily expressing a lack of knowledge, understanding, or concern. They serve as quick, informal ways to convey disinterest or uncertainty.\nUsage Contexts: Predominantly used in informal digital communication like texting, instant messaging, and online forums.\nLinguistic Traits: Characterized by initialisms and acronyms, often representing common phrases, and conveying a dismissive or uncertain tone.\nLexical Analysis: Formed almost exclusively through initialism (e.g., IDC, IDK) or phonetic abbreviation (IDUNNO), these terms are highly informal and belong to a digital register. Semantic shifts are minimal, as they directly represent their source phrases.\nDistinctiveness: The overwhelming reliance on initialisms to convey disinterest or lack of knowledge, reflecting a need for rapid, concise communication in digital contexts."
8,Cluster 16,"Theme Title: Initialism & Abbreviation Communication\nTheme Description: This cluster comprises a wide array of initialisms and abbreviations, primarily used for rapid communication in digital and gaming contexts. They serve to condense common phrases, roles, or concepts into shorter forms.\nUsage Contexts: Predominantly found in online chats, text messages, forums, and gaming environments.\nLinguistic Traits: Characterized by heavy use of acronyms, initialisms, and phonetic abbreviations, often omitting vowels or entire words.\nLexical Analysis: Word formation relies almost exclusively on initialisms (DTR, DAE) and phonetic shortening (d00d, DNR). Semantic shifts are minimal, as terms largely retain their original meaning in abbreviated form. The register is highly informal and context-dependent.\nDistinctiveness: The sheer volume and diversity of ""D"" initialisms highlight a pervasive trend of linguistic compression across various digital subcultures."
9,Cluster 17,"Theme Title: Abbreviated Digital Communication\nTheme Description: This cluster comprises a wide array of acronyms and initialisms primarily used for rapid, informal communication in digital environments. They serve to convey common phrases, expressions, and acknowledgments efficiently.\nUsage Contexts: Predominantly found in text messages, online chats, social media, and other forms of asynchronous digital communication.\nLinguistic Traits: High prevalence of initialisms and numerical homophones, often sacrificing grammatical completeness for brevity.\nLexical Analysis: Word formation relies heavily on acronyms and initialisms, with some instances of phonetic abbreviation (e.g., ""10Q"" for ""thank you"") and semantic compression. The register is exclusively informal.\nDistinctiveness: Characterized by its extreme brevity and reliance on shared digital literacy for comprehension, reflecting a need for speed and conciseness in online interactions."


In [None]:
# count total output tokens
print(f"Total output tokens from cluster summaries: {count_tokens(list(df_summaries['summary']))}")

Total output tokens from cluster summaries: 5502


### 2) Run thematic analysis on all cluster, gain overall analysis.

In [82]:
llm_candidates = [
    ChatOpenAI(
        base_url=os.getenv("OPENAI_BASE_URL"),
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model_name,
        temperature=0.0,
    )
    for model_name in [
        "google/gemini-2.5-flash",
        "x-ai/grok-4.1-fast:free",
        "openai/gpt-4o-mini",
    ]
]

In [90]:
final_prompt = """
You are given concise summaries of semantic clusters from a slang dataset.

Use internal reasoning (Chain of Thought) but DO NOT reveal it. 
Only output your final analysis.

Your task is to **summarize each cluster** by giving:

1. A **unique theme name** for the cluster (3–5 words)
2. A **short description** (1–2 sentences) explaining the cluster's semantic focus and social/cultural context

Do NOT create new overarching themes or merge clusters.  
Keep the output structured, concise, and readable.  

Format example:

Cluster 0: Theme Name: <theme title>
Description: <short description>

Cluster 1: Theme Name: <theme title>
Description: <short description>

...

### Cluster Summaries:
{all_summaries}
"""


In [91]:
final_thematic_analysis = {}

for llm in tqdm(llm_candidates):
    try:
        final_thematic_analysis[llm.model_name] = llm.invoke(
            final_prompt.format(
                all_summaries=all_summary_text
            )
        )
        print(f"Model {llm.model_name} succeeded.")

    except Exception as e:
        print(f"Model {llm.model_name} failed with error: {e}")
        continue

  0%|          | 0/3 [00:00<?, ?it/s]

Model google/gemini-2.5-flash succeeded.
Model x-ai/grok-4.1-fast:free succeeded.
Model openai/gpt-4o-mini succeeded.


In [92]:
df_thematic = pd.DataFrame([
    {"model_name": model, "thematic_analysis": text}
    for model, text in final_thematic_analysis.items()
])

display(HTML(df_thematic.to_html().replace(
    '<table', '<table style="font-size:12px; font-family:Arial; line-height:1.2;"'
)))

Unnamed: 0,model_name,thematic_analysis
0,google/gemini-2.5-flash,"content='Cluster 0: Theme Name: Affectionate Digital Shorthand\nDescription: This cluster comprises abbreviations and symbols used to express love and affection in digital communication, reflecting a desire for quick, intimate expressions of care.\n\nCluster 1: Theme Name: Exclamations & Online Shorthand\nDescription: This cluster comprises acronyms and initialisms primarily used for expressing surprise, frustration, or affirmation in digital communication, reflecting a need for quick, informal expression.\n\nCluster 2: Theme Name: Abbreviated Digital Communication\nDescription: This cluster comprises a wide array of abbreviations and acronyms primarily used for quick, informal communication in digital environments, conveying various states, actions, and conversational cues.\n\nCluster 3: Theme Name: Keyboard-Centric Status Updates\nDescription: This cluster comprises acronyms and abbreviations indicating a user\'s presence, absence, or activity status relative to their computer, serving as quick communication cues in digital environments.\n\nCluster 4: Theme Name: Acronyms and Abbreviations\nDescription: This cluster primarily consists of acronyms and abbreviations used for brevity in digital communication, alongside a few slang terms, facilitating quick information exchange across various online platforms.\n\nCluster 5: Theme Name: Initialism & Abbreviation Communication\nDescription: This cluster comprises a wide array of initialisms and abbreviations, predominantly used for quick communication in digital or informal contexts, often conveying common phrases, reactions, or status updates.\n\nCluster 6: Theme Name: Acronyms and Abbreviations\nDescription: This cluster primarily consists of acronyms and abbreviations used for brevity in digital communication, alongside a few full phrases, serving to quickly convey common greetings, game-related terms, or general descriptors.\n\nCluster 7: Theme Name: Digital Communication Shorthand\nDescription: This cluster comprises abbreviations and acronyms primarily used for expressing laughter, negative reactions, or common phrases in online and text-based communication, reflecting a need for conciseness and rapid expression.\n\nCluster 8: Theme Name: Acronyms for Daily Communication\nDescription: This cluster comprises acronyms and initialisms primarily used to denote temporal markers, status updates, or common daily occurrences, serving as shorthand for frequently used phrases.\n\nCluster 9: Theme Name: Internet & Abbreviated Communication\nDescription: This cluster encompasses terms related to online personas, digital interactions, and highly abbreviated communication, reflecting the rapid, informal nature of internet discourse.\n\nCluster 10: Theme Name: Acronyms, Initialisms & Digital Shorthand\nDescription: This cluster primarily consists of abbreviations and initialisms used for rapid communication, often in digital contexts, and also includes a few full slang terms.\n\nCluster 11: Theme Name: Abbreviated Online Communication\nDescription: This cluster comprises acronyms and initialisms predominantly used for quick communication in digital environments, facilitating brevity and conveying specific questions, statements, or emotional responses.\n\nCluster 12: Theme Name: Acronyms and Parental References\nDescription: This cluster primarily consists of acronyms and initialisms, alongside terms related to parental figures or family roles, reflecting a mix of digital shorthand and familial associations.\n\nCluster 13: Theme Name: Abbreviated Personal Statements\nDescription: This cluster comprises initialisms and acronyms used to express personal opinions, observations, or self-identification in a concise manner, facilitating quick communication, especially in digital contexts.\n\nCluster 14: Theme Name: Initialisms & Negative Stereotypes\nDescription: This cluster primarily consists of initialisms and abbreviations for common phrases, alongside a prominent negative stereotype, reflecting a mix of informal communication shortcuts and a specific cultural critique.\n\nCluster 15: Theme Name: Acronyms of Indifference & Ignorance\nDescription: This cluster comprises acronyms and initialisms primarily expressing a lack of knowledge, understanding, or concern, serving as quick, informal ways to convey disinterest or uncertainty.\n\nCluster 16: Theme Name: Initialism & Abbreviation Communication\nDescription: This cluster comprises a wide array of initialisms and abbreviations, primarily used for rapid communication in digital and gaming contexts, serving to condense common phrases, roles, or concepts.\n\nCluster 17: Theme Name: Abbreviated Digital Communication\nDescription: This cluster comprises a wide array of acronyms and initialisms primarily used for rapid, informal communication in digital environments, serving to convey common phrases, expressions, and acknowledgments efficiently.\n\nCluster 18: Theme Name: Digital Farewell & Departure\nDescription: This cluster comprises abbreviations and phonetic spellings used to signify departure or a temporary farewell in digital communication, with terms primarily informal and aiming for brevity.\n\nCluster 19: Theme Name: Alphanumeric & Abbreviated Communication\nDescription: This cluster features highly abbreviated terms, often employing numbers or single letters as phonetic or symbolic stand-ins, primarily for informal digital communication where brevity was prioritized.\n\nCluster 20: Theme Name: Digital Shorthand & Gaming Jargon\nDescription: This cluster comprises a mix of internet acronyms, initialisms, and gaming-specific terms, primarily used for quick communication and to describe player types or game mechanics, reflecting a need for brevity and specialized language.\n\nCluster 21: Theme Name: Diverse Initialisms and Modern Slang\nDescription: This cluster encompasses a wide array of initialisms, acronyms, and contemporary slang terms, often characterized by their brevity and specific usage contexts, reflecting both digital communication shortcuts and evolving social lexicon.\n\nCluster 22: Theme Name: Digital Communication Acronyms\nDescription: This cluster comprises acronyms and initialisms primarily used for quick communication in digital environments, facilitating brevity and efficiency in online interactions.\n\nCluster 23: Theme Name: Acronyms, Abbreviations & Exclamations\nDescription: This cluster primarily consists of abbreviated forms and acronyms used for quick communication, alongside a few expressive exclamations, reflecting a need for brevity and informal interaction.\n\nCluster 24: Theme Name: Abbreviated Conversational Fillers\nDescription: This cluster comprises numerous abbreviations and initialisms primarily used to convey quick, informal interjections or clarifications in digital communication, often serving as conversational placeholders or mood indicators.\n\nCluster 25: Theme Name: Abbreviated Digital Communication\nDescription: This cluster comprises a wide array of acronyms, initialisms, and shortened forms primarily used for quick, informal communication in digital contexts, with many terms expressing reactions, providing information, or denoting social relationships.\n\nCluster 26: Theme Name: Contemporary Digital Vernacular\nDescription: This cluster encompasses a wide range of modern slang terms reflecting current social dynamics, online interactions, and personal expressions, often with a humorous or ironic undertone.\n\nCluster 27: Theme Name: ""You"" and Related Abbreviations\nDescription: This cluster comprises abbreviations and acronyms predominantly centered around the pronoun ""you,"" often used in direct address or to inquire about the recipient, facilitating quick communication.\n\nCluster 28: Theme Name: Texting Abbreviations for Questions\nDescription: This cluster comprises common initialisms and acronyms used in digital communication to quickly ask basic conversational questions, serving as informal greetings or information requests.\n\nCluster 29: Theme Name: Abbreviated Digital Communication\nDescription: This cluster comprises a wide array of initialisms, acronyms, and phonetic spellings predominantly used for rapid communication in digital environments, reflecting a drive for brevity and efficiency in online interactions.\n\nNoise: Theme Name: Digital Communication & Cultural Commentary\nDescription: This cluster encompasses terms used for rapid digital communication, expressing strong emotions, and offering social or cultural commentary, reflecting the dynamic and often informal nature of online interaction.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 1511, 'prompt_tokens': 15087, 'total_tokens': 16598, 'completion_tokens_details': {'accepted_prediction_tokens': None, 'audio_tokens': None, 'reasoning_tokens': 0, 'rejected_prediction_tokens': None, 'image_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'google/gemini-2.5-flash', 'system_fingerprint': None, 'id': 'gen-1764442066-rTsXrzsvfxugF9SVPElw', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--a7bf6dff-869d-499b-a2d6-c75632b311b1-0' usage_metadata={'input_tokens': 15087, 'output_tokens': 1511, 'total_tokens': 16598, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}}"
1,x-ai/grok-4.1-fast:free,"content='Cluster 0: Theme Name: Loving Text Abbreviations\nDescription: Short forms like ILY and 143 express affection in digital messages, fostering intimate bonds in casual online interactions among friends and partners.\n\nCluster 1: Theme Name: Emotional Exclamation Shorthand\nDescription: Acronyms for surprise or affirmation in chats reflect fast-paced digital exchanges, common in social media for quick emotional reactions.\n\nCluster 2: Theme Name: O-Prefixed Chat Shortcuts\nDescription: Brevity-focused abbreviations starting with O denote actions or hugs in online forums, emphasizing efficiency in informal digital conversations.\n\nCluster 3: Theme Name: Keyboard Status Indicators\nDescription: Acronyms like AFK signal user availability near computers, rooted in gaming and chat room culture for real-time coordination.\n\nCluster 4: Theme Name: General Digital Acronyms\nDescription: Varied abbreviations shorten phrases for online brevity, adapting to social media and forums in informal youth communication.\n\nCluster 5: Theme Name: G-Initialism Chat Tools\nDescription: Initialisms like G2G and GOAT enable rapid status updates in gaming and texting, highlighting informal digital efficiency.\n\nCluster 6: Theme Name: Gaming Greeting Abbreviations\nDescription: Short forms for hellos and good luck in online games promote concise well-wishes within competitive digital communities.\n\nCluster 7: Theme Name: Laughter Negative Shorthand\nDescription: Acronyms for laughs like LOL and negatives in texts capture emotional quick-fires, central to humorous internet banter.\n\nCluster 8: Theme Name: Daily Routine Acronyms\nDescription: Initialisms like OOTD mark time and status in social media, reflecting lifestyle sharing in casual online posts.\n\nCluster 9: Theme Name: Easy Online Personas\nDescription: Abbreviations and terms like e-boy describe digital identities, thriving in internet subcultures for playful self-expression.\n\nCluster 10: Theme Name: Parental Evasion Acronyms\nDescription: Initialisms for quick chats often dodge parents, prevalent in teen texting and gaming for private digital talks.\n\nCluster 11: Theme Name: Q-Question Initialisms\nDescription: Q-starting abbreviations streamline queries in chats, aiding efficient dialogue in gaming and online forums.\n\nCluster 12: Theme Name: Mom-Related Acronyms\nDescription: Family role shorthand mixes with acronyms for informal texts, evoking teen-parent dynamics in digital secrecy.\n\nCluster 13: Theme Name: Personal Opinion Initialisms\nDescription: I-led abbreviations voice views in forums, enabling concise self-expression in opinionated online discussions.\n\nCluster 14: Theme Name: Karen Stereotype Shorthand\nDescription: K-initialisms blend with negative labels like Karen, critiquing behavior in viral social media rants.\n\nCluster 15: Theme Name: Indifference Knowledge Acronyms\nDescription: IDK-style terms convey apathy in texts, mirroring casual dismissal in youth digital interactions.\n\nCluster 16: Theme Name: D-Status Initialisms\nDescription: D-prefixed abbreviations report statuses in gaming chats, prioritizing speed in multiplayer environments.\n\nCluster 17: Theme Name: Numerical Thanks Acronyms\nDescription: Brevity tools like 10Q speed up acknowledgments in messaging, embodying early SMS character limits.\n\nCluster 18: Theme Name: Farewell Chat Abbreviations\nDescription: L8R-like terms end digital convos politely, standard in instant messaging for brief goodbyes.\n\nCluster 19: Theme Name: Leetspeak Alphanumeric Codes\nDescription: Number-letter swaps like 8 for ate define early texting playfulness, nostalgic in retro internet culture.\n\nCluster 20: Theme Name: Gaming Noob Acronyms\nDescription: N-terms like NPC mock players in games, extending to social commentary in online communities.\n\nCluster 21: Theme Name: Modern B-Slang Mix\nDescription: B-initialisms with terms like Based span dating and politics, evolving rapidly in Gen Z social media.\n\nCluster 22: Theme Name: Status Update Acronyms\nDescription: BRB-like codes track presence in chats, essential for sustained digital group interactions.\n\nCluster 23: Theme Name: Sarcastic Query Shorthand\nDescription: ORLY and YEET abbreviate reactions in forums, fueling ironic humor in online discourse.\n\nCluster 24: Theme Name: Joke Clarifier Initialisms\nDescription: J-led terms like JK fill convos in texts, softening statements in playful teen messaging.\n\nCluster 25: Theme Name: Emotional F-Abbreviation Cluster\nDescription: FML-style acronyms vent frustrations in social media, amplifying feelings in informal digital vents.\n\nCluster 26: Theme Name: Viral Social Commentary Slang\nDescription: Terms like tea and sus dissect relationships online, driven by AAVE and meme culture among youth.\n\nCluster 27: Theme Name: You-Directed Query Shorthand\nDescription: U/UR abbreviations ask about recipients in texts, facilitating direct, casual check-ins digitally.\n\nCluster 28: Theme Name: Question Greeting Acronyms\nDescription: WBU-like TLAs open chats efficiently, standard in SMS for quick personal updates.\n\nCluster 29: Theme Name: W-Action Initialisms\nDescription: W-starting codes like WTG praise or query in forums, boosting motivational brevity online.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 2114, 'prompt_tokens': 13187, 'total_tokens': 15301, 'completion_tokens_details': {'accepted_prediction_tokens': None, 'audio_tokens': None, 'reasoning_tokens': 1070, 'rejected_prediction_tokens': None}, 'prompt_tokens_details': None}, 'model_provider': 'openai', 'model_name': 'x-ai/grok-4.1-fast:free', 'system_fingerprint': None, 'id': 'gen-1764442072-YtxTtjvNLw0wi6GnOBmR', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--900b962d-5f75-4c8b-a01e-6c8f67a1625f-0' usage_metadata={'input_tokens': 13187, 'output_tokens': 2114, 'total_tokens': 15301, 'input_token_details': {}, 'output_token_details': {'reasoning': 1070}}"
2,openai/gpt-4o-mini,"content='Cluster 0: Theme Name: Affectionate Digital Shorthand \nDescription: This cluster comprises various abbreviations and symbols used to express love and affection in digital communication, reflecting a desire for quick, intimate expressions of care.\n\nCluster 1: Theme Name: Exclamations & Online Shorthand \nDescription: This cluster consists of acronyms and initialisms primarily used for expressing surprise, frustration, or affirmation in digital communication, highlighting the need for quick, informal expression.\n\nCluster 10: Theme Name: Acronyms, Initialisms & Digital Shorthand \nDescription: This cluster includes abbreviations and initialisms used for rapid communication in digital contexts, emphasizing efficiency and informal language.\n\nCluster 11: Theme Name: Abbreviated Online Communication \nDescription: This cluster features acronyms and initialisms for quick communication in digital environments, facilitating brevity and conveying specific questions or emotional responses.\n\nCluster 12: Theme Name: Acronyms and Parental References \nDescription: This cluster consists of acronyms and initialisms related to parental figures or family roles, blending digital shorthand with familial associations.\n\nCluster 13: Theme Name: Abbreviated Personal Statements \nDescription: This cluster includes initialisms and acronyms expressing personal opinions or self-identification, facilitating quick communication in digital contexts.\n\nCluster 14: Theme Name: Initialisms & Negative Stereotypes \nDescription: This cluster features initialisms and abbreviations alongside a prominent negative stereotype, reflecting informal communication shortcuts and cultural critique.\n\nCluster 15: Theme Name: Acronyms of Indifference & Ignorance \nDescription: This cluster comprises acronyms and initialisms expressing a lack of knowledge or concern, serving as informal ways to convey disinterest or uncertainty.\n\nCluster 16: Theme Name: Initialism & Abbreviation Communication \nDescription: This cluster includes a variety of initialisms and abbreviations for rapid communication in digital and gaming contexts, condensing common phrases into shorter forms.\n\nCluster 17: Theme Name: Abbreviated Digital Communication \nDescription: This cluster features acronyms and initialisms for rapid, informal communication in digital environments, conveying various states and conversational cues.\n\nCluster 18: Theme Name: Digital Farewell & Departure \nDescription: This cluster comprises abbreviations and phonetic spellings used to signify departure or temporary farewells in digital communication, focusing on brevity.\n\nCluster 19: Theme Name: Alphanumeric & Abbreviated Communication \nDescription: This cluster features highly abbreviated terms using numbers or letters as phonetic or symbolic stand-ins, reflecting the informal nature of early digital communication.\n\nCluster 2: Theme Name: Abbreviated Digital Communication \nDescription: This cluster includes a wide array of acronyms and initialisms for quick, informal communication in digital environments, conveying various states and actions.\n\nCluster 20: Theme Name: Digital Shorthand & Gaming Jargon \nDescription: This cluster comprises internet acronyms and gaming-specific terms for quick communication, reflecting the need for brevity and specialized language in digital environments.\n\nCluster 21: Theme Name: Diverse Initialisms and Modern Slang \nDescription: This cluster encompasses a variety of initialisms, acronyms, and contemporary slang terms, reflecting digital communication shortcuts and evolving social lexicon.\n\nCluster 22: Theme Name: Digital Communication Acronyms \nDescription: This cluster features acronyms and initialisms for quick communication in digital environments, emphasizing brevity and efficiency in online interactions.\n\nCluster 23: Theme Name: Acronyms, Abbreviations & Exclamations \nDescription: This cluster includes abbreviated forms and acronyms for quick communication, alongside expressive exclamations, reflecting a need for brevity in informal interactions.\n\nCluster 24: Theme Name: Abbreviated Conversational Fillers \nDescription: This cluster comprises abbreviations and initialisms used as conversational placeholders or mood indicators in digital communication, focusing on brevity.\n\nCluster 25: Theme Name: Abbreviated Digital Communication \nDescription: This cluster features a wide array of acronyms, initialisms, and shortened forms for quick, informal communication in digital contexts, expressing reactions and social relationships.\n\nCluster 26: Theme Name: Contemporary Digital Vernacular \nDescription: This cluster encompasses modern slang terms reflecting current social dynamics and online interactions, highlighting both positive and negative judgments.\n\nCluster 27: Theme Name: ""You"" and Related Abbreviations \nDescription: This cluster includes abbreviations centered around the pronoun ""you,"" facilitating quick communication in digital contexts.\n\nCluster 28: Theme Name: Texting Abbreviations for Questions \nDescription: This cluster features common initialisms and acronyms used to quickly ask basic conversational questions in digital communication.\n\nCluster 29: Theme Name: Abbreviated Digital Communication \nDescription: This cluster comprises a wide array of initialisms, acronyms, and phonetic spellings for rapid communication in digital environments, emphasizing brevity and efficiency.\n\nCluster 3: Theme Name: Keyboard-Centric Status Updates \nDescription: This cluster includes acronyms and abbreviations indicating a user\'s presence or activity status relative to their computer, serving as quick communication cues.\n\nCluster 4: Theme Name: Acronyms and Abbreviations \nDescription: This cluster features acronyms and abbreviations for brevity in digital communication, facilitating quick information exchange across various online platforms.\n\nCluster 5: Theme Name: Initialism & Abbreviation Communication \nDescription: This cluster includes a variety of initialisms and abbreviations for quick communication in digital contexts, conveying common phrases and reactions.\n\nCluster 6: Theme Name: Acronyms and Abbreviations \nDescription: This cluster features acronyms and abbreviations for brevity in digital communication, conveying greetings, game-related terms, or general descriptors.\n\nCluster 7: Theme Name: Digital Communication Shorthand \nDescription: This cluster comprises abbreviations and acronyms for expressing laughter or negative reactions in online communication, reflecting a need for conciseness.\n\nCluster 8: Theme Name: Acronyms for Daily Communication \nDescription: This cluster includes acronyms and initialisms denoting temporal markers or status updates, serving as shorthand for frequently used phrases. \n\nCluster 9: Theme Name: Internet & Abbreviated Communication \nDescription: This cluster encompasses terms related to online personas and highly abbreviated communication, reflecting the rapid, informal nature of internet discourse.\n\nNoise: Theme Name: Digital Communication & Cultural Commentary \nDescription: This cluster includes terms for rapid digital communication and social commentary, reflecting the dynamic and informal nature of online interaction.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 1285, 'prompt_tokens': 13419, 'total_tokens': 14704, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'openai/gpt-4o-mini', 'system_fingerprint': 'fp_560af6e559', 'id': 'gen-1764442100-eWPNKjQ1TLHH4v2nCf0q', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--4186f3ca-e011-410e-b5e3-d9b10707007b-0' usage_metadata={'input_tokens': 13419, 'output_tokens': 1285, 'total_tokens': 14704, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}"


In [97]:
pprint(df_thematic['thematic_analysis'][0].content)

('Cluster 0: Theme Name: Affectionate Digital Shorthand\n'
 'Description: This cluster comprises abbreviations and symbols used to '
 'express love and affection in digital communication, reflecting a desire for '
 'quick, intimate expressions of care.\n'
 '\n'
 'Cluster 1: Theme Name: Exclamations & Online Shorthand\n'
 'Description: This cluster comprises acronyms and initialisms primarily used '
 'for expressing surprise, frustration, or affirmation in digital '
 'communication, reflecting a need for quick, informal expression.\n'
 '\n'
 'Cluster 2: Theme Name: Abbreviated Digital Communication\n'
 'Description: This cluster comprises a wide array of abbreviations and '
 'acronyms primarily used for quick, informal communication in digital '
 'environments, conveying various states, actions, and conversational cues.\n'
 '\n'
 'Cluster 3: Theme Name: Keyboard-Centric Status Updates\n'
 'Description: This cluster comprises acronyms and abbreviations indicating a '
 "user's presence, a

In [98]:
pprint(df_thematic['thematic_analysis'][1].content)

('Cluster 0: Theme Name: Loving Text Abbreviations\n'
 'Description: Short forms like ILY and 143 express affection in digital '
 'messages, fostering intimate bonds in casual online interactions among '
 'friends and partners.\n'
 '\n'
 'Cluster 1: Theme Name: Emotional Exclamation Shorthand\n'
 'Description: Acronyms for surprise or affirmation in chats reflect '
 'fast-paced digital exchanges, common in social media for quick emotional '
 'reactions.\n'
 '\n'
 'Cluster 2: Theme Name: O-Prefixed Chat Shortcuts\n'
 'Description: Brevity-focused abbreviations starting with O denote actions or '
 'hugs in online forums, emphasizing efficiency in informal digital '
 'conversations.\n'
 '\n'
 'Cluster 3: Theme Name: Keyboard Status Indicators\n'
 'Description: Acronyms like AFK signal user availability near computers, '
 'rooted in gaming and chat room culture for real-time coordination.\n'
 '\n'
 'Cluster 4: Theme Name: General Digital Acronyms\n'
 'Description: Varied abbreviations shor

In [99]:
pprint(df_thematic['thematic_analysis'][2].content)

('Cluster 0: Theme Name: Affectionate Digital Shorthand  \n'
 'Description: This cluster comprises various abbreviations and symbols used '
 'to express love and affection in digital communication, reflecting a desire '
 'for quick, intimate expressions of care.\n'
 '\n'
 'Cluster 1: Theme Name: Exclamations & Online Shorthand  \n'
 'Description: This cluster consists of acronyms and initialisms primarily '
 'used for expressing surprise, frustration, or affirmation in digital '
 'communication, highlighting the need for quick, informal expression.\n'
 '\n'
 'Cluster 10: Theme Name: Acronyms, Initialisms & Digital Shorthand  \n'
 'Description: This cluster includes abbreviations and initialisms used for '
 'rapid communication in digital contexts, emphasizing efficiency and informal '
 'language.\n'
 '\n'
 'Cluster 11: Theme Name: Abbreviated Online Communication  \n'
 'Description: This cluster features acronyms and initialisms for quick '
 'communication in digital environments, faci