In [16]:
import os
import pandas as pd
import numpy as np
import asyncio
from dotenv import load_dotenv

load_dotenv()

from text_lloom.src.text_lloom import workbench as wb

In [17]:
print("Starting lloom demo")

# Passing None lifts pandas' display limit for each option, so long comment
# bodies and long frames are shown without truncation.
for option_name in ('display.max_colwidth', 'display.max_rows'):
    pd.set_option(option_name, None)

Starting lloom demo


In [18]:
# Load the raw comment export and show which columns it provides.
df = pd.read_excel("data/2024_election.xlsx")
print(df.columns)

# Keep only the identifier, the recommendation count and the comment text.
df = df.loc[:, ['commentID', 'recommendations', 'commentBody']]
print(df.head())

Index(['commentID', 'status', 'commentSequence', 'userID', 'userDisplayName',
       'userLocation', 'userTitle', 'userURL', 'picURL', 'commentTitle',
       'commentBody', 'createDate', 'updateDate', 'approveDate',
       'recommendations', 'replyCount', 'replies', 'editorsSelection',
       'parentID', 'parentUserDisplayName', 'depth', 'commentType', 'trusted',
       'recommendedFlag', 'permID', 'isAnonymous', 'text'],
      dtype='object')
   commentID  recommendations  \
0          0                8   
1          1                9   
2          2               22   
3          3                9   
4          4               29   

                                                                                                                                                                                                                                                                                                                                                                 

In [19]:
def print_concepts(lloom_instance, name_only=False, examples=True,
                   id_col="id", text_col="commentBody"):
    """Print every concept held by a LLooM instance.

    Args:
        lloom_instance: Object exposing ``concepts`` (a mapping whose values
            have ``id``, ``name``, ``prompt`` and ``members`` attributes) and
            ``in_df`` (the DataFrame of input documents).
        name_only: When True, print a single ``"<name> (Size: <n>)"`` line per
            concept and nothing else.
        examples: When True (and ``name_only`` is False), also list the text
            of every document that belongs to the concept.
        id_col: Column of ``in_df`` holding the integer document IDs that
            ``concept.members`` refer to.
        text_col: Column of ``in_df`` holding the document text.

    Returns:
        None. Everything is written to stdout.
    """
    docs = lloom_instance.in_df
    pad = " " * 12  # keeps the header layout of the original report

    for concept in lloom_instance.concepts.values():
        if name_only:
            print(f"{concept.name} (Size: {len(concept.members)})")
            continue

        res = (
            f"\n{pad}Concept ID: {concept.id}.\n"
            f"{pad}Concept Name: {concept.name}.\n"
            f"{pad}Concept Prompt: {concept.prompt}.\n\n"
            f"{pad}Concept Size: {len(concept.members)}.\n\n"
            f"{pad}"
        )

        if examples:
            # Member IDs may arrive as strings; the ID column holds integers.
            member_ids = [int(member) for member in concept.members]
            comments = docs.loc[docs[id_col].isin(member_ids), text_col].tolist()
            res += "Members: "
            # Every member gets its own bullet, including the first one, and
            # is flattened to a single line so the list stays readable.
            res += "".join("\n\t*" + text.replace("\n", " ") for text in comments)

        res += "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"

        print(res)


In [20]:
def save_concepts_to_json(lloom_instance, filename, id_col="id", text_col="commentBody"):
    """Write a ``{concept name: [member texts]}`` mapping to a timestamped JSON file.

    Args:
        lloom_instance: Object exposing ``concepts`` (a mapping whose values
            have ``name`` and ``members`` attributes) and ``in_df`` (the
            DataFrame of input documents).
        filename: Path prefix of the output file. The current local time and
            a ``.json`` extension are appended to it.
        id_col: Column of ``in_df`` holding the integer document IDs that
            ``concept.members`` refer to.
        text_col: Column of ``in_df`` holding the document text.

    Returns:
        The path of the file that was written.
    """
    import datetime
    import json
    import os

    docs = lloom_instance.in_df

    # Build the whole payload before touching the filesystem so that a lookup
    # error cannot leave an empty or half-written file behind.
    concepts_and_comments = {}
    for concept_id, concept in lloom_instance.concepts.items():
        # Member IDs may arrive as strings; the ID column holds integers.
        member_ids = [int(member) for member in concept.members]
        comments = docs.loc[docs[id_col].isin(member_ids), text_col].tolist()
        key = concept.name
        if key in concepts_and_comments:
            # Two concepts can share a name; disambiguate with the concept's
            # key instead of silently overwriting the earlier entry.
            key = f"{concept.name} ({concept_id})"
        concepts_and_comments[key] = comments

    # Use '-' rather than ':' in the time part: ':' is not allowed in file
    # names on Windows.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out_path = f"{filename}{timestamp}.json"

    parent_dir = os.path.dirname(out_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(concepts_and_comments, f, indent=4)
    return out_path

In [21]:
lloom_nofilter_newsummarize = wb.lloom(
    df=df,
    text_col="commentBody",
    # id_col="commentID",  # Optional
    debug=True,

    # # Model specification
    distill_model_name = "gpt-3.5-turbo-0125",
    embed_model_name = "text-embedding-3-small",
    synth_model_name = "gpt-3.5-turbo-0125",
    score_model_name = "gpt-3.5-turbo-0125",
)
custom_prompts = {
    "distill_filter": None,
    "distill_summarize": lloom_nofilter_newsummarize.show_prompt("distill_summarize"),
    "synthesize": None,
}
params = {
    "filter_n_quotes": 3, #shouldn't matter
    "summ_n_bullets": 2,
    "synth_n_concepts": 0,
}
cur_seed = "explicit or implied claims or arguments"  # Optionally replace with string
await lloom_nofilter_newsummarize.gen(seed=cur_seed, custom_prompts=custom_prompts, params=params)
print(lloom_nofilter_newsummarize.summary())

No `id_col` provided. Created an ID column named 'id'.


[1mEstimated cost[0m: $0.05
**Please note that this is only an approximate cost estimate**


[1m[48;5;228mAction required[0m[0m


[48;5;117mDistill-summarize[0m
⠋ LoadingBatched version
processing distill summarize llm results
all_ex_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119]
⠇ Loading bullet_df:       id                                                           commentBody
0      0                         Biden stalls in primaries with filler phrases
1      0          Advisors

In [9]:
# Persist the concept -> member-comment mapping under concept_logs/.
save_concepts_to_json(
    lloom_instance=lloom_nofilter_newsummarize,
    filename="concept_logs/2024_election_concepts_",
)

In [10]:
# Full report: header for every concept followed by its member comments.
print_concepts(
    lloom_instance=lloom_nofilter_newsummarize,
    name_only=False,
    examples=True,
)


            Concept ID: 6bc09393-621e-4a62-b253-65663f11c38a.
            Concept Name: Debate preparation focus on Biden's advantage.
            Concept Prompt: Does the text example discuss how Biden's debate preparation gives him an advantage?.

            Concept Size: 6.

            Members: 
	Previously I submitted a comment without editing it. Here is what I meant to say:  I think anyone who has actually listened to Trump's weird tangents over the past few months and still somehow thinks he is going to do well is either not very intelligent or is in some partisan (or cultish) state of denial.   Plus, he can't talk over Biden and therefore can't belittle or try to block out every words he says. He will actually have to wait until Biden is finished to respond, and I'm honestly not sure he can keep a clear thought in his head that long.   Biden just needs to stay focused on the questions he is asked, and when responding to Trump, do so in a straightforward fashion, without groa

In [11]:
# Compact overview: one "<name> (Size: <n>)" line per concept.
print_concepts(lloom_instance=lloom_nofilter_newsummarize, name_only=True)

Debate preparation focus on Biden's advantage (Size: 6)
Dismissal of Complaints (Size: 4)
Media Bias Towards Biden (Size: 10)
Character Assassination through Drug Allegations (Size: 2)
Joe's Performance Concerns (Size: 6)
Criticism of Biden's Handling (Size: 4)
Media Bias Criticism (Size: 5)
Concerns about Trump's threat to democracy (Size: 13)
Call for Better Candidates (Size: 9)
Importance of November Elections (Size: 11)
Questioning Trump's legitimacy (Size: 5)
Unwavering support for Trump (Size: 4)
Analogical Reasoning (Size: 4)
Support for Biden's Campaign (Size: 6)
Critique of Trump's Behavior in Debates (Size: 3)
Trump's Impact on Elections (Size: 4)
Trump's Mic Control Impact (Size: 5)
Assessment of Biden's Health (Size: 5)
Criticism of Trump's Lack of Accomplishments (Size: 3)
Critique of Trump's Mental State (Size: 12)
Critique of Trump's Leadership (Size: 4)
Assessment of Trump's Performance (Size: 1)
Impact of COVID and Trump's Presidency (Size: 4)


In [12]:
# List the keys of the intermediate DataFrames kept by the run; as the output
# shows, each key is a (step name, timestamp) tuple.
lloom_nofilter_newsummarize.saved_dfs.keys()

dict_keys([('Distill-summarize', '2024-07-05-14-58-42'), ('Cluster', '2024-07-05-14-58-45'), ('Synthesize', '2024-07-05-14-58-47'), ('Review-remove', '2024-07-05-14-58-48'), ('Review-merge', '2024-07-05-14-58-50')])

In [14]:
# Select the most recent 'Synthesize' snapshot instead of hard-coding one
# run's timestamp, so this cell keeps working after the pipeline is re-run.
# Keys are (step name, 'YYYY-MM-DD-HH-MM-SS') tuples, so among the keys that
# share a step name max() returns the chronologically latest one.
synth_key = max(
    key for key in lloom_nofilter_newsummarize.saved_dfs if key[0] == 'Synthesize'
)
synth_df = lloom_nofilter_newsummarize.saved_dfs[synth_key]
synth_df

Unnamed: 0,id,commentBody,concept,concept_name,concept_prompt,seed,concept_namePrompt
0,39,Preference for Joe Biden over Trump,6a51ca76-5f47-4864-adbe-9646093b8066,Criticism of Trump's character and policies,Does the text example involve criticism of Trump's character or policies?,explicit or implied claims or arguments,Criticism of Trump's character and policies: Does the text example involve criticism of Trump's character or policies?
1,80,Biden should shame Trump in debate prep,6bc09393-621e-4a62-b253-65663f11c38a,Debate preparation focus on Biden's advantage,Does the text example discuss how Biden's debate preparation gives him an advantage?,explicit or implied claims or arguments,Debate preparation focus on Biden's advantage: Does the text example discuss how Biden's debate preparation gives him an advantage?
3,38,Massive inflation will follow,68090bec-34e8-4d6e-88d1-ee819929ec33,Concerns about COVID Impact,"Does the text example discuss potential negative impacts of COVID such as death toll, inflation, or disagreement with pandemic statistics?",explicit or implied claims or arguments,"Concerns about COVID Impact: Does the text example discuss potential negative impacts of COVID such as death toll, inflation, or disagreement with pandemic statistics?"
5,103,Dismissing complaints about time changes,7e03b0d1-e49d-482e-8bbe-d738ed4d5471,Dismissal of Complaints,Does the text example involve dismissing or belittling complaints or concerns?,explicit or implied claims or arguments,Dismissal of Complaints: Does the text example involve dismissing or belittling complaints or concerns?
6,93,"Media focuses on Biden's mistakes, not Trump's",0933e28f-3888-4b12-88fb-f11f68ca8ce3,Media Bias Towards Biden,Does the text example suggest a bias towards Biden in media coverage?,explicit or implied claims or arguments,Media Bias Towards Biden: Does the text example suggest a bias towards Biden in media coverage?
8,115,Speculation on Trump's drug cocktail for debate,5fa3bdfc-8f6a-4e6b-b381-53232be97b91,Character Assassination through Drug Allegations,Does the text example involve implying drug use to discredit a political figure?,explicit or implied claims or arguments,Character Assassination through Drug Allegations: Does the text example involve implying drug use to discredit a political figure?
9,14,Joe struggles with speaking and walking,c7a1ba5c-b8cd-44d4-a623-bfc2d357fc73,Joe's Performance Concerns,Assess if text example expresses concern about Joe's performance abilities.,explicit or implied claims or arguments,Joe's Performance Concerns: Assess if text example expresses concern about Joe's performance abilities.
11,107,Criticism of Biden not stepping aside,c79a59ac-7873-4a0f-8392-26e38412a092,Criticism of Biden's Handling,Does the text example involve criticism of Biden's actions or decisions?,explicit or implied claims or arguments,Criticism of Biden's Handling: Does the text example involve criticism of Biden's actions or decisions?
13,66,Moderators should set bias aside,8cd4ba70-0b4f-428d-bb9d-0ab87633b0a2,Media Bias Criticism,Does the text example criticize media bias or lack of objectivity?,explicit or implied claims or arguments,Media Bias Criticism: Does the text example criticize media bias or lack of objectivity?
15,93,Trump is an existential threat to democracy,6224bc66-5a8e-4a00-8a09-d2b34b026edd,Concerns about Trump's threat to democracy,Does the text express worries about Trump's impact on democracy?,explicit or implied claims or arguments,Concerns about Trump's threat to democracy: Does the text express worries about Trump's impact on democracy?


In [15]:
# Count the synthesized rows per concept name, largest group first.
# Note: despite the name, the result is a Series indexed by concept_name.
grouped_synth_df = (
    synth_df
    .groupby('concept_name')
    .size()
    .sort_values(ascending=False)
)
grouped_synth_df

concept_name
Analogical Reasoning                                1
Assessment of Biden's Health                        1
Trump's Mic Control Impact                          1
Trump's Impact on Elections                         1
Support for Biden's Campaign                        1
Questioning Trump's legitimacy                      1
Negative impact of Trump's presidency               1
Media Bias Towards Biden                            1
Media Bias Criticism                                1
Joe's Performance Concerns                          1
Importance of November Elections                    1
Housing vs. Assisted Living Decision                1
Focus on Biden's Answering Ability                  1
Expectation vs Reality                              1
Dismissal of Complaints                             1
Debates as Entertainment                            1
Debate preparation focus on Biden's advantage       1
Critique of Trump's Mental State                    1
Critique of Tru