In [None]:
import string

import pandas as pd
import themefinder
from langchain_openai import AzureChatOpenAI
from themefinder import theme_mapping

In [None]:
# The question that every response in the dataset is answering
question = "What improvements would you most like to see in local public transportation?"

# Read the responses from the JSON file that sits next to this notebook
responses = pd.read_json(path_or_buf="./example_data.json")

In [None]:
# Check the dataframe has the following columns: response_id, response
# response_ids should start from 1
required_columns = {"response_id", "response"}
missing_columns = required_columns - set(responses.columns)
# Fail here with a clear message instead of relying on a visual check of the output
assert not missing_columns, f"responses is missing required columns: {sorted(missing_columns)}"

# Preview the first rows only, rather than rendering the whole dataframe
responses.head()

In [None]:
# Build the chat model object that is passed to themefinder. Swap in the class for your
# use-case, e.g. ChatGoogleGenerativeAI if using Google's Gemini or ChatAnthropic for Claude.
# NOTE: make sure your .env file is correctly set up with the correct API key and any other
# variables you need.
# NOTE(review): nothing visible in this notebook loads the .env file - confirm the variables
# actually reach the kernel's environment.
llm = AzureChatOpenAI(model_name="gpt-4o", temperature=0)

In [None]:
# Running the whole pipeline end-to-end in one go.
results = await themefinder.find_themes(
    responses, 
    llm=llm, 
    question=question,
    )

In [None]:
# Display the "themes" entry of the pipeline results
results["themes"]

In [None]:
# The results of each stage of the pipeline can be viewed by accessing the keys of the
# returned dictionary. A notebook cell only renders its last expression automatically, so the
# first output is shown explicitly with `display` (available by default in IPython/Jupyter).
display(results["themes"])
# or
results["mapping"]

In [None]:
# Show the themes again for reference before they are modified in the next step
results["themes"]

In [None]:
# If you want to modify the themes generated by the LLM (e.g. merge similar themes, or add new
# ones such as a default fallback theme like "Other"), edit the themes directly and feed them
# into the mapping stage of the pipeline.
generated_themes = results["themes"][["topic_id", "topic"]]

# Give the fallback theme the first uppercase letter not already used as a topic_id.
# Indexing string.ascii_uppercase by the number of themes raises IndexError once there are
# 26 or more themes, and can collide with an existing id when the generated ids are not
# exactly A, B, C, ... in order.
used_topic_ids = set(generated_themes["topic_id"])
fallback_topic_id = next(
    (letter for letter in string.ascii_uppercase if letter not in used_topic_ids),
    None,
)
if fallback_topic_id is None:
    raise ValueError("No unused single-letter topic_id is left for the fallback theme")

fallback_theme = pd.DataFrame(
    [
        {
            "topic_id": fallback_topic_id,
            "topic": "Other: The response does not match any of the listed themes",
        }
    ]
)

# concat with ignore_index=True always appends a new row, whereas assigning through
# `.loc[len(themes)]` would overwrite a row if that index label already existed.
themes = pd.concat([generated_themes, fallback_theme], ignore_index=True)

In [None]:
# It is possible for an LLM to be unable to process a response, if is too long or violates the models content filters, these responses can be reviewed in the 2nd element of the returned object for each task
mapping, unprocessed = await theme_mapping(
    responses,
    llm=llm,
    refined_themes_df=themes,
    question=question,
)

In [None]:
# Export the mapping to a spreadsheet
mapping.to_excel(excel_writer="mapping.xlsx")