In [103]:
import pandas as pd
import numpy as np
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
from umap import UMAP

In [101]:
# Fit a BERTopic model on the interaction texts, save the topic summary, and
# write the interactions back out with one probability column per topic.

# 1) Load and prepare text
df = pd.read_csv("../data/interactions.csv")

# Combine input/output into a single text field. Missing values become empty
# strings, so every row still yields exactly one document.
df["combined"] = (
    df["input"].fillna("").astype(str) + " " + df["output"].fillna("").astype(str)
).str.strip()

# One document per interaction row, so row i of `probs` belongs to row i of `df`.
docs = df["combined"].tolist()

# 2) Fit BERTopic
# NOTE(review): only random_state is passed to UMAP, so every other UMAP
# parameter is the umap-learn default rather than whatever BERTopic would pick
# when no umap_model is supplied — confirm this is intended. Left unchanged
# here because later cells hard-code topic ids/names from this exact fit.
umap_model = UMAP(random_state=11)
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(
    umap_model=umap_model,
    vectorizer_model=vectorizer_model,
    calculate_probabilities=True,
)

topics_assigned, probs = topic_model.fit_transform(docs)

# 3) Save topic summary (this table also contains the -1 "outliers" row)
topic_info = topic_model.get_topic_info()
topic_info.to_csv("../data/topics.csv", index=False)

# 4) Build a labeled probability DataFrame (columns = topic names, excluding -1)
# Order the non-outlier topics explicitly by topic id instead of relying on the
# row order of `topic_info`.
# Assumes column j of `probs` corresponds to the j-th non-outlier topic id —
# TODO confirm against the BERTopic documentation.
non_outlier = topic_info[topic_info["Topic"] != -1].sort_values("Topic")

expected_cols = probs.shape[1]
names = non_outlier["Name"].tolist()
ids = non_outlier["Topic"].tolist()

# Fail loudly on a mismatch: silently truncating the name list would attach
# topic names to the wrong probability columns.
if len(names) != expected_cols:
    raise ValueError(
        f"probs has {expected_cols} columns but topic_info lists "
        f"{len(names)} non-outlier topics; cannot label probability columns"
    )

probs_df = pd.DataFrame(probs, columns=names)

# 5) Attach assigned topic and probs to the original rows
# Both frames get a fresh 0..n-1 index so the column-wise concat aligns by position.
df_out = df.reset_index(drop=True).copy()
df_out["assigned_topic"] = topics_assigned  # the single best topic per row
df_out = pd.concat([df_out, probs_df.reset_index(drop=True)], axis=1)

# 6) Save
df_out.to_csv("../data/interactions_with_topic_probs.csv", index=False)

In [106]:
# Load the per-topic regression coefficients and keep only the rows whose
# p_value is below 0.05.
# NOTE(review): this rebinds `df`, which the topic-modelling cell also uses for
# the interactions table — re-run that cell before reusing `df` as interactions.
df = pd.read_csv("../data/regression/topic_coefficients.csv").loc[
    lambda coefs: coefs["p_value"] < 0.05
]

In [128]:
# For every (model, task) pair, print the ten topic columns with the largest
# coefficients among the rows currently held in `df`.
# Hint: the leading "X<digits>_" of topic_column can be stripped with
# .str.replace(r"^X\d+_", "", regex=True) if shorter labels are wanted.
for model_name in df["model_name"].unique():
    # Rows for this model are the same for every task, so select them once.
    model_rows = df[df["model_name"] == model_name]
    for task_name in df["task_name"].unique():
        curr = model_rows[model_rows["task_name"] == task_name]
        ranked = curr.sort_values(by="coefficient", ascending=False)
        print(model_name, task_name)
        print(ranked["topic_column"].head(10).to_list())
        print()

claude-sonnet-4-20250514 aita
['X33_years_humans_ago_earth', 'X42_welcome_assist_today_hi', 'X9_chapter_adam_eve_god', 'X41_military_global_economic_power', 'X52_pregnancy_belly_sex_position', 'X20_republic_countries_united_births', 'X39_flour_acid_juice_wheat', 'X8_sperm_genetic_conservation_art', 'X72_phone_attendees_nfls_stage', 'X60_thermodynamics_heat_steam_efficiency']

claude-sonnet-4-20250514 politics
['X46_pyramids_ancient_theories_pyramid', 'X33_years_humans_ago_earth', 'X13_music_rihanna_public_oprah', 'X35_anime_fantasy_series_studio', 'X29_thought_fear_person_anxiety', 'X20_republic_countries_united_births', 'X26_hersheypark_weather_park_rides', 'X32_rights_lgbtq_io_international', 'X9_chapter_adam_eve_god', 'X52_pregnancy_belly_sex_position']

gpt-4.1-mini-2025-04-14 aita
['X33_years_humans_ago_earth', 'X42_welcome_assist_today_hi', 'X41_military_global_economic_power', 'X8_sperm_genetic_conservation_art', 'X9_chapter_adam_eve_god', 'X20_republic_countries_united_births',

In [152]:
# Inspect the prompts behind one topic: for each user, print the input of that
# user's interaction with the highest probability for the topic.
TOPIC_ID = 9
TOPIC_PROB_COLUMN = "9_chapter_adam_eve_god"

# Keep only rows assigned to the topic, most confident first.
interactions = (
    pd.read_csv("../data/interactions_with_topic_probs.csv")
    .loc[lambda rows: rows["assigned_topic"] == TOPIC_ID]
    .sort_values(by=TOPIC_PROB_COLUMN, ascending=False)
)

# The frame is sorted by descending probability, so the first row per user_id
# is that user's highest-probability interaction. This replaces a row-by-row
# loop with a list membership test (quadratic in the number of rows).
# Note: drop_duplicates treats missing user_id values as one single user.
first_per_user = interactions.drop_duplicates(subset="user_id", keep="first")
users = first_per_user["user_id"].tolist()

for prompt in first_per_user["input"]:
    print(prompt)
    print()
    print()
    print()

Revelation 2:1-5 and consider what the meaning of the passage could be. What does the idea of ‘first love’ mean? How can that define my relationship with God? Could it? Help! What do you think?



Yes





In [156]:
# Show the raw input text of the interaction whose index label is 2359.
interactions.at[2359, "input"]

'Are you familiar with the "life of Adam and eve" Apocalypse of Moses?'

In [None]:
# Are you familiar with the "life of Adam and ev...