In [13]:
from pathlib import Path

import pandas as pd
from tqdm.notebook import tqdm

from utils.models import Topic
from utils.constants import CURRENT_TOPICS, N_EXAMPLES_OF_EACH_TOPIC
from topic_modelling.agent import extract_topics_from_user_message, format_user_message
from topic_modelling.vectordb import create_topics_vectordb

In [None]:
# Load the feedback CSV into a DataFrame and show its row count as the cell output.
feedback_csv_path = "input_feedback_data/snowflake_data/useful_cols_and_completed_df.csv"
useful_cols_and_completed_df = pd.read_csv(feedback_csv_path)
useful_cols_and_completed_df.shape[0]

In [3]:
def _new_empty_topic(name):
    """Return a Topic for `name` with a blank resolution, no examples and a zero count."""
    return Topic(topic_name=name,
                 resolution_statement=" ",
                 examples=[],
                 count=0)


# One zero-count Topic per entry of CURRENT_TOPICS, keyed by the topic name.
current_topics_with_count = {name: _new_empty_topic(name) for name in CURRENT_TOPICS}

In [None]:
# Not populated anywhere in this cell; kept so any later reference to the name still resolves.
extracted_topic_data = []

# For every feedback row: build a user message from the two comment columns, ask
# extract_topics_from_user_message for its topics, and tally them in
# current_topics_with_count.
#
# NOTE: current_topics_with_count is mutated in place, so re-running this cell
# without first rebuilding the dict adds to the existing counts and examples.
for _row_label, row in useful_cols_and_completed_df.iterrows():
    improvement_needed = row["NZ_RELATIONSHIP_IMPORTANT_IMPROVEMENT_CMT"]
    reason_for_given_nps = row["NZ_RELATIONSHIP_NPS_REASON_CMT"]

    user_message = format_user_message(improvement_needed=improvement_needed,
                                       reason_for_given_nps=reason_for_given_nps)
    if not user_message:
        # Nothing usable in this row's comments.
        continue
    print(user_message)

    # The candidate list includes topics suggested for earlier rows, because new
    # topics are added to current_topics_with_count as the loop runs.
    topics_response = extract_topics_from_user_message(topics=list(current_topics_with_count.keys()),
                                                       user_message=user_message)
    if not topics_response:
        continue

    # Merge detected and suggested topics, keeping first-seen order but dropping
    # repeats: a topic present in both lists (or twice in one) must count once
    # per message and must not store the same example twice. A None list is
    # treated as empty instead of crashing the merge.
    topics_for_message = dict.fromkeys([
        *(topics_response["detected_topics"] or []),
        *(topics_response["suggested_topics"] or []),
    ])

    for topic in topics_for_message:
        known_topic = current_topics_with_count.get(topic)
        if known_topic is not None:
            known_topic.count += 1
            # Cap the stored examples per topic at N_EXAMPLES_OF_EACH_TOPIC.
            if len(known_topic.examples) < N_EXAMPLES_OF_EACH_TOPIC:
                known_topic.examples.append(user_message)
        else:
            # First sighting of a topic that was not in the mapping: start tracking it.
            current_topics_with_count[topic] = Topic(topic_name=topic,
                                                     resolution_statement=" ",
                                                     count=1,
                                                     examples=[user_message]
                                                     )

    print("\n\n----------------------------------------------------------\n\n")


In [None]:
# (number of feedback rows, number of topics currently tracked)
(useful_cols_and_completed_df.shape[0], len(current_topics_with_count))

In [None]:
# Display the whole topic-name -> Topic mapping as this cell's output.
current_topics_with_count

In [None]:
# Flatten every tracked Topic into one record per topic.
rows = [{
    "Topic": topic.topic_name,
    "Count": topic.count,
    "Resolution": topic.resolution_statement,
    "Examples" : topic.examples
} for topic in current_topics_with_count.values()]

# Build the frame with explicit columns so that an empty topic mapping still
# yields a "Count" column and the sort below does not raise KeyError.
df = pd.DataFrame(rows, columns=["Topic", "Count", "Resolution", "Examples"])
df_sorted = df.sort_values(by="Count", ascending=False)

excel_path = "output_topics_data/topics_by_count.xlsx"
csv_path = "output_topics_data/topics_by_count.csv"

# Neither to_excel nor to_csv creates missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
for output_path in (excel_path, csv_path):
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

# Export to Excel
df_sorted.to_excel(excel_path, index=False)

# Export to CSV
df_sorted.to_csv(csv_path, index=False)

print(f"Topics exported to: {excel_path} and {csv_path}")

In [None]:
# Pass the topic-name -> Topic mapping to create_topics_vectordb.
# NOTE(review): presumably this builds or populates a vector store from the topics —
# confirm against topic_modelling.vectordb.
create_topics_vectordb(topics=current_topics_with_count)