# Clustering Data and Prompting Classification

In [None]:
import json
import pprint

In [None]:
# Visitor-intent segments keyed by segment id; each value is a
# "Name: description" string describing that segment.
# NOTE(review): `nomenclature` is assigned again from the JSON file at
# `nomenclature_path` further down, so this literal is only in effect until
# that load runs — confirm which definition is meant to be authoritative.
nomenclature = {
    "networking": """Networking: Visitors in this group are currently focused primarily on building professional or personal relationships and expanding their network. They attend this event to meet industry peers, experts, and potential partners. Their interactions are more about establishing connections than purchasing. They value personal interactions and opportunities for collaboration""",
    "learning": """Learning: These attendees are currently motivated by educational and learning opportunities. They seek to gain new insights, learn about industry trends, and enhance their knowledge and skills. They are likely to attend workshops, seminars, and keynote speeches. Their interest in products or services is secondary to their desire for professional development""",
    "searching_info_on_products_and_vendors": """Searching for info on products and vendor (Gathering Information on Products, Services, Vendors or Providers): Individuals in this category might have identified a need or a problem to solve but are still exploring what products, services, vendors or providers that can meet their specific requirements. Their primary goal is to collect as much information as possible without a strong inclination towards immediate purchasing. They may engage in more specific conversations about products or services but are not yet ready to make a decision. They are likely to engage with multiple vendors to compare offerings, gather brochures, and ask general questions""",
    "early_purchasing_intention": """Early Purchasing Intention: Attendees in this group are actively engaged in the sourcing process. They know well the type of product or service they require and might have narrowed down their options and are delving deeper into specific products or services. Their interactions are more detailed, involving discussions about pricing, implementation, or customization. They are seeking to understand how different offerings stack up against each other and may be forming preferences for vendor selection""",
    "high_purchasing_intention": """ High Purchase Intention: This group represents delegates who are at the final stages of their purchasing journey. They have all the information they need and are making final evaluations to choose a vendor. Their interactions are decisive, focusing on final terms, delivery, support, and other post-purchase considerations. Engagements with these individuals are very relevant and time-critical as they are on the verge of making a purchase decision.""",
}

In [None]:
# JSON inputs read by the loading cell; all of them live under "output/".
merger_data_path = "output/merger_data.json"
nomenclature_path = "output/nomenclature.json"
examples_path = "output/examples.json"

In [None]:
# Load the three JSON inputs. The encoding is passed explicitly so the files
# are decoded as UTF-8 on every platform instead of with the locale default
# (which is not UTF-8 on some systems and can fail on non-ASCII text).
with open(examples_path, "r", encoding="utf-8") as f:
    examples = json.load(f)
# This rebinds `nomenclature` to whatever is stored in the file.
with open(nomenclature_path, "r", encoding="utf-8") as f:
    nomenclature = json.load(f)
with open(merger_data_path, "r", encoding="utf-8") as f:
    merged_data = json.load(f)

In [None]:
# print(merged_data.get('BDAWL25_J59MXE4'))

In [None]:
# How many entries were loaded (the length of a dict is its number of keys).
len(merged_data)

In [None]:
# Show the keys available in the examples mapping.
list(examples)

In [None]:
import classes

In [None]:
import importlib

# Re-execute the already-imported `classes` module so that names imported from
# it afterwards reflect its current source — presumably to pick up edits made
# to the module while this notebook session is running.
importlib.reload(classes)

# Llama 3.2 Templates with Examples

In [None]:
from classes import LLama_PromptTemplate

In [None]:
# Build the prompt generator from the loaded segment descriptions and examples.
# How the two arguments are used is defined in the `classes` module.
csm_template = LLama_PromptTemplate(nomenclature, examples)

In [None]:
# Pick one profile by id as a sample; `.get` yields None if the id is absent.
profile = merged_data.get("BDAWL25_J59MXE4")

In [None]:
# profile

In [None]:
# Render the prompt for the sample profile and show it under a header and a
# 50-dash rule, followed by blank lines.
profile_template = csm_template.generate_clustering_prompt(profile)
print("Profile Template:", "-" * 50, profile_template, "\n", sep="\n")

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.messages import AIMessage

In [None]:
# llama3.2:3b

In [None]:
# Chat model client used for the single-sample call and the with-examples loop.
llm = ChatOllama(
    model="llama3:8b",  # NOTE(review): the commented tag above and the section heading mention Llama 3.2 — confirm the intended model
    temperature=0.3,
    num_ctx=4096,  # presumably the context window size in tokens — confirm against the langchain-ollama docs
)

In [None]:
# Send the sample prompt to the model; the reply text is read from `ai_msg.content`.
ai_msg = llm.invoke(profile_template)

In [None]:
# print(ai_msg.content)
# print("-"*100)

In [None]:
# Profile ids in the order they appear in merged_data, and how many there are.
list_profiles = list(merged_data)
len(list_profiles)

In [None]:
# Peek at the first profile id.
list_profiles[0]

In [None]:
# Classify every profile with `llm`, one prompt per profile.
# Each record maps the profile id to the raw model reply and also keeps the
# profile that was sent under the "input" key.
output = []
for p in list_profiles:
    profile = merged_data.get(p)
    profile_template = csm_template.generate_clustering_prompt(profile)
    ai_msg = llm.invoke(profile_template)

    # Progress line followed by a 100-dash separator.
    print(f"Profile: {p} Category : {ai_msg.content}", "-" * 100, sep="\n")

    output.append({p: ai_msg.content, "input": profile})

In [None]:
from datetime import datetime

# Capture the current local time once.
now = datetime.now()

# Second-resolution timestamp (e.g. "2025-01-31_14-05-09") used in the output
# file name. It is derived from `now` rather than from a second
# `datetime.now()` call, so both names always describe the same instant.
timestamp_str = now.strftime("%Y-%m-%d_%H-%M-%S")

In [None]:
import os

# Create the destination folder if needed, so the write below cannot fail with
# FileNotFoundError after the classification loop has already run.
os.makedirs("classification", exist_ok=True)

# Persist the with-examples results under a timestamped file name.
with open(f"classification/llama3_8B_{timestamp_str}.json", "w") as f:
    json.dump(output, f, indent=4)

In [None]:
from collections import Counter

# The first value of each record is the model's reply for that profile
# (the profile id is the first key inserted into every record).
values = [[*record.values()][0] for record in output]

# Tally how often each distinct reply occurs and show the tally.
label_counts = Counter(values)
print(label_counts)

# Llama 3.2 without Examples

In [None]:
# Second prompt generator, built from the same two inputs as `csm_template`;
# it is used below with include_examples=False.
csm_template_2 = LLama_PromptTemplate(nomenclature, examples)

In [None]:
# Reset `profile` to the sample profile (the earlier loop left it at the last
# profile processed); `.get` yields None if the id is absent.
profile = merged_data.get("BDAWL25_J59MXE4")

In [None]:
# Render the sample profile's prompt with examples switched off and show it
# under a header and a 50-dash rule, followed by blank lines.
profile_template_w = csm_template_2.generate_clustering_prompt(profile, include_examples=False)
print("Profile Template Without:", "-" * 50, profile_template_w, "\n", sep="\n")

In [None]:
# Chat model client used for the no-examples loop; it differs from `llm` in
# both the model tag and the temperature.
llm2 = ChatOllama(
    model="llama3.2:3b",
    temperature=0.5,
    num_ctx=4096,  # presumably the context window size in tokens — confirm against the langchain-ollama docs
)

In [None]:
# Classify every profile with `llm2`, using prompts generated without examples.
# Each record maps the profile id to the raw model reply.
output2 = []
for p in list_profiles:
    profile = merged_data.get(p)
    profile_template = csm_template_2.generate_clustering_prompt(profile, include_examples=False)
    ai_msg = llm2.invoke(profile_template)

    # Progress line followed by a 100-dash separator.
    print(f"Profile: {p} Category : {ai_msg.content}", "-" * 100, sep="\n")

    output2.append({p: ai_msg.content})

In [None]:
import os

# Create the destination folder if needed, so the write below cannot fail with
# FileNotFoundError after the classification loop has already run.
os.makedirs("classification", exist_ok=True)

# Persist the no-examples results.
with open("classification/llama3.2_no_examples.json", "w") as f:
    json.dump(output2, f, indent=4)

In [None]:
from collections import Counter

# The first value of each record is the model's reply for that profile.
values2 = [[*record.values()][0] for record in output2]

# Tally how often each distinct reply occurs and show the tally.
label_counts2 = Counter(values2)
print(label_counts2)