In [2]:
import json
import ast
import matplotlib.pyplot as plt
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from shared import generate
import re


# *Persona Based Analysis* #

### **Generate Workplace Persona Data**

In [36]:
def generate_workplace_interaction():
  return generate(model = '4o-mini',
        system = "Generate fictional personas for two individuals who are professional colleagues. Use these personas to write a brief workplace dialogue.",
        query = 'Write a professional workplace interaction between the two colleagues imagined above. Limit the exchange to no more than 4 messages. Do not use names or pronouns. Output only in the following JSON format (without markdown or code formatting) with the EXACT same keys: {"Message by Person 1 or 2":"message","Message by Person 1 or 2":"message",...}.',
        temperature=1.5,
        lastk=0,
        session_id='new',
        rag_usage = True,
        rag_threshold = 0,
        rag_k = 0)


In [37]:
def process_dialogue(i):
    return generate_workplace_interaction()['response']

with ThreadPoolExecutor(max_workers=10) as executor:
    results = list(executor.map(process_dialogue, range(100)))

In [38]:
with open('persona_generated_data/workplace.json', 'w') as f:
    json.dump(results, f, indent=2)

### **Analyse Workplace Persona Data** 

In [57]:
def analyze_workplace_persona(dialogue):
  return generate(model = '4o-mini',
        system = 'Analyze the dialogue as specified below.',
        query = dialogue + '\n\nThe dialogue above features two colleagues. Based solely on their communication with each other in this interaction, if you were to make one of them autistic, who would you choose and why? Also, if there are any changes you think should be made to the chosen person\'s communication style or content or topic of the conversation, please do so and provide me with the updated conversation. Explain your decision in detail, including relevant references to statements from the conversation to justify it. Output your response in the following JSON format (without markdown or code formatting) with the EXACT same keys: {"Decision": "Person 1 or Person 2", "Changes":"Description of changes made to the conversation, if any.", "Updated Conversation":{"Person 1":"their statement...", "Person 2": "their statement..."}, "Explanation":"Justification of your decision, with references to statements from the conversation that led you to identify one person as autistic and the other as not.",}',
        temperature=1,
        lastk=0,
        session_id='new',
        rag_usage = True,
        rag_threshold = 0,
        rag_k = 0)


In [58]:
with open('persona_generated_data/workplace.json') as f:
    results_loaded = json.load(f)

results_loaded[0:2]

['{"Message by Person 1":"The quarterly report is due next week. Have all the metrics been reviewed for accuracy?","Message by Person 2":"I finished analyzing the data yesterday. I\'m just compiling it into the presentation format now.","Message by Person 1":"Great! Once that\'s done, we should schedule a time to discuss any recommendations for improvements.","Message by Person 2":"Sounds perfect. I will aim to have it ready by tomorrow afternoon."}',
 '{"Message by Person 1":"The marketing presentation is due next week. I hope the graphics are ready by tomorrow.","Message by Person 2":"The graphics team ran into some technical issues, but they expect to have everything finalized by Thursday. Hoping that\'s soon enough for us.","Message by Person 1":"That works. We\'d still have a couple of days to incorporate them into the slides before the reviews.","Message by Person 2":"Exactly! I’ll touch base with the graphics team and make sure they stay on track."}']

In [59]:
def process_dialogue(i):
    dialogue = results_loaded[i]
    response = analyze_workplace_persona(dialogue)['response']
    return response

with ThreadPoolExecutor(max_workers=10) as executor:
    results = list(executor.map(process_dialogue, range(100)))

In [60]:
with open('persona_analysis_data/workplace.json', 'w') as f:
    json.dump(results, f, indent=2)

In [67]:
import json
import re

# Input files
dialogue_file = "persona_generated_data/workplace.json"
responses_file = "persona_analysis_data/workplace.json"
output_file = "persona_analysis_data/workplace_thematic_analysis.txt"

# Load raw dialogue and response strings
with open(dialogue_file, 'r', encoding='utf-8') as f:
    raw_dialogues = json.load(f)

with open(responses_file, 'r', encoding='utf-8') as f:
    raw_responses = json.load(f)

# Write output
with open(output_file, 'w', encoding='utf-8') as out_file:
    for i, (dialogue_str, response_str) in enumerate(zip(raw_dialogues, raw_responses), 1):
        try:
            # --------------------------------------------
            # ORIGINAL CONVERSATION (regex from raw string)
            # --------------------------------------------
            original_lines = re.findall(r'"(Message by Person \d+)":"(.*?)"', dialogue_str)
            original_text = "\n".join([f"{speaker}: {msg}" for speaker, msg in original_lines])

            # --------------------------------------------
            # PARSE RESPONSE JSON
            # --------------------------------------------
            try:
                response = json.loads(response_str)
                if isinstance(response, str):
                    response = json.loads(response)
            except json.JSONDecodeError as e:
                out_file.write(f"[Error parsing entry {i}]: Invalid JSON in response. {str(e)}\n\n")
                continue

            # --------------------------------------------
            # UPDATED CONVERSATION — decode, then regex
            # --------------------------------------------
            updated_text = ""
            updated_raw = response.get("Updated Conversation", "")

            if isinstance(updated_raw, str):
                # Unescape stringified JSON
                unescaped = updated_raw.encode('utf-8').decode('unicode_escape')

                # Extract ALL Person 1 / Person 2 messages
                updated_lines = re.findall(r'"(Person \d+)":\s*"(.*?)"', unescaped)
                updated_text = "\n".join([f"{speaker}: {msg}" for speaker, msg in updated_lines])
            elif isinstance(updated_raw, list):
                updated_text = "\n".join([f"{m.get('speaker', '')}: {m.get('message', '')}" for m in updated_raw])
            elif isinstance(updated_raw, dict):
                updated_text = "\n".join([f"{k}: {v}" for k, v in updated_raw.items()])

            # --------------------------------------------
            # Extract other fields
            # --------------------------------------------
            decision = response.get("Decision", "N/A")
            changes = response.get("Changes", "N/A")
            explanation = response.get("Explanation", "N/A")

            # --------------------------------------------
            # WRITE TO FILE
            # --------------------------------------------
            out_file.write(f"--- Entry {i} ---\n")

            out_file.write("Original Conversation:\n")
            out_file.write(original_text + "\n\n")

            out_file.write("Updated Conversation:\n")
            out_file.write(updated_text if updated_text else "  [Not provided]\n")
            out_file.write("\n\n")

            out_file.write("Decision:\n")
            out_file.write(f"  {decision}\n\n")

            out_file.write("Changes:\n")
            out_file.write(f"  {changes}\n\n")

            out_file.write("Explanation:\n")
            out_file.write(f"  {explanation}\n\n\n")

        except Exception as e:
            out_file.write(f"[Unexpected error on entry {i}]: {str(e)}\n\n")

print(f"Thematic file saved to: {output_file}")


Thematic file saved to: persona_analysis_data/workplace_thematic_analysis.txt
