diff --git a/.gitignore b/.gitignore
index 4b52234..9ef744f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,4 +135,5 @@ dmypy.json
 
 # Synthetic data conversations
 src/agents/utils/synthetic_conversations/*.json
-src/agents/utils/synthetic_conversations/*.csv
\ No newline at end of file
+src/agents/utils/synthetic_conversations/*.csv
+src/agents/utils/synthetic_conversations/*.tsv
\ No newline at end of file
diff --git a/src/agents/informational_agent/informational_agent.py b/src/agents/informational_agent/informational_agent.py
index 92c6024..c926c3e 100644
--- a/src/agents/informational_agent/informational_agent.py
+++ b/src/agents/informational_agent/informational_agent.py
@@ -36,7 +36,7 @@ class State(TypedDict):
     conversationalStyle: str
 
 class InformationalAgent:
-    def __init__(self):
+    def __init__(self, informational_role_prompt: str = informational_role_prompt, conv_pref_prompt: str = conv_pref_prompt, update_conv_pref_prompt: str = update_conv_pref_prompt, summary_prompt: str = summary_prompt, update_summary_prompt: str = update_summary_prompt):
         llm = GoogleAILLMs()
         self.llm = llm.get_llm()
         summarisation_llm = OpenAILLMs()
@@ -181,7 +181,7 @@ def pretty_response_value(self, event: dict) -> str:
     return event["messages"][-1].content
 
 agent = InformationalAgent()
-def invoke_informational_agent(query: str, conversation_history: list, summary: str, conversationalStyle: str, question_response_details: str, session_id: str) -> InvokeAgentResponseType:
+def invoke_informational_agent(query: str, conversation_history: list, summary: str, conversationalStyle: str, question_response_details: str, session_id: str, agent: InformationalAgent = agent) -> InvokeAgentResponseType:
     print(f'in invoke_informational_agent(), thread_id = {session_id}')
 
     config = {"configurable": {"thread_id": session_id, "summary": summary, "conversational_style": conversationalStyle, "question_response_details": question_response_details}}
diff --git a/src/agents/informational_agent/informational_prompts.py b/src/agents/informational_agent/informational_prompts.py
index 2d8e685..3241132 100644
--- a/src/agents/informational_agent/informational_prompts.py
+++ b/src/agents/informational_agent/informational_prompts.py
@@ -4,36 +4,30 @@
 # informational_role_prompt = "You are an excellent tutor that aims to provide clear and concise explanations to students. I am the student. Your task is to answer my questions and provide guidance on the topic discussed. Ensure your responses are accurate, informative, and tailored to my level of understanding and conversational preferences. If I seem to be struggling or am frustrated, refer to my progress so far and the time I spent on the question vs the expected guidance. You do not need to end your messages with a concluding statement.\n\n"
 
+# TODO: try using "A tutor and a student work together to solve the following math word problem."
 informational_role_prompt = """You are a highly skilled and patient AI tutor designed to assist me, the student, in discovering answers and mastering concepts. Your teaching style emphasizes student-centric learning, encouraging deep thinking, active engagement, and confidence building.
 
 ## Teaching Methods:
 Step-by-Step Learning: Break complex problems into smaller, manageable parts, solving one step at a time. Avoid giving the final answer upfront; instead, offer hints or intermediate steps to nudge the student toward the solution. Provide the full answer only when it's clear the student needs it to move forward.
 If the student explicitly asks for the answer, direct them to the worked solutions or answer provided below, while encouraging them to engage with the chat for deeper understanding.
 Error Analysis: Treat mistakes as learning opportunities by helping students reflect on why they occurred and how to address them.
 Active Participation: Encourage students to take an active role in solving problems, providing guidance without overtaking their learning process.
-Tailored Feedback: Adapt your explanations, questions, and support to the student's level, needs, and progress. If the student is close to the solution, provide encouragement or subtle hints. If they seem stuck, gradually increase the specificity of your support.
 
 ## Key Qualities:
-Awareness: Use the known learning materials to base your responses on.
+Awareness: Base your responses on the known learning materials. Refer back to the learning materials when necessary, but do not repeat them verbatim. Instead, summarize or paraphrase the content to ensure understanding.
 Patience: Allow students ample time to think, process, and respond without rushing them.
 Clarity: Simplify complex ideas into clear, actionable steps.
 Encouragement: Celebrate student efforts and achievements to keep motivation high.
 Adaptability: Customize teaching approaches based on the student's learning preferences and evolving needs.
 Curiosity-Building: Inspire students to ask thoughtful questions, fostering a love for learning.
 Consistency: Reinforce concepts regularly to build lasting understanding.
-Conversation Flow:
-Frequently conclude interactions with a question to keep the dialogue active and gauge the student's comprehension and comfort with the material.
-Continuously adapt to the student's problem-solving style, preferred level of guidance, and feedback.
-
-Example Conversation Style:
-
-If the student asks, "How do I solve this equation?" respond with:
-"Let's start by identifying what you know. What operation do you think comes first?"
-Follow up with guided hints or clarifications based on their response.
+Authenticity: Maintain a constructive learning environment by keeping feedback clear, honest, and focused.
+Conversation Flow: Frequently conclude interactions with a question to keep the dialogue active and gauge the student's comprehension and comfort with the material.
+Tailored Feedback: Adapt your explanations, questions, and support to the student's level, needs, and progress. If the student is close to the solution, provide encouragement or subtle hints. If the student seems stuck, reflect on their progress and the time spent on the topic, offering the expected guidance. Then gradually increase the specificity of your support.
 
 ## Flexibility:
-Restrict your response's length to quickly resolve the student's query. However, adjust your approach dynamically, if the student seeks detailed guidance, prefers a hands-off approach, or demonstrates unique problem-solving strategies. If the student struggles or seems frustrated, reflect on their progress and the time spent on the topic, offering the expected guidance. If the student asks about an irrelevant topic, politely redirect them back to the topic. Do not end your responses with a concluding statement.
+Directly answer the student's question. Keep your answer short. If the student asks about an irrelevant topic, politely redirect them back to the topic. Do not end your responses with a concluding statement.
 
-## Governance
+## Governance:
 You are a chatbot deployed in Lambda Feedback, an online self-study platform. You are discussing with students from Imperial College London."""
 
 pref_guidelines = """**Guidelines:**
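Usage sketch (illustration, not part of the patch): the new constructor parameters let a caller inject a modified role prompt and route the resulting agent through invoke_informational_agent. The import paths and keyword arguments match this diff; the placeholder prompt, query, and session id are invented for the example, and whether empty history/summary values are accepted depends on the agent internals.

    from src.agents.informational_agent.informational_agent import InformationalAgent, invoke_informational_agent
    from src.agents.informational_agent.informational_prompts import (
        conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt)

    # Hypothetical trimmed role prompt to compare against the default one
    custom_role_prompt = "You are a patient AI tutor. Guide the student one step at a time."

    agent = InformationalAgent(informational_role_prompt=custom_role_prompt,
                               conv_pref_prompt=conv_pref_prompt,
                               update_conv_pref_prompt=update_conv_pref_prompt,
                               summary_prompt=summary_prompt,
                               update_summary_prompt=update_summary_prompt)
    response = invoke_informational_agent(query="How do I start part (c)?",
                                          conversation_history=[],
                                          summary="",
                                          conversationalStyle="",
                                          question_response_details="",
                                          session_id="ablation-test-1",
                                          agent=agent)
    print(response["output"])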
diff --git a/src/agents/utils/parse_json_to_prompt.py b/src/agents/utils/parse_json_to_prompt.py
index 843e682..4b0b556 100644
--- a/src/agents/utils/parse_json_to_prompt.py
+++ b/src/agents/utils/parse_json_to_prompt.py
@@ -163,7 +163,7 @@ def format_response_area_details(responseArea: ResponseAreaDetails, studentSummary
     return f"""
 ## Response Area: {responseArea.position + 1}
 {f'Area task: What is {responseArea.preResponseText} ?' if responseArea.preResponseText else ''}
-    (Secret - not to be shared) Expected Answer: {responseArea.answer};
+    (Keep it Secret) Expected Answer: {responseArea.answer};
 {submissionDetails}"""
 
 def format_part_details(part: PartDetails, currentPart: CurrentPart, summary: List[StudentWorkResponseArea]) -> str:
diff --git a/src/agents/utils/prompt_value_analysis.py b/src/agents/utils/prompt_value_analysis.py
new file mode 100644
index 0000000..72ce50d
--- /dev/null
+++ b/src/agents/utils/prompt_value_analysis.py
@@ -0,0 +1,106 @@
+import networkx as nx
+import matplotlib.pyplot as plt
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import pandas as pd
+from matplotlib import cm
+from matplotlib.lines import Line2D
+
+# Load paragraphs from the prompt-ablation TSV
+path = "src/agents/utils/synthetic_conversations/"
+df = pd.read_csv(path + "prompts_importance.tsv", delimiter="\t")  # Replace with your actual file name
+print(df.columns)
+# Strip LaTeX delimiters and backslashes so the embeddings see plain text
+df["response"] = df["response"].astype(str).str.replace("$$", "", regex=False)
+df["response"] = df["response"].astype(str).str.replace("\\", "", regex=False)
+paragraphs = df["response"].tolist()
+messages = df["message"].tolist()
+prompts = df["prompt"].tolist()
+missing_prompts = df["prompt_missing"].tolist()
+print(f"Loaded {len(paragraphs)} paragraphs, {len(messages)} messages, {len(prompts)} prompts, and {len(missing_prompts)} missing prompts")
+
+# Load embedding model
+model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+# Compute embeddings
+embeddings = model.encode(paragraphs, convert_to_numpy=True)
+
+# Compute similarity matrix
+similarity_matrix = cosine_similarity(embeddings)
+
+# Create graph
+G = nx.Graph()
+
+# Create a mapping of messages to colours
+message_to_color = {msg: cm.viridis(i / len(set(messages))) for i, msg in enumerate(set(messages))}
+
+# Add nodes with message-based colours
+node_colors = []
+for i, paragraph in enumerate(paragraphs):
+    msg = messages[i]
+    color = message_to_color[msg]  # Get colour based on message
+    G.add_node(i, text=paragraph, message=msg, color=color, prompt=prompts[i], missing_prompt=missing_prompts[i])
+    node_colors.append(color)  # Add node colour for visualization
+
+# Define a similarity threshold for edges
+threshold = 0.5
+for i in range(len(paragraphs)):
+    for j in range(i + 1, len(paragraphs)):
+        if similarity_matrix[i, j] > threshold:
+            G.add_edge(i, j, weight=similarity_matrix[i, j])
+
+# Draw graph
+fig, ax = plt.subplots(figsize=(12, 6))
+pos = nx.spring_layout(G)  # Positioning of nodes
+nx.draw(G, pos, with_labels=False, node_color=node_colors, edge_color="white", ax=ax)
+
+# Create annotation for hover effect
+hover_text = ax.text(0.5, -0.1, "", transform=ax.transAxes, ha="center", va="top", fontsize=10, wrap=True)
+hover_text.set_visible(False)
+
+# Function to update hover text and wrap it
+def update_hover_text(ind):
+    node_idx = ind["ind"][0]
+    hover_text.set_position((0.5, -0.05))  # Position the text box at the bottom
+    hover_text.set_text("Message: " + G.nodes[node_idx]["message"] + "\nResponse: " + G.nodes[node_idx]["text"])  # Set the text
+    hover_text.set_visible(True)
+    plt.draw()
+
+# Mouse hover event
+def hover(event):
+    if event.inaxes == ax:
+        for i, (x, y) in pos.items():
+            if np.linalg.norm([x - event.xdata, y - event.ydata]) < 0.05:  # Adjust hover sensitivity
+                update_hover_text({"ind": [i]})
+                return
+        hover_text.set_visible(False)  # Hide text when not hovering over nodes
+        plt.draw()
+
+# Mouse click event
+def on_click(event):
+    if event.inaxes == ax:
+        for i, (x, y) in pos.items():
+            if np.linalg.norm([x - event.xdata, y - event.ydata]) < 0.05:  # Click sensitivity
+                node_idx = i
+                message = G.nodes[node_idx]["message"]
+                prompt = G.nodes[node_idx]["prompt"]
+                missing_prompt = G.nodes[node_idx]["missing_prompt"]
+                text = G.nodes[node_idx]["text"]
+                print(f"Clicked node {node_idx} \n-- Message: {message}\n-- Response: {text}\n-- Prompt: {prompt}\n-- Missing Prompt: {missing_prompt}")
+                print("====================")
+
+# Create legend
+legend_handles = []
+for msg, color in message_to_color.items():
+    legend_handles.append(Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=msg))
+
+ax.legend(handles=legend_handles, title="Messages", bbox_to_anchor=(0.3, 0.0), loc='lower center',
+          borderaxespad=1, ncol=1, fontsize=10, columnspacing=1, frameon=False)
+
+# Connect events
+fig.canvas.mpl_connect("motion_notify_event", hover)
+fig.canvas.mpl_connect("button_press_event", on_click)
+
+plt.subplots_adjust(bottom=0.2)  # Add space for the bottom bar
+plt.show()
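To make the graph construction above concrete, here is a self-contained sketch of how the 0.5 cosine-similarity threshold decides which edges are drawn. The three sample responses are invented, and actual scores will vary with the model; the model name and API calls match the script above.

    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity

    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    responses = [  # invented examples of agent outputs
        "Start by resolving the forces acting on the beam.",
        "First, list the forces on the beam and resolve them into components.",
        "Lambda Feedback is an online self-study platform.",
    ]
    sim = cosine_similarity(model.encode(responses, convert_to_numpy=True))
    # Paraphrases score high (edge drawn); unrelated text scores low (no edge)
    for i in range(len(responses)):
        for j in range(i + 1, len(responses)):
            verdict = "edge" if sim[i, j] > 0.5 else "no edge"
            print(f"({i}, {j}) similarity={sim[i, j]:.2f} -> {verdict}")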
diff --git a/src/agents/utils/testbench_agents.py b/src/agents/utils/testbench_agents.py
index e016ef1..c9bdf9d 100644
--- a/src/agents/utils/testbench_agents.py
+++ b/src/agents/utils/testbench_agents.py
@@ -1,98 +1,140 @@
-"""
-    STEP 1: Read the USER INFO from the WEB client from a file
-"""
-
 import json
+import time
+import os
 
 try:
     from .parse_json_to_prompt import parse_json_to_prompt
     from ..base_agent.base_agent import invoke_base_agent
-    from ..informational_agent.informational_agent import invoke_informational_agent
+    from ..informational_agent.informational_agent import InformationalAgent, invoke_informational_agent
     from ..socratic_agent.socratic_agent import invoke_socratic_agent
+    from ..informational_agent.informational_prompts import \
+        informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
 except ImportError:
     from src.agents.utils.parse_json_to_prompt import parse_json_to_prompt
     from src.agents.base_agent.base_agent import invoke_base_agent
-    from src.agents.informational_agent.informational_agent import invoke_informational_agent
+    from src.agents.informational_agent.informational_agent import InformationalAgent, invoke_informational_agent
     from src.agents.socratic_agent.socratic_agent import invoke_socratic_agent
+    from src.agents.informational_agent.informational_prompts import \
+        informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
 
 # File path for the input text
-path = "src/agents/utils/example_inputs/"
-input_file = path + "example_input_4.json"
+path = "src/agents/utils/"
+input_file = path + "example_inputs/" + "example_input_4.json"
 
-# Step 1: Read the input file
+"""
+    STEP 1: Read the USER INFO from the WEB client from a file
+"""
 with open(input_file, "r") as file:
     raw_text = file.read()
-
-# Step 5: Parse into JSON
-try:
-    parsed_json = json.loads(raw_text)
 
-    """
-    STEP 2: Extract the parameters from the JSON
-    """
+def testbench_agents(message, remove_index, agent_type="informational", informational_role_prompt=informational_role_prompt):
+    try:
+        """
+        STEP 2: Parse the question information from the JSON file
+        """
+        parsed_json = json.loads(raw_text)
+        parsed_json["message"] = message
+        parsed_json["params"]["conversation_history"][-1]["content"] = message
+
+        params = parsed_json["params"]
+
+        if "include_test_data" in params:
+            include_test_data = params["include_test_data"]
+        if "conversation_history" in params:
+            conversation_history = params["conversation_history"]
+        if "summary" in params:
+            summary = params["summary"]
+        if "conversational_style" in params:
+            conversationalStyle = params["conversational_style"]
+        if "question_response_details" in params:
+            question_response_details = params["question_response_details"]
+            question_submission_summary = question_response_details["questionSubmissionSummary"] if "questionSubmissionSummary" in question_response_details else []
+            question_information = question_response_details["questionInformation"] if "questionInformation" in question_response_details else {}
+            question_access_information = question_response_details["questionAccessInformation"] if "questionAccessInformation" in question_response_details else {}
+            question_response_details_prompt = parse_json_to_prompt(
+                question_submission_summary,
+                question_information,
+                question_access_information
+            )
+            # print("Question Response Details Prompt:", question_response_details_prompt, "\n\n")
+
+        if "agent_type" in params:
+            agent_type = params["agent_type"]
+        if "conversation_id" in params:
+            conversation_id = params["conversation_id"]
+        else:
+            raise Exception("Internal Error: The conversation id is required in the parameters of the chat module.")
+
+        """
+        STEP 3: Call the LLM agent to get a response to the user's message
+        """
+        if agent_type == "socratic":
+            # The socratic invoke function does not accept an injected agent;
+            # the prompt ablation below applies to the informational agent only.
+            invoke = invoke_socratic_agent
+            agent = None
+            updated_prompt, prompt_missing = informational_role_prompt, ""
+        elif agent_type == "informational":
+            """
+            STEP 4: Remove one section of the role prompt to measure its contribution
+            """
+            role_prompt_components = informational_role_prompt.split("\n\n")
+            main_prompt = role_prompt_components[0]
+            teaching_methods = role_prompt_components[1].split("## Teaching Methods:\n")[1].split("\n")
+            key_qualities = role_prompt_components[2].split("## Key Qualities:\n")[1].split("\n")
+            flexibility_prompt = [item + '.' for item in role_prompt_components[3].split("## Flexibility:\n")[1].split(".") if item]
+            governance_prompt = [item + '.' for item in role_prompt_components[-1].split("## Governance:\n")[1].split(".") if item]
+            prompts = [main_prompt] + teaching_methods + key_qualities + flexibility_prompt + governance_prompt
+
+            # Remove one of the prompt segments to test the agent's performance without it
+            prompt_missing = prompts[remove_index]
+            print("Number of prompts:", len(prompts), ", current index:", remove_index, ", prompt removed:", prompt_missing)
+            prompts.remove(prompt_missing)
+
+            updated_prompt = "\n\n".join(prompts)
+
+            agent = InformationalAgent(informational_role_prompt=updated_prompt,
+                                       conv_pref_prompt=conv_pref_prompt,
+                                       update_conv_pref_prompt=update_conv_pref_prompt,
+                                       summary_prompt=summary_prompt,
+                                       update_summary_prompt=update_summary_prompt)
+            invoke = invoke_informational_agent
+        else:
+            raise Exception("Unknown Tutor Agent Type")
+
+        # Only the informational invoke function accepts an agent instance
+        extra_args = {} if agent is None else {"agent": agent}
+        response = invoke(query=message,
+                          conversation_history=conversation_history,
+                          summary=summary,
+                          conversationalStyle=conversationalStyle,
+                          question_response_details=question_response_details_prompt,
+                          session_id=conversation_id,
+                          **extra_args)
+
+        print(response)
+        print("AI Response:", response['output'])
+        return message, response, updated_prompt, prompt_missing
+
+    except json.JSONDecodeError as e:
+        print("Error decoding JSON:", e)
+
+
+if __name__ == "__main__":
+    file = path + "synthetic_conversations/" + "prompts_importance.tsv"
+    # Create the results file with its header if it doesn't exist
+    # (no spaces after the tabs, so pandas sees the exact column names)
+    if not os.path.exists(file):
+        with open(file, "w") as f:
+            f.write("message\tresponse\tprompt\tprompt_missing\n")
+
     # NOTE: #### This is the testing message!! #####
-    message = "What do you know about me?"
+    message = "How do you tackle the worked solution for part c?"
     # NOTE: ########################################
 
-    # replace "mock" in the message and conversation history with the actual message
-    parsed_json["message"] = message
-    parsed_json["params"]["conversation_history"][-1]["content"] = message
-
-    params = parsed_json["params"]
-
-    if "include_test_data" in params:
-        include_test_data = params["include_test_data"]
-    if "conversation_history" in params:
-        conversation_history = params["conversation_history"]
-    if "summary" in params:
-        summary = params["summary"]
-    if "conversational_style" in params:
-        conversationalStyle = params["conversational_style"]
-    if "question_response_details" in params:
-        question_response_details = params["question_response_details"]
-        question_submission_summary = question_response_details["questionSubmissionSummary"] if "questionSubmissionSummary" in question_response_details else []
-        question_information = question_response_details["questionInformation"] if "questionInformation" in question_response_details else {}
-        question_access_information = question_response_details["questionAccessInformation"] if "questionAccessInformation" in question_response_details else {}
-        question_response_details_prompt = parse_json_to_prompt(
-            question_submission_summary,
-            question_information,
-            question_access_information
-        )
-        # print("Question Response Details Prompt:", question_response_details_prompt, "\n\n")
-
-    if "agent_type" in params:
-        agent_type = params["agent_type"]
-    if "conversation_id" in params:
-        conversation_id = params["conversation_id"]
-    else:
-        raise Exception("Internal Error: The conversation id is required in the parameters of the chat module.")
-
-    """
-    STEP 3: Call the LLM agent to get a response to the user's message
-    """
-    # NOTE: ### SET the agent type to use ###
-    agent_type = "informational"
-    # NOTE: #################################
-
-    if agent_type == "socratic":
-        invoke = invoke_socratic_agent
-    elif agent_type == "informational":
-        invoke = invoke_informational_agent
-    else:
-        raise Exception("Unknown Tutor Agent Type")
-
-    response = invoke(query=message, \
-        conversation_history=conversation_history, \
-        summary=summary, \
-        conversationalStyle=conversationalStyle, \
-        question_response_details=question_response_details_prompt, \
-        session_id=conversation_id)
-
-    print(response)
-    print("AI Response:", response['output'])
-
-
-except json.JSONDecodeError as e:
-    print("Error decoding JSON:", e)
+    index_count = 21  # Number of prompt segments produced in testbench_agents(); keep in sync with the role prompt
+    for i in range(0, index_count):
+        if i == 16:
+            time.sleep(60)  # pause once mid-run, presumably to stay under LLM rate limits
+        message, response, prompt, prompt_missing = testbench_agents(message, remove_index=i)
+        with open(file, "a") as f:
+            # Append one TSV row per ablation, flattening newlines so each record stays on one line
+            if prompt_missing.strip():
+                f.write(message + "\t" + ' '.join(response['output'].split('\n')) + "\t" + ' '.join(prompt.split('\n')) + "\t" + prompt_missing + "\n")
+    print("File written successfully!")
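For reference, the section-splitting performed in testbench_agents() can be reproduced on a toy prompt. The toy text below is invented, but the split logic mirrors the diff: paragraphs split on blank lines, headed sections split on their header line, and sentence-level sections split on periods. Each resulting index is a valid remove_index for the ablation loop.

    toy_prompt = ("Main role description.\n\n"
                  "## Teaching Methods:\nMethod A.\nMethod B.\n\n"
                  "## Flexibility:\nAnswer directly. Keep it short.")
    components = toy_prompt.split("\n\n")
    main_prompt = components[0]
    teaching_methods = components[1].split("## Teaching Methods:\n")[1].split("\n")
    flexibility = [item + "." for item in components[-1].split("## Flexibility:\n")[1].split(".") if item]
    prompts = [main_prompt] + teaching_methods + flexibility
    for i, p in enumerate(prompts):
        print(i, repr(p))  # each index is one ablation candidate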
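And a quick round-trip check, a sketch that assumes the loop above has already written at least one row, confirming the TSV parses with exactly the column names prompt_value_analysis.py indexes:

    import pandas as pd

    tsv_path = "src/agents/utils/synthetic_conversations/prompts_importance.tsv"
    df = pd.read_csv(tsv_path, delimiter="\t")
    # The analysis script uses df["message"], df["response"], df["prompt"], and
    # df["prompt_missing"], so the header must match these names exactly.
    assert list(df.columns) == ["message", "response", "prompt", "prompt_missing"]
    print(df.head())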