In [11]:
import openai
from openai import OpenAI
import os
import re
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# OpenAI credential never has to be written into the notebook itself.
load_dotenv()

# SECURITY: an API key used to be hardcoded in this cell. Any key that has been
# saved in a notebook must be treated as leaked and revoked; the key is now read
# from the OPENAI_API_KEY environment variable (e.g. defined in .env).
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in your environment or in a .env file."
    )

# Later cells copy `openai.api_key` onto this shared client before each request,
# so both names are kept in sync here.
client = OpenAI(api_key=openai.api_key)

In [12]:
def create_message(role, content):
    """Build one chat message in the dict form used for the `messages` list.

    Args:
        role: Value stored under the "role" key (e.g. "system", "user", "assistant").
        content: Value stored under the "content" key.

    Returns:
        A dict with the keys "role" and "content", in that order.
    """
    return dict(role=role, content=content)

In [13]:
# System prompt for the coherence evaluator. It is passed as the system message
# by get_eval_response() and as `system_prompt` when EvaluationCopilot is built.
# The line breaks and trailing spaces are part of the literal sent to the model.
system_content = """You are an AI assistant tasked with evaluating the coherence of answers in a 
question-and-answer format. Your evaluation should be based on how well the sentences in the answer integrate to form 
a unified and logical whole. Your responsibilities include: (1) assigning a coherence score using a 1-5 star rating 
system, (2) providing a detailed justification for the assigned score. Always provide a score as an integer between 1 and 5. Along with each score, include a clear justification, 
pointing out the aspects of the answer that influenced your rating."""

# System prompt for the prompt-improvement step (passed as `system_prompt` to
# ImprovementCopilot).
# NOTE(review): the literal itself starts with "System: " even though it is sent
# with role "system" — confirm the prefix is intentional.
system_content_improvements = """System: You are an AI assistant tasked with suggesting improvements to user prompts based on coherence scores and justifications provided. Your goal is to analyze the given score and its justification, then identify and suggest specific changes to the user prompt that could lead to clearer and more coherent answers in the future. Consider factors like specificity, clarity, and guidance offered in the prompt. Provide actionable suggestions to refine the user's prompt, making it more effective for eliciting coherent responses."""

# Standing instructions for the improvement step (passed as `user_instructions`
# to ImprovementCopilot and sent as the first "user" message).
# NOTE(review): the literal starts with "User: " in addition to being sent with
# role "user" — confirm the prefix is intentional.
user_instructions_improvements = """User: When you receive a coherence score and justification for a response, use this information to suggest specific improvements to the original user prompt. Focus on how the prompt can be rephrased or structured differently to guide respondents towards providing more coherent and relevant answers. Consider aspects such as clarity, specificity, and the framing of the question. Your suggestions should aim to enhance the quality of future responses by refining the prompt."""

# Scoring rubric (one to five stars). It is passed as `user_instructions` when
# EvaluationCopilot is constructed, which sends it as the first "user" message.
# It is not used by the function-based get_eval_response() path.
user_content = """When scoring the coherence of an answer, consider how seamlessly the sentences connect and contribute to a 
unified understanding of the topic. Use the following scale for scoring: - One star: The answer completely lacks 
coherence. - Two stars: The answer mostly lacks coherence. - Three stars: The answer is partially coherent. - Four 
stars: The answer is mostly coherent. - Five stars: The answer has perfect coherency."""
# Few-shot inputs: five "Question: ... Answer: ..." pairs. user_content_N is
# paired with assistant_content_N (and improvement_content_N) further down, so
# example N is the one rated N stars.
# Example 1: answer unrelated to the question.
user_content_1 = """Question: What is your favorite indoor activity and why do you enjoy it? 
Answer: I like pizza. The sun is shining. """
# Example 2: starts on topic, then drifts.
user_content_2 = """Question: Can you describe your favorite movie without giving away any spoilers? 
Answer: It is a science fiction movie. There are dinosaurs. The actors eat cake. People must stop the villain. """
# Example 3: two relevant sentences followed by an unrelated one.
user_content_3 = """Question: What are some benefits of regular exercise?
Answer: Regular exercise improves your mood. A good workout also helps you sleep better. Trees are green."""
# Example 4: on topic throughout (question and answer share one line here).
user_content_4 = """Question: How do you cope with stress in your daily life? Answer: I usually go for a walk to clear my head. Listening 
to music helps me relax as well. Stress is a part of life, but we can manage it through some activities."""
# Example 5: fully on-topic, structured answer.
user_content_5 = """Question: What can you tell me about climate change and its effects on the environment? Answer: Climate change has 
far-reaching effects on the environment. Rising temperatures result in the melting of polar ice caps, contributing to 
sea-level rise. Additionally, more frequent and severe weather events, such as hurricanes and heatwaves, 
can cause disruption to ecosystems and human societies alike."""

# Few-shot reference evaluations, one per user_content_N above. Each follows the
# "Stars: <n> ... Justification: <text>" layout that
# EvaluationCopilot's parser looks for with its "Stars:" / "Justification:" regexes.
# Some examples put "Justification:" on its own line and others keep it on the
# "Stars:" line; both layouts match those regexes.
assistant_content_1 = """Stars: 1 
Justification: The answer completely lacks coherence. The response doesn't address the question about a 
favorite indoor activity or provide a reason for enjoying it. The mention of pizza is irrelevant, and the statement 
about the sun shining is disconnected from the context of indoor activities"""
assistant_content_2 = """Stars: 2 
Justification: The answer mostly lacks coherence. While it starts relevantly by mentioning it's a science fiction movie with dinosaurs, 
the subsequent sentences become increasingly disjointed. The mention of actors eating cake is unrelated to a typical 
movie plot, and the final sentence is too vague to provide a coherent description. """
assistant_content_3 = """Stars: 3 Justification: The answer is partially coherent. The first two 
sentences coherently describe benefits of regular exercise (mood improvement and better sleep). However, 
the last sentence about green trees is completely unrelated to the topic of exercise benefits, disrupting the overall 
coherence."""
assistant_content_4 = """Stars: 4 
Justification: The answer is mostly coherent. It provides relevant personal strategies for coping with stress (
walking, listening to music) and concludes with a general statement about stress management. The answer maintains 
focus on the topic, though it could be slightly more detailed or specific to achieve perfect coherence."""
assistant_content_5 = """Stars: 5 Justification: The answer has perfect 
coherency. It directly addresses the question with a clear and concise explanation of climate change and its 
environmental impacts. The answer is well-structured, logically flowing from one point to the next, and covers a 
broad range of relevant effects, making it a coherent response."""

# Few-shot reference outputs for the improvement step, one per example above.
# Each starts with "Improvement Suggestion:", the marker that
# ImprovementCopilot's parser searches for in the model reply.
improvement_content_1 = """Improvement Suggestion: To ensure a coherent answer, the question could be more specific and guiding. For instance: "What is your favorite indoor activity that helps you relax or feel entertained, and can you explain why you find it enjoyable?" """
improvement_content_2 = """Improvement Suggestion: To guide the respondent towards a coherent answer, the question might include a prompt for structured response, such as: "Can you describe the genre, setting, and main theme of your favorite movie, without giving away any spoilers?" """
improvement_content_3 = """Improvement Suggestion: The question is already well-structured for coherent answers. To further guide the response, it could be slightly modified to: "Can you list and briefly explain at least three health benefits of regular exercise?" """
improvement_content_4 = """Improvement Suggestion: This question generally prompts coherent answers. To enhance it, consider adding: "How do you cope with stress in your daily life, and can you describe how these methods effectively help you manage stress?" """
improvement_content_5 = """Improvement Suggestion: The question is already conducive to coherent answers. To maintain this quality, it could be rephrased for clarity and depth: "What are the key impacts of climate change on the environment, and how do these effects manifest in different ecological and societal aspects?" """

In [14]:
# (question/answer text, reference evaluation) pairs used as few-shot
# demonstrations for the coherence evaluator, ordered from 1 to 5 stars.
few_shot_examples = list(zip(
    (user_content_1, user_content_2, user_content_3, user_content_4, user_content_5),
    (assistant_content_1, assistant_content_2, assistant_content_3, assistant_content_4, assistant_content_5),
))

In [15]:
# (question/answer text, reference evaluation, reference improvement) triples
# used as few-shot demonstrations for the improvement step.
example_improvement_suggestions = list(zip(
    (user_content_1, user_content_2, user_content_3, user_content_4, user_content_5),
    (assistant_content_1, assistant_content_2, assistant_content_3, assistant_content_4, assistant_content_5),
    (improvement_content_1, improvement_content_2, improvement_content_3, improvement_content_4, improvement_content_5),
))

In [16]:
def get_eval_messages(system_prompt, few_shot_examples_prompt, user_prompt):
    """Assemble the chat `messages` list for one evaluation request.

    Args:
        system_prompt: Text of the leading "system" message.
        few_shot_examples_prompt: Iterable of (user_text, assistant_text) pairs;
            each pair becomes a "user" message followed by an "assistant" message.
        user_prompt: Text of the final "user" message (the item to evaluate).

    Returns:
        A list of message dicts: system, the alternating few-shot turns, then
        the final user message.
    """
    demonstrations = [
        create_message(speaker, text)
        for question, evaluation in few_shot_examples_prompt
        for speaker, text in (("user", question), ("assistant", evaluation))
    ]
    return [
        create_message("system", system_prompt),
        *demonstrations,
        create_message("user", user_prompt),
    ]

In [17]:
def run_chat_completions(msgs, model="gpt-3.5-turbo"):
    """Send a chat-completions request and return the first choice's text.

    Args:
        msgs: List of message dicts ({"role": ..., "content": ...}) to send.
        model: Name of the chat model to call. Defaults to "gpt-3.5-turbo",
            the value that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The `content` of the first choice's message in the response.
    """
    # Re-apply the module-level key to the shared client before every call, so a
    # key assigned to `openai.api_key` after the client was created is honoured.
    client.api_key = openai.api_key
    response = client.chat.completions.create(
        model=model,
        messages=msgs
    )
    return response.choices[0].message.content

# result = run_chat_completions(messages)
# print(result)


In [18]:
# System message reused for every plain (non-evaluation) request.
simple_prompt = create_message("system","You are a helpful assistant.")


def get_simple_response(user_prompt):
    """Ask the model `user_prompt` under the generic assistant system message.

    Args:
        user_prompt: Text sent as the single "user" message.

    Returns:
        Whatever run_chat_completions() returns for the two-message conversation.
    """
    conversation = [simple_prompt, create_message("user", user_prompt)]
    return run_chat_completions(conversation)

In [19]:
# Question whose answer will be graded by the evaluation steps below.
# NOTE(review): "Napoleanic" is a misspelling of "Napoleonic". The string is sent
# to the model verbatim and the recorded outputs further down were produced with
# this spelling, so it is left unchanged here.
prompt = "Tell me about the Napoleanic Wars"

# Answer under evaluation; this line performs a live API request.
primary_response = get_simple_response(prompt)

In [20]:
def get_eval_response(user_prompt, llm_response):
    """Have the evaluator grade `llm_response` as an answer to `user_prompt`.

    The pair is rendered as "Question:...\\nAnswer:..." and sent after the
    `system_content` prompt and the `few_shot_examples` demonstrations.

    Args:
        user_prompt: The question that was asked.
        llm_response: The answer to be graded.

    Returns:
        The evaluator's reply text, unparsed.
    """
    return run_chat_completions(
        get_eval_messages(
            system_content,
            few_shot_examples,
            f"Question:{user_prompt}\nAnswer:{llm_response}",
        )
    )

In [21]:
# Grade the answer obtained above with the function-based evaluator
# (system prompt + few-shot examples, without the separate scoring rubric).
eval_response = get_eval_response(prompt, primary_response)

In [22]:
# Bare expression: the notebook displays the raw, unparsed evaluator reply.
eval_response

'Stars: 5 \nJustification: The answer is highly coherent. It provides a comprehensive overview of the Napoleonic Wars, covering \nthe timeline, major battles, key players, and significant consequences. The information is presented in a logical and \norganized manner, allowing the reader to follow the narrative of the wars and understand their broader impact on \nEurope. The answer demonstrates a clear understanding of the topic and effectively integrates the various aspects of \nthe wars to form a cohesive response.'

In [36]:
class EvaluationCopilot:
    """Grades a question/answer pair with a chat model and parses the verdict.

    The request is made of: the system prompt, the standing user instructions,
    the few-shot (input, evaluation) pairs, and finally the pair to grade. The
    reply is expected to contain "Stars: <digits>" and "Justification: <text>".

    Relies on the module-level `client` and `openai.api_key` being set up.
    """

    # Model used when the caller does not pick one (the previously hard-coded value).
    DEFAULT_MODEL = "gpt-3.5-turbo"

    def __init__(self, system_prompt, user_instructions, example_evaluations, model=None):
        """Store the prompts and few-shot examples.

        Args:
            system_prompt: Text of the "system" message.
            user_instructions: Text of the first "user" message (e.g. a rubric).
            example_evaluations: Iterable of (user_text, assistant_text) pairs
                used as few-shot demonstrations.
            model: Optional chat model name; falls back to DEFAULT_MODEL.
        """
        self.example_evaluations = example_evaluations
        self.system_prompt = system_prompt
        self.user_instructions = user_instructions
        self.model = model or self.DEFAULT_MODEL

    def _create_message(self, role, content):
        """Return one chat message dict with "role" and "content" keys."""
        return {"role": role, "content": content}

    def __create_evaluation_messages(self, user_prompt):
        """Build the message list: system, instructions, few-shot turns, final user turn."""
        messages = [
            self._create_message("system", self.system_prompt),
            self._create_message("user", self.user_instructions),
        ]
        for user, assistant in self.example_evaluations:
            messages.append(self._create_message("user", user))
            messages.append(self._create_message("assistant", assistant))
        messages.append(self._create_message("user", user_prompt))
        return messages

    def _run_chat_completions(self, messages):
        """Send `messages` to the configured model and return the first choice's text."""
        # Keep the shared client in sync with the module-level key before each call.
        client.api_key = openai.api_key
        response = client.chat.completions.create(
            model=self.model,
            messages=messages
        )
        return response.choices[0].message.content

    def __parse_score_and_justification(self, full_response):
        """Extract (score, justification) from the model reply.

        Args:
            full_response: Reply text; may be None or empty.

        Returns:
            A (score, justification) tuple of strings. The score is the digit
            run after "Stars:" or "0" when absent; the justification is
            everything after "Justification:" (stripped) or a placeholder
            sentence when absent.
        """
        # Treat a missing reply (None) as empty text so re.search does not
        # raise TypeError; the placeholder values below are returned instead.
        text = full_response or ""
        score_match = re.search(r"Stars:\s*(\d+)", text)
        # DOTALL lets the justification span multiple lines.
        justification_match = re.search(r"Justification:\s*(.+)", text, re.DOTALL)

        score = score_match.group(1) if score_match else "0"
        justification = justification_match.group(1).strip() if justification_match else "No justification provided."
        return score, justification

    def get_score_and_justification(self, user_prompt, llm_response):
        """Grade `llm_response` as an answer to `user_prompt`.

        Performs one chat-completions request.

        Returns:
            The (score, justification) tuple of strings produced by the parser.
        """
        prompt_and_response = f"Question:{user_prompt}\nAnswer:{llm_response}"
        messages = self.__create_evaluation_messages(prompt_and_response)
        full_response = self._run_chat_completions(messages)
        return self.__parse_score_and_justification(full_response)


In [33]:
# Class-based evaluator: unlike get_eval_response(), this also sends the scoring
# rubric (`user_content`) as the first user message.
copilot = EvaluationCopilot(system_content, user_content, few_shot_examples)
# Performs a live API request; both returned values are strings.
score, justification = copilot.get_score_and_justification(prompt, primary_response)
print("Score:", score)
print("Justification:", justification)

Score: 5
Justification: The answer has perfect coherency. It provides a detailed and comprehensive account of the Napoleonic 
Wars, covering the timeline, key battles, involved nations, and the consequences of the conflicts. The answer 
follows a logical and chronological structure, effectively integrating all the necessary information to provide a 
coherent understanding of the topic.


In [44]:
class ImprovementCopilot(EvaluationCopilot):
    """Suggests how to rephrase a user prompt, based on a graded answer.

    The answer is scored first, then the question, answer, score and
    justification are sent together with few-shot
    (input, evaluation, improvement) examples to obtain a suggestion.

    NOTE: when no `evaluator` is supplied, scoring uses the inherited
    get_score_and_justification(), which reads `system_prompt` and
    `user_instructions` from this instance. Those are the *improvement*
    prompts, so the scoring request carries no rating rubric. Pass an
    EvaluationCopilot configured with the evaluation prompts as `evaluator`
    to score with the proper instructions.
    """

    def __init__(self, system_prompt, user_instructions, example_improvement_suggestions,
                 evaluator=None, verbose=True):
        """Store prompts, examples and options.

        Args:
            system_prompt: "system" message for the improvement request.
            user_instructions: First "user" message for the improvement request.
            example_improvement_suggestions: Iterable of
                (user_text, evaluation_text, improvement_text) triples.
            evaluator: Optional object exposing
                get_score_and_justification(user_prompt, llm_response); used
                for scoring instead of this instance when given.
            verbose: When True (default, the previous behaviour) the extended
                prompt and the raw model reply are printed.
        """
        # The base class only needs the (input, evaluation) part of each triple.
        example_evaluations = [(user, assistant) for user, assistant, _ in example_improvement_suggestions]
        super().__init__(system_prompt, user_instructions, example_evaluations)
        self.example_improvement_suggestions = example_improvement_suggestions
        self.evaluator = evaluator
        self.verbose = verbose

    def __create_improvement_messages(self, user_prompt):
        """Build the message list for the improvement request.

        Each few-shot user turn is the example input followed by its
        evaluation; the matching assistant turn is the reference improvement.
        """
        messages = [
            self._create_message("system", self.system_prompt),
            self._create_message("user", self.user_instructions),
        ]
        for user, assistant, improvement in self.example_improvement_suggestions:
            messages.append(self._create_message("user", user + "\n" + assistant))
            messages.append(self._create_message("assistant", improvement))
        messages.append(self._create_message("user", user_prompt))
        return messages

    def __parse_improvement_suggestion(self, full_response):
        """Return the text after "Improvement Suggestion:", or a placeholder.

        A missing reply (None) is treated as empty text so that re.search does
        not raise TypeError.
        """
        improvement_match = re.search(r"Improvement Suggestion:\s*(.+)", full_response or "", re.DOTALL)
        improvement_suggestion = improvement_match.group(1).strip() if improvement_match else "No improvement suggestion provided."
        return improvement_suggestion

    def get_improvement_suggestion(self, user_prompt, llm_response):
        """Score the answer, then ask the model how the prompt could be improved.

        Performs two chat-completions requests (scoring, then improvement).

        Args:
            user_prompt: The original question.
            llm_response: The answer that was given to it.

        Returns:
            The parsed improvement suggestion as a string.
        """
        grader = self.evaluator if self.evaluator is not None else self
        score, justification = grader.get_score_and_justification(user_prompt, llm_response)
        # Same "Question / Answer / Stars / Justification" layout as the few-shot user turns.
        extended_user_prompt = f"Question: {user_prompt}\nAnswer: {llm_response}\nStars: {score}\nJustification: {justification}"
        if self.verbose:
            print("Extended User Prompt:", extended_user_prompt)
        messages = self.__create_improvement_messages(extended_user_prompt)
        full_response = self._run_chat_completions(messages)
        if self.verbose:
            print("Full Response:", full_response)
        return self.__parse_improvement_suggestion(full_response)


In [45]:
# NOTE(review): `copilot` was bound to an EvaluationCopilot in an earlier cell and
# is rebound here to an ImprovementCopilot; a distinct name would make
# out-of-order re-runs less error-prone.
copilot = ImprovementCopilot(system_content_improvements, user_instructions_improvements, example_improvement_suggestions)
# Performs live API requests (scoring, then improvement) and prints debug output.
improvement_suggestion = copilot.get_improvement_suggestion(prompt, primary_response)
print("Improvement Suggestion:", improvement_suggestion)

Extended User Prompt: Question: Tell me about the Napoleanic Wars
Answer: The Napoleonic Wars were a series of conflicts that took place from 1803 to 1815, primarily involving France and its allies against various coalitions of European powers. These wars were primarily led by the French Emperor Napoleon Bonaparte, who aimed to extend French dominance across Europe and establish a Napoleonic Empire.

The conflicts began with the War of the Third Coalition in 1803, when Britain, Austria, and Russia formed an alliance against France. Napoleon achieved several victories, including the famous Battle of Austerlitz in 1805, which resulted in the dissolution of the Third Coalition.

In 1806, the Fourth Coalition was formed by Prussia, Russia, Saxony, and Sweden, resulting in Napoleon's victory over Prussia in the Battle of Jena-Auerstedt. Napoleon went on to establish the Confederation of the Rhine, a network of client states under French influence in Germany.

The Peninsular War from 1808 to