# **Section 1: Setup and Imports** <a id="1"></a>

In [1]:
# Install if not already installed (uncomment below in Jupyter)
# !pip install openai python-dotenv
# %pip install duckduckgo-search

import openai # for GPT-3
import os # for environment variables
from dotenv import load_dotenv # for loading environment variables
from difflib import SequenceMatcher # for comparing strings
import re # regular expressions
from duckduckgo_search import DDGS # for searching DuckDuckGo (web information retrieval)
import matplotlib.pyplot as plt # for plotting
from markdown2 import markdown # for converting markdown to HTML
from playwright.sync_api import sync_playwright # for scraping web pages


In [2]:
# Load the OpenAI API key from .env file
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Check to confirm it's loaded (optional)
print("✅ API key loaded:", openai.api_key is not None)

✅ API key loaded: True


# **Section 2: Functions** <a id="2"></a>
## **2.1 OpenAI Functions** <a id="2.1"></a>

In [3]:
# Function to call OpenAI's ChatCompletion API with structured messages
def call_openai(messages, model="gpt-3.5-turbo", temperature=0.7):
    """
    Calls OpenAI's ChatCompletion API with structured messages.

    Parameters:
    messages (list): A list of message dictionaries, where each dictionary contains 'role' and 'content' keys.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.

    Workflow:
    1. The function takes the input parameters and calls the OpenAI ChatCompletion API.
    2. The API returns a response containing multiple choices.
    3. The function extracts the content of the first choice from the response.

    Returns:
    str: The content of the first choice from the API response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature
    )
    return response['choices'][0]['message']['content']

In [4]:
# Helper to construct system + user messages for the reasoning agent
def build_review_prompt(report_text, history=[]):
    """
    Constructs system and user messages for the reasoning agent to review a consulting report.

    Parameters:
    report_text (str): The text of the consulting report to be reviewed.
    history (list): A list of previous messages (optional). Default is an empty list.

    Workflow:
    1. The function creates a system message that sets the context for the reasoning agent.
    2. It then creates a user message containing the consulting report text.
    3. The function combines the system message, the history of previous messages, and the user message into a single list.

    Returns:
    list: A list of message dictionaries, including the system message, any historical messages, and the user message.
    """
    system_msg = {
        "role": "system",
        "content": (
            "You are an experienced IT strategy consultant. "
            "You are reviewing a consulting report for completeness, clarity, risks, and alignment with best practices. "
            "Think step-by-step and identify gaps, ask clarifying questions, or suggest improvements. "
            "Your goal is to provide helpful, critical feedback using your expert knowledge."
        )
    }

    user_msg = {
        "role": "user",
        "content": f"Here is the consulting report to review:\n\n{report_text}"
    }

    return [system_msg] + history + [user_msg]


In [5]:
# Track total tokens used and estimated cost
total_tokens_used = 0
estimated_cost_usd = 0.0

# Cost per 1K tokens for GPT-3.5-turbo (adjust if using GPT-4)
COST_PER_1K_TOKENS = 0.0015

def call_openai_with_tracking(messages, model="gpt-3.5-turbo", temperature=0.7, max_tokens=500):
    """
    Calls OpenAI's ChatCompletion API with structured messages and tracks token usage and estimated cost.

    Parameters:
    messages (list): A list of message dictionaries, where each dictionary contains 'role' and 'content' keys.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.
    max_tokens (int): The maximum number of tokens to generate in the completion. Default is 500.

    Workflow:
    1. The function takes the input parameters and calls the OpenAI ChatCompletion API.
    2. The API returns a response containing multiple choices and token usage information.
    3. The function extracts the content of the first choice from the response.
    4. It updates the total tokens used and the estimated cost in USD.
    5. It logs the prompt tokens, completion tokens, total tokens used so far, and the estimated cost.

    Returns:
    str: The content of the first choice from the API response.
    """
    global total_tokens_used, estimated_cost_usd

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens
    )

    usage = response['usage']
    prompt_tokens = usage.get('prompt_tokens', 0)
    completion_tokens = usage.get('completion_tokens', 0)
    total = usage.get('total_tokens', prompt_tokens + completion_tokens)

    # Update tracking
    total_tokens_used += total
    estimated_cost_usd += (total / 1000) * COST_PER_1K_TOKENS

    # Logging
    print(f"🔢 Prompt: {prompt_tokens} tokens | Completion: {completion_tokens} tokens | Total so far: {total_tokens_used} tokens")
    print(f"💰 Estimated cost so far: ${estimated_cost_usd:.4f} USD")

    return response['choices'][0]['message']['content']


## **2.2 Data Preprocessing Functions** <a id="2.2"></a>

In [6]:
# Pre-process the report into sections
def split_report_into_sections(report_text):
    """
    Splits a consulting report into sections based on headers.

    Parameters:
    report_text (str): The text of the consulting report to be split into sections.

    Workflow:
    1. The function initializes an empty dictionary to store sections and sets the current section to "Header".
    2. It splits the report text into lines and iterates through each line.
    3. For each line:
       - If the line is blank, it skips to the next line.
       - If the line ends with a colon and contains 4 or fewer words, it is considered a section header.
         The current section is updated to this header (without the colon), and a new entry is created in the dictionary.
       - Otherwise, the line is added to the current section's content.
    4. After processing all lines, the function joins the content of each section into a single string.

    Returns:
    dict: A dictionary where keys are section headers and values are the corresponding section contents.
    """
    sections = {}
    lines = report_text.strip().split("\n")
    current_section = "Header"
    sections[current_section] = []

    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        elif line.endswith(":") and len(line.split()) <= 4: # Check if it's a section header.  If yes, create a new section with the header as the key.
            current_section = line.replace(":", "").strip()
            sections[current_section] = []
        else:
            sections[current_section].append(line) # Add the line (contents below header) to the current section

    # Join section contents with newlines
    for key in sections:
        sections[key] = "\n".join(sections[key])

    return sections


## **2.3 Static Reasoning Agent Functions** <a id="2.3"></a>

In [7]:
class ITReportReviewer:
    """
    A class to review sections of an IT consulting report using OpenAI's ChatCompletion API.

    Process:
    1. Initializes the reviewer with the report sections
    2. Lets it review one section at a time using real reasoning
    3. Stores and prints each review with feedback

    Attributes:
    sections (dict): A dictionary where keys are section headers and values are the corresponding section contents.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.
    review_history (list): A list to store the history of reviews for each section.

    Methods:
    review_section(section_name):
        Reviews a specific section of the report using OpenAI's ChatCompletion API.
    """
    
    def __init__(self, report_sections, model="gpt-3.5-turbo", temperature=0.7):
        """
        Initializes the ITReportReviewer with the given report sections, model, and temperature.

        Parameters:
        report_sections (dict): A dictionary where keys are section headers and values are the corresponding section contents.
        model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
        temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.
        """
        self.sections = report_sections
        self.model = model
        self.temperature = temperature
        self.review_history = []

    def review_section(self, section_name):
        """
        Reviews a specific section of the report using OpenAI's ChatCompletion API.

        Parameters:
        section_name (str): The name of the section to review.

        Workflow:
        1. Retrieves the text of the specified section from the sections dictionary.
        2. If the section is empty or missing, prints a warning message.
        3. Builds a prompt for reviewing the section using the build_review_prompt function.
        4. Calls the OpenAI ChatCompletion API with tracking using the call_openai_with_tracking function.
        5. Saves the review in the review_history attribute.
        6. Prints the review for the specified section.

        Returns:
        None
        """
        section_text = self.sections.get(section_name, "")
        if not section_text:
            print(f"⚠️ Section '{section_name}' is empty or missing.")
            return

        # Build prompt for reviewing the section
        messages = build_review_prompt(
            report_text=f"Section: {section_name}\n\n{section_text}",
            history=[]
        )

        # Get AI-generated reasoning using tracked OpenAI call
        review = call_openai_with_tracking(messages, model=self.model, temperature=self.temperature)

        # Save the review
        self.review_history.append({
            "section": section_name,
            "review": review
        })

        print(f"\n✅ Review for section '{section_name}':\n{review}\n{'-'*60}")


In [8]:
def summarize_full_review(agent):
    # Combine all reviews into a single prompt
    combined_review_text = ""
    for step in agent.review_history:
        combined_review_text += f"Section: {step['section']}\nFeedback: {step['review']}\n\n"

    # Build new prompt asking for a final report assessment
    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert IT strategy consultant reviewing an internal report assessment. "
                "Summarize the overall quality of the report based on the following section reviews. "
                "Highlight gaps, strengths, and suggest next steps to improve the report."
            )
        },
        {
            "role": "user",
            "content": combined_review_text
        }
    ]

    summary = call_openai_with_tracking(messages)
    return summary

## **2.4 ReAct Reasoning Agent Functions** <a id="2.4"></a>

In [9]:
class ReActConsultantAgent:
    """
    A class to review sections of an IT consulting report using the ReAct (Reason + Act) framework with OpenAI's ChatCompletion API.

    Process:
    1. Initializes the agent with the section name and text.
    2. Builds a prompt for the ReAct framework.
    3. Tracks the history of thoughts, actions, and observations.

    Attributes:
    section_name (str): The name of the section to review.
    section_text (str): The text of the section to review.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.
    history (list): A list to store the history of thoughts, actions, and observations.

    Methods:
    build_react_prompt():
        Builds a prompt for the ReAct framework based on the section text and history.
    """

    def __init__(self, section_name, section_text, model="gpt-3.5-turbo", temperature=0.7):
        """
        Initializes the ReActConsultantAgent with the given section name, section text, model, and temperature.

        Parameters:
        section_name (str): The name of the section to review.
        section_text (str): The text of the section to review.
        model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
        temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.
        """
        self.section_name = section_name
        self.section_text = section_text
        self.model = model
        self.temperature = temperature
        self.history = []
        self.tool_usage = {}  # {action_name: count}
        self.memory = {
            "section_notes": {},     # {section_name: [insight1, insight2, ...]}
            "cross_section_flags": [],  # [(sectionA, sectionB, observation)]
            "tool_history": []       # [(step_number, action, section)]
        }

    def build_react_prompt(self):
        """
        Builds a prompt for the ReAct framework based on the section text and history.

        Workflow:
        1. Constructs a base prompt with the section name and text.
        2. Iterates through the history of thoughts, actions, and observations, appending them to the base prompt.
        3. Adds a final line asking for the next thought and action.

        Returns:
        list: A list containing a single dictionary with the role 'user' and the constructed prompt as content.
        """
        base_prompt = (
            f"You are an expert IT strategy consultant reviewing a report section titled '{self.section_name}'.\n"
            "You are using ReAct (Reason + Act) to think through the review.\n\n"
            "Format each response like this:\n"
            "Thought: <your reasoning>\n"
            "Action: <one of: ask_question, flag_risk, recommend_fix, summarize>\n\n"
            f"Here is the section content:\n{self.section_text}\n\n"
        )

        for step in self.history:
            base_prompt += f"Thought: {step['thought']}\n"
            base_prompt += f"Action: {step['action']}\n"
            base_prompt += f"Observation: {step['observation']}\n\n"

        base_prompt += "What is your next Thought and Action?"

        return [{"role": "user", "content": base_prompt}]

    def build_react_prompt_withTools(self):
            """
            Builds a prompt for the ReAct framework based on the section text and history.

            Workflow:
            1. Constructs a base prompt with the section name and text.
            2. Iterates through the history of thoughts, actions, and observations, appending them to the base prompt.
            3. Adds a final line asking for the next thought and action.
            For the check_guideline action, the prompt includes a placeholder for the topic.
            LLM infers topic from the section_text

            Returns:
            list: A list containing a single dictionary with the role 'user' and the constructed prompt as content.
            """
            base_prompt = (
                f"You are an expert IT strategy consultant reviewing a report section titled '{self.section_name}'.\n"
                "You are using ReAct (Reason + Act) to think through the review.\n\n"
                "Format each response like this:\n"
                "Thought: <your reasoning>\n"
                "Action: <one of: " + format_tools_list(tool_catalog) + "'\n\n"
            )

            base_prompt += format_tool_catalog_for_prompt(tool_catalog)
            base_prompt += f"Here is the section content:\n{self.section_text}\n\n"
            
            for step in self.history:
                base_prompt += f"Thought: {step['thought']}\n"
                base_prompt += f"Action: {step['action']}\n"
                base_prompt += f"Observation: {step['observation']}\n\n"

            base_prompt += "What is your next Thought and Action?"

            return [{"role": "user", "content": base_prompt}]


In [10]:
def run_react_loop_static(agent, max_steps=5):
    """
    Runs the ReAct (Reason + Act) loop for a specified number of steps.

    Purpose:
    This function iterates through a reasoning and action loop using the ReAct framework to review a section of an IT consulting report. It generates thoughts, actions, and observations at each step, and stores the history of these steps.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, initialized with the section name and text.
    max_steps (int): The maximum number of steps to run the loop. Default is 5.

    Workflow:
    1. Iterates through the loop for a maximum of `max_steps` times.
    2. In each iteration:
    - Calls `agent.build_react_prompt()` to construct the prompt for the ReAct framework.
    - Calls `call_openai_with_tracking()` to get the response from the OpenAI API.
    - Parses the response to extract the thought and action.
    - Generates an observation based on the action.
    - Stores the thought, action, and observation in the agent's history.
    - Prints the result of the current step.
    - Breaks the loop if the action is "summarize".
    3. Returns the full reasoning history.

    Returns:
    list: A list of dictionaries, where each dictionary contains the thought, action, and observation for each step.
    """
    for step_num in range(max_steps):
        messages = agent.build_react_prompt()
        response = call_openai_with_tracking(messages, model=agent.model, temperature=agent.temperature)

        # Parse response
        try:
            lines = response.strip().split("\n")
            thought = next(line.split(":", 1)[1].strip() for line in lines if line.lower().startswith("thought"))
            action = next(line.split(":", 1)[1].strip() for line in lines if line.lower().startswith("action"))
        except:
            print("⚠️ Failed to parse model response.")
            break

        # Generate observation based on action
        if action == "ask_question":
            observation = "Good question to ask the client for clarification."
        elif action == "flag_risk":
            observation = "This is a legitimate risk that should be addressed."
        elif action == "recommend_fix":
            observation = "The recommendation improves the section's clarity and compliance."
        elif action == "summarize":
            observation = "Review complete."
        else:
            observation = "Unrecognized action."

        # Store step
        agent.history.append({
            "thought": thought,
            "action": action,
            "observation": observation
        })

        # Print result of this step
        print(f"\n🔁 Step {step_num + 1}")
        print(f"🧠 Thought: {thought}")
        print(f"⚙️ Action: {action}")
        print(f"👀 Observation: {observation}")

        if action == "summarize":
            break

    # Return full reasoning history
    return agent.history


In [41]:
def run_react_loop_check_withTool(agent, max_steps=5):
    """
    Runs the ReAct (Reason + Act) loop for a specified number of steps.

    Purpose:
    This function iterates through a reasoning and action loop using the ReAct framework to review a section of an IT consulting report. It generates thoughts, actions, and observations at each step, and stores the history of these steps.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, initialized with the section name and text.
    max_steps (int): The maximum number of steps to run the loop. Default is 5.

    Workflow:
    1. Iterates through the loop for a maximum of `max_steps` times.
    2. In each iteration:
       - Calls `agent.build_react_prompt()` to construct the prompt for the ReAct framework.
       - Calls `call_openai_with_tracking()` to get the response from the OpenAI API.
       - Parses the response to extract the thought and action.
       - Generates an observation based on the action.
       - Stores the thought, action, and observation in the agent's history.
       - Prints the result of the current step.
       - Breaks the loop if the action is "summarize".
    3. Returns the full reasoning history.

    Returns:
    list: A list of dictionaries, where each dictionary contains the thought, action, and observation for each step.
    """
    for step_num in range(max_steps):
        messages = agent.build_react_prompt_withTools()
        response = call_openai_with_tracking(messages, model=agent.model, temperature=agent.temperature)

        # Parse response
        try:
            lines = response.strip().split("\n")
            thought = next(line.split(":", 1)[1].strip() for line in lines if line.lower().startswith("thought"))
            action = next(line.split(":", 1)[1].strip() for line in lines if line.lower().startswith("action"))
            # Log tool usage
            if hasattr(agent, "tool_usage"):
                agent.tool_usage[action] = agent.tool_usage.get(action, 0) + 1
        except:
            print("⚠️ Failed to parse model response.")
            break

        # Generate observation based on action
        if action.startswith("check_guideline"): # tool #1: check_guideline
            match = re.match(r'check_guideline\["(.+?)"\]', action)
            if match:
                topic = match.group(1)
                observation = check_guideline(topic)
            else:
                print("⚠️ Could not parse check_guideline action.")
        elif action.startswith("keyword_match_in_section"): # tool #2: keyword_match_in_section
                match = re.match(r'keyword_match_in_section\["(.+?)"\]', action)
                if match:
                    term = match.group(1)
                    observation = keyword_match_in_section(term, agent.section_text)
                else:
                    print("⚠️ Could not parse keyword_match_in_section action.")
        elif action.startswith("check_timeline_feasibility"): # tool #3: check_timeline_feasibility
            try:
                duration = action.split("[", 1)[1].rstrip("]").strip('"')
                observation = check_timeline_feasibility(duration)
            except:
                observation = "⚠️ Could not parse check_timeline_feasibility action."
        elif action.startswith("search_report"): # tool #4: search_report
            match = re.match(r'search_report\["(.+?)"\]', action)
            if match:
                term = match.group(1)
                observation = search_report(term, report_sections)
            else:
                observation = "⚠️ Could not parse search_report action."
        elif action.startswith("search_web"):
            match = re.match(r'search_web\["(.+?)"\]', action)
            if match:
                query = match.group(1)
                observation = search_web(query)
            else:
                observation = "⚠️ Could not parse search_web action."
        elif action == "check_for_jargon":
            observation = check_for_jargon(agent.section_text)
        elif action == "generate_client_questions":
            observation = generate_client_questions(agent.section_text)
        elif action == "highlight_missing_sections":
            observation = highlight_missing_sections(report_sections)
        elif action.startswith("check_alignment_with_goals"):
            match = re.match(r'check_alignment_with_goals\["(.+?)"\]', action)
            if match:
                sec_name = match.group(1)
                observation = check_alignment_with_goals(sec_name, report_sections)
            else:
                observation = "⚠️ Could not parse check_alignment_with_goals action."
        elif action.startswith("compare_with_other_section"):
            match = re.match(r'compare_with_other_section\["(.+?)",\s*"(.+?)"\]', action)
            if match:
                section_a = match.group(1)
                section_b = match.group(2)
                observation = compare_with_other_section(section_a, section_b, report_sections)
            else:
                observation = "⚠️ Could not parse compare_with_other_section action."
        elif action == "ask_question": # hardcoded action
            observation = "Good question to ask the client for clarification."
        elif action == "flag_risk": # hardcoded action
            observation = "This is a legitimate risk that should be addressed."
        elif action == "recommend_fix": # hardcoded action 
            observation = "The recommendation improves the section's clarity and compliance."
        elif action == "summarize": # hardcoded action; end the loop
            observation = "Review complete."
        elif action == "tool_help":
            observation = format_tool_catalog_for_prompt(tool_catalog)
        elif action.startswith("suggest_tool_for"):
            match = re.match(r'suggest_tool_for\["(.+?)"\]', action) # Extract the goal from the action
            if match:
                goal = match.group(1)
                matches = pick_tool_by_intent_fuzzy(goal, tool_catalog)
                if matches:
                    observation = "Best match based on your goal:\n" + "\n".join(
                        [f"{tool} (match: {score})" for tool, score in matches]
                    )
                else:
                    observation = "⚠️ No matching tool found. Showing available tools:\n" + format_tool_catalog_for_prompt(tool_catalog)
            else:
                observation = "⚠️ Could not parse suggest_tool_for action."
        elif action == "final_summary":
            observation = generate_final_summary(agent)
        else:
            observation = "Unrecognized action."
        
        # update agent memory 
        # Track tool history
        agent.memory["tool_history"].append((step_num, action, agent.section_name))

        # If observation is useful, store in section memory
        if action not in {"summarize", "tool_help", "suggest_tool_for"}:
            agent.memory["section_notes"].setdefault(agent.section_name, []).append(observation)

        # Special case for section comparisons
        if action.startswith("compare_with_other_section"):
            match = re.match(r'compare_with_other_section\["(.+?)",\s*"(.+?)"\]', action)
            if match:
                sectionA, sectionB = match.groups()
                agent.memory["cross_section_flags"].append((sectionA, sectionB, observation))

        # Store step
        agent.history.append({
            "thought": thought,
            "action": action,
            "observation": observation
        })

        # Print result of this step
        print(f"\n🔁 Step {step_num + 1}")
        print(f"🧠 Thought: {thought}")
        print(f"⚙️ Action: {action}")
        print(f"👀 Observation: {observation}")

        if action == "summarize":
            break

    # Return full reasoning history
    return agent.history


In [12]:
# Define dicionary of tools available for ReActConsultantAgent to call

tool_catalog = {
    "check_guideline": {
        "description": "Look up a best practice for a given topic",
        "usage": 'check_guideline["cloud security"]',
        "version": "v1.0",
        "examples": [
            "check_guideline[\"data governance\"]",
            "check_guideline[\"migration strategy\"]"
        ]
    },
    "keyword_match_in_section": {
        "description": "Check if a keyword appears in the current section",
        "usage": 'keyword_match_in_section["encryption"]',
        "version": "v1.0",
        "examples": [
            "keyword_match_in_section[\"stakeholders\"]"
        ]
    },
    "check_timeline_feasibility": {
        "description": "Check if a project timeline is realistic for migration",
        "usage": 'check_timeline_feasibility["12 weeks"]',
        "version": "v1.0",
        "examples": [
            "check_timeline_feasibility[\"3 months\"]"
        ]
    },
    "search_report": {
        "description": "Search the entire report for a concept or term",
        "usage": 'search_report["data governance"]',
        "version": "v1.0",
        "examples": [
            "search_report[\"Zero Trust\"]"
        ]
    },
    "ask_question": {
        "description": "Ask a clarifying question about the report",
        "usage": "ask_question",
        "version": "v1.0",
        "examples": ["ask_question"]
    },
    "flag_risk": {
        "description": "Flag a gap, issue, or concern in the section",
        "usage": "flag_risk",
        "version": "v1.0",
        "examples": ["flag_risk"]
    },
    "recommend_fix": {
        "description": "Suggest a specific improvement",
        "usage": "recommend_fix",
        "version": "v1.0",
        "examples": ["recommend_fix"]
    },
    "summarize": {
        "description": "Summarize your review and end the loop",
        "usage": "summarize",
        "version": "v1.0",
        "examples": ["summarize"]
    },
    "tool_help": {
        "description": "View descriptions, usage, and examples for available tools",
        "usage": "tool_help",
        "version": "v1.0",
        "examples": ["tool_help"]
    },
    "suggest_tool_for": {
        "description": "Ask which tool best supports a particular goal or intent",
        "usage": 'suggest_tool_for["goal or intent"]',
        "version": "v1.0",
        "examples": [
            'suggest_tool_for["check if encryption is included"]',
            'suggest_tool_for["evaluate feasibility of 12-week timeline"]'
        ]
    }, 
    "search_web": {
        "description": "Look up a concept, framework, or term on the web (DuckDuckGo)",
        "usage": 'search_web["Zero Trust model"]',
        "version": "v1.0",
        "examples": ['search_web["data mesh"]']
    },
    "check_for_jargon": {
        "description": "Identify jargon or overly technical terms that may need simplification",
        "usage": "check_for_jargon",
        "version": "v1.0",
        "examples": ["check_for_jargon"]
    },
    "generate_client_questions": {
        "description": "Generate clarifying or skeptical questions a client might ask about the section",
        "usage": "generate_client_questions",
        "version": "v1.0",
        "examples": ["generate_client_questions"]
    },
    "highlight_missing_sections": {
        "description": "Identify which expected sections are missing from the report",
        "usage": "highlight_missing_sections",
        "version": "v1.0",
        "examples": ["highlight_missing_sections"]
    },
    "check_alignment_with_goals": {
        "description": "Evaluate how well a report section aligns with the stated goals",
        "usage": 'check_alignment_with_goals["section_name"]',
        "version": "v1.0",
        "examples": ['check_alignment_with_goals["Key Recommendations"]']
    },
    "compare_with_other_section": {
        "description": "Compare two sections to identify overlaps, contradictions, or gaps",
        "usage": 'compare_with_other_section["sectionA", "sectionB"]',
        "version": "v1.0",
        "examples": [
            'compare_with_other_section["Key Recommendations", "Roadmap & Timeline"]',
            'compare_with_other_section["Future State Vision", "Technology Architecture"]'
        ]
    },
    "final_summary": {
        "description": "Generate a final client-facing summary based on insights gathered across all sections",
        "usage": "final_summary",
        "version": "v1.0",
        "examples": ["final_summary"]
    }
}


In [13]:
# Format the tool catalog for display in the prompt for consumption by LLM

def format_tool_catalog_for_prompt(tool_catalog):
    """
    Formats the tool catalog for display in the prompt for consumption by LLM.

    Purpose:
    This function formats the tool catalog into a human-readable string that lists each tool along with its description and usage. This formatted string can be used in prompts for language models to understand the available tools.

    Parameters:
    tool_catalog (dict): A dictionary where keys are tool names and values are dictionaries containing tool metadata, including 'description' and 'usage'.

    Workflow:
    1. Initializes a list with the header "Available tools:".
    2. Iterates through each tool in the tool_catalog dictionary.
    3. For each tool, appends its name, description, and usage to the list.
    4. Joins the list into a single string with newline characters.

    Returns:
    str: A formatted string listing all tools with their descriptions and usage.
    """
    lines = ["Available tools:\n"]
    for tool, meta in tool_catalog.items():
        lines.append(f"- {tool} (v{meta['version']}): {meta['description']}")
        lines.append(f"  Usage: {meta['usage']}")
        if meta.get("examples"):
            for ex in meta["examples"]:
                lines.append(f"  Example: {ex}")
        lines.append("")  # spacing between tools
    return "\n".join(lines)



In [14]:
def format_tools_list(tool_catalog):
    """
    Formats the list of tools from the tool_catalog dictionary as a comma-separated string.

    Parameters:
    tool_catalog (dict): A dictionary where keys are tool names and values are dictionaries containing tool metadata.

    Returns:
    str: A comma-separated string of tool names.
    """
    tools_list = ", ".join(tool_catalog.keys())
    return tools_list

# Example usage
formatted_tools = format_tools_list(tool_catalog)
print(formatted_tools)

check_guideline, keyword_match_in_section, check_timeline_feasibility, search_report, ask_question, flag_risk, recommend_fix, summarize, tool_help, suggest_tool_for, search_web, check_for_jargon, generate_client_questions, highlight_missing_sections, check_alignment_with_goals, compare_with_other_section, final_summary


In [15]:
# Tool #1: Check for best practices in a given section

# Simulated best practices reference data
best_practices = {
    "cloud security": "Follow NIST Cybersecurity Framework. Include access control, encryption at rest/in-transit, and regular audits.",
    "data governance": "Establish data stewards, quality standards, lifecycle rules, and metadata documentation.",
    "migration": "Use phased migration, sandbox testing, rollback planning, and stakeholder communication."
}

def check_guideline(topic):
    """
    Checks for best practices related to a given topic.

    Purpose:
    This function looks up best practices for a specified topic from a predefined dictionary of best practices.

    Parameters:
    topic (str): The topic for which best practices are to be checked.

    Workflow:
    1. The function converts the topic to lowercase to ensure case-insensitive matching.
    2. It looks up the topic in the `best_practices` dictionary.
    3. If a matching guideline is found, it returns the guideline.
    4. If no matching guideline is found, it returns a message indicating that no matching guideline was found.

    Returns:
    str: The best practice guideline for the specified topic, or a message indicating no matching guideline was found.
    """
    return best_practices.get(topic.lower(), "No matching guideline found.")


In [16]:
# Tool #2: Keyword matching in a section
# This tool helps the agent check if a keyword or concept is explicitly mentioned in the section.
# This is great for validating whether the report includes key elements (e.g., "encryption", "stakeholders", "Zero Trust").  

def keyword_match_in_section(term, section_text):
    """
    Checks if a keyword or concept is explicitly mentioned in a section of the report.

    Purpose:
    This function helps validate whether the report includes key elements by checking if a specified keyword or concept is mentioned in the section text.

    Parameters:
    term (str): The keyword or concept to search for in the section.
    section_text (str): The text of the section to search within.

    Workflow:
    1. Converts the keyword and section text to lowercase to ensure case-insensitive matching.
    2. Checks if the keyword is present in the section text.
    3. Returns a message indicating whether the keyword was found or not.

    Returns:
    str: A message indicating whether the keyword was found in the section.
    """
    term_lower = term.lower()
    if term_lower in section_text.lower():
        return f"The keyword '{term}' was found in the section."
    else:
        return f"The keyword '{term}' was NOT found in the section."


In [17]:
# Tool #3: Check feasibility of timeline for IT migration
# This tool helps the agent assess the feasibility of a timeline for an IT migration project.
# It checks if the timeline is too short, potentially feasible, or reasonable for a full migration.

def check_timeline_feasibility(duration_str):
    """
    Checks the feasibility of a timeline for an IT migration project.

    Purpose:
    This function helps assess whether a given timeline for an IT migration project is too short, potentially feasible, or reasonable.

    Parameters:
    duration_str (str): The timeline duration as a string (e.g., "6-12 months", "8 to 10 weeks", "a few months").

    Workflow:
    1. Converts the duration string to lowercase and strips any leading/trailing whitespace.
    2. Initializes a dictionary of fuzzy terms (e.g., "a few", "several") with their estimated numeric values.
    3. Checks if the duration string contains any fuzzy terms and estimates the duration in months.
    4. If no fuzzy terms are found, checks for ranges (e.g., "6-12 months") or single values (e.g., "6 months") and calculates the average duration in months.
    5. If the duration cannot be parsed, returns a warning message.
    6. Evaluates the feasibility of the timeline based on the calculated duration in months:
       - If less than 3 months, returns that the timeline is likely too short.
       - If between 3 and 12 months, returns that the timeline is potentially feasible.
       - If more than 12 months, returns that the timeline seems reasonable.

    Returns:
    str: A message indicating the feasibility of the timeline.
    """
    duration_str = duration_str.lower().strip()

    fuzzy_terms = {
        "a few": 3,
        "a couple": 2,
        "several": 6,
        "some": 4,
        "many": 9,
        "a handful": 5
    }

    months = None

    # Check for fuzzy terms like "a few months"
    for fuzzy_word, estimated_num in fuzzy_terms.items():
        if fuzzy_word in duration_str:
            if "week" in duration_str:
                months = estimated_num / 4
            elif "month" in duration_str:
                months = estimated_num
            break

    # Check for ranges like "6-12 months" or "8 to 10 weeks"
    if months is None:
        range_match = re.match(r'(\d+)\s*[-to]+\s*(\d+)\s*(weeks|months)', duration_str)
        single_match = re.match(r'(\d+)\s*(weeks|months)', duration_str)

        try:
            if range_match:
                start = int(range_match.group(1))
                end = int(range_match.group(2))
                unit = range_match.group(3)
                avg = (start + end) / 2
                months = avg / 4 if "week" in unit else avg

            elif single_match:
                num = int(single_match.group(1))
                unit = single_match.group(2)
                months = num / 4 if "week" in unit else num
        except:
            return "⚠️ Could not parse timeline value."

    if months is None:
        return "⚠️ Could not understand the timeline. Use phrases like '6 months', '8-12 weeks', or 'a few months'."

    # Evaluate the feasibility
    if months < 3:
        return f"The timeline ({duration_str}) is likely too short for a full migration."
    elif 3 <= months <= 12:
        return f"The timeline ({duration_str}) is potentially feasible depending on complexity."
    else:
        return f"The timeline ({duration_str}) seems reasonable for a full IT migration."


In [18]:
# Tool #4: Search for a term in the entire report
# This tool helps the agent search for a specific term in the entire consulting report.
# It returns the sections where the term was found, if any.

def search_report(term, report_sections):
    """
    Searches for a specific term in the entire consulting report.

    Purpose:
    This function helps the agent search for a specific term in the entire consulting report and returns the sections where the term was found, if any.

    Parameters:
    term (str): The term to search for in the report.
    report_sections (dict): A dictionary where keys are section headers and values are the corresponding section contents.

    Workflow:
    1. Initializes an empty list `found_in` to store the sections where the term is found.
    2. Iterates through each section in the `report_sections` dictionary.
    3. For each section, checks if the term (case-insensitive) is present in the section content.
    4. If the term is found, appends the section header to the `found_in` list.
    5. After checking all sections, returns a message indicating the sections where the term was found or a message indicating that the term was not found.

    Returns:
    str: A message indicating the sections where the term was found or a message indicating that the term was not found.
    """
    found_in = []
    for section, content in report_sections.items():
        if term.lower() in content.lower():
            found_in.append(section)
    if found_in:
        return f"The term '{term}' was found in: {', '.join(found_in)}."
    else:
        return f"The term '{term}' was NOT found anywhere in the report."

In [19]:
# Tool picker: Pick tools based on intent description
# This function picks tools from the tool catalog based on the intent description.
# It searches through the tool catalog and returns a list of tools whose descriptions match the given intent description.

def pick_tool_by_intent(intent_description, tool_catalog):
    """
    Picks tools from the tool catalog based on the intent description.

    Purpose:
    This function searches through the tool catalog and returns a list of tools whose descriptions match the given intent description.

    Parameters:
    intent_description (str): A description of the intent to match against tool descriptions.
    tool_catalog (dict): A dictionary where keys are tool names and values are dictionaries containing tool metadata, including 'description'.

    Workflow:
    1. Initializes an empty list `matches` to store the names of matching tools.
    2. Iterates through each tool in the `tool_catalog` dictionary.
    3. For each tool, checks if the `intent_description` is present in the tool's description (case-insensitive).
    4. If a match is found, appends the tool name to the `matches` list.
    5. Returns the list of matching tools.

    Returns:
    list: A list of tool names whose descriptions match the given intent description.
    """
    matches = []
    for tool, meta in tool_catalog.items():
        if intent_description.lower() in meta["description"].lower():
            matches.append(tool)
    return matches


In [20]:
# Tool picker: Pick tools based on fuzzy intent description matching
# This function picks tools from the tool catalog based on a fuzzy intent description matching.
# It uses fuzzy string matching to find tools whose descriptions closely match the given intent description.

def pick_tool_by_intent_fuzzy(intent_description, tool_catalog, threshold=0.3):
    """
    Picks tools from the tool catalog based on a fuzzy intent description matching.

    Purpose:
    This function uses fuzzy string matching to find tools whose descriptions closely match the given intent description.

    Parameters:
    intent_description (str): A description of the intent to match against tool descriptions.
    tool_catalog (dict): A dictionary where keys are tool names and values are dictionaries containing tool metadata, including 'description'.
    threshold (float): The minimum similarity ratio (between 0 and 1) to consider a match. Default is 0.3.

    Workflow:
    1. Initializes an empty list `matches` to store the names and similarity ratios of matching tools.
    2. Iterates through each tool in the `tool_catalog` dictionary.
    3. For each tool, calculates the similarity ratio between the `intent_description` and the tool's description using `SequenceMatcher`.
    4. If the similarity ratio exceeds the `threshold`, appends the tool name and ratio to the `matches` list.
    5. Sorts the `matches` list by similarity ratio in descending order.

    Returns:
    list: A list of tuples, where each tuple contains a tool name and its similarity ratio, sorted by best match.
    """
    matches = []
    for tool, meta in tool_catalog.items():
        ratio = SequenceMatcher(None, intent_description.lower(), meta["description"].lower()).ratio()
        if ratio > threshold:
            matches.append((tool, round(ratio, 2)))
    return sorted(matches, key=lambda x: -x[1])  # sort by best match


In [21]:
# Print tool usage summary

def print_tool_usage(agent):
    """
    Prints a summary of tool usage by the ReActConsultantAgent.

    Purpose:
    This function provides a summary of how many times each tool was used by the ReActConsultantAgent during the review process.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, which contains the tool usage data.

    Workflow:
    1. Prints a header "Tool Usage Summary".
    2. Iterates through the `tool_usage` dictionary of the agent, sorted by usage count in descending order.
    3. Prints the name of each tool and the number of times it was used.

    Returns:
    None
    """
    print("\n📊 Tool Usage Summary:")
    for tool, count in sorted(agent.tool_usage.items(), key=lambda x: -x[1]):
        print(f"- {tool}: {count} times")


In [22]:
# Plot tool usage summary
def plot_tool_usage(tool_usage_dict, title="Tool Usage Summary"):
    """
    Plots a horizontal bar chart summarizing the usage of various tools.

    Purpose:
    This function visualizes the usage count of different tools in a horizontal bar chart, making it easy to see which tools were used most frequently.

    Parameters:
    tool_usage_dict (dict): A dictionary where keys are tool names and values are their respective usage counts.
    title (str): The title of the plot. Default is "Tool Usage Summary".

    Workflow:
    1. Extracts the tool names and their usage counts from the tool_usage_dict.
    2. Creates a horizontal bar chart using matplotlib.
    3. Adds usage counts as text labels on the bars.
    4. Inverts the y-axis to display the highest count at the top.
    5. Adjusts the layout for better visualization and displays the plot.

    Returns:
    None
    """
    tools = list(tool_usage_dict.keys())
    counts = list(tool_usage_dict.values())

    plt.figure(figsize=(10, 6))
    bars = plt.barh(tools, counts)
    plt.xlabel("Usage Count")
    plt.title(title)

    # Add counts on the bars
    for bar in bars:
        width = bar.get_width()
        plt.text(width + 0.2, bar.get_y() + bar.get_height()/2, str(int(width)), va='center')

    plt.gca().invert_yaxis()  # Highest count at top
    plt.tight_layout()
    plt.show()


In [23]:
# Tool to search the web for relevant information

def search_web(query, max_results=1):
    """
    Searches the web for relevant information using DuckDuckGo.

    Purpose:
    This function uses the DuckDuckGo search engine to find relevant information based on a given query. It returns the snippet of the first search result.

    Parameters:
    query (str): The search query to find relevant information.
    max_results (int): The maximum number of search results to retrieve. Default is 1.

    Workflow:
    1. Initializes a DuckDuckGo search session using DDGS.
    2. Performs a text search with the given query and retrieves up to `max_results` results.
    3. Iterates through the search results and returns the snippet of the first result.
    4. If no results are found, returns a message indicating no relevant results were found.
    5. If an exception occurs during the search, returns a message indicating the web search failed along with the exception message.

    Returns:
    str: The snippet of the first search result, or a message indicating no relevant results were found or the web search failed.
    """
    try:
        with DDGS() as ddgs:
            results = ddgs.text(query, max_results=max_results)
            for r in results:
                return r["body"]  # return the first result's snippet
        return "No relevant results found."
    except Exception as e:
        return f"⚠️ Web search failed: {str(e)}"


In [24]:
# Tool to check for jargon or technical terms in a section

JARGON_LIST = {
    "synergy", "leverage", "optimize", "stakeholder alignment", "enablement",
    "digital transformation", "bandwidth", "scalability", "paradigm",
    "blockchain", "AI", "ML", "IoT", "Zero Trust", "DevOps", "infrastructure-as-code",
    "EHR", "CRM", "VPN", "cloud-native", "containerization", "agile methodology"
}

def check_for_jargon(section_text):
    """
    Checks for jargon or technical terms in a section of the report.

    Purpose:
    This function helps identify the presence of jargon or technical terms in a section of the report by searching for predefined terms.

    Parameters:
    section_text (str): The text of the section to search within.

    Workflow:
    1. Initializes an empty list `found_terms` to store the jargon or technical terms found in the section.
    2. Iterates through each term in the `JARGON_LIST` set.
    3. For each term, constructs a regex pattern to match the term as a whole word (case-insensitive).
    4. Searches for the term in the section text using the regex pattern.
    5. If the term is found, appends it to the `found_terms` list.
    6. After checking all terms, returns a message indicating the jargon or technical terms found or a message indicating that no notable jargon or technical terms were found.

    Returns:
    str: A message indicating the jargon or technical terms found in the section, or a message indicating that no notable jargon or technical terms were found.
    """
    found_terms = []
    for term in JARGON_LIST:
        pattern = r"\b" + re.escape(term) + r"\b"
        if re.search(pattern, section_text, flags=re.IGNORECASE):
            found_terms.append(term)
    if found_terms:
        return f"The section includes jargon or technical terms: {', '.join(found_terms)}."
    else:
        return "No notable jargon or technical terms found."


In [25]:
# Tool to generate client questions based on a section of a report
def generate_client_questions(section_text, model="gpt-3.5-turbo", temperature=0.6):
    """
    Generates client questions based on a section of an IT strategy report.

    Purpose:
    This function acts as a skeptical client reviewing a section of an IT strategy report and generates 3-5 clarifying or challenging questions based on potential assumptions, unclear terms, or missing context.

    Parameters:
    section_text (str): The text of the section to generate questions for.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.6.

    Workflow:
    1. Constructs a prompt that instructs the model to act as a skeptical client and generate questions based on the section text.
    2. Creates a list of messages with the constructed prompt.
    3. Calls the OpenAI API with tracking using the call_openai_with_tracking function.
    4. If the API call is successful, returns the generated questions.
    5. If an exception occurs, returns a failure message with the exception details.

    Returns:
    str: The generated questions or a failure message if the API call fails.
    """
    prompt = (
        "You are acting as a skeptical client reviewing the following section of an IT strategy report.\n"
        "Generate 3-5 clarifying or challenging questions the client might ask based on potential assumptions, unclear terms, or missing context.\n\n"
        f"Section:\n{section_text}\n\n"
        "Questions:"
    )

    messages = [{"role": "user", "content": prompt}]
    try:
        response = call_openai_with_tracking(messages, model=model, temperature=temperature)
        return response.strip()
    except Exception as e:
        return f"⚠️ Failed to generate questions: {str(e)}"


In [26]:
# Tool to check report has expected sections
# can tailor expected sections list as needed

EXPECTED_SECTIONS = {
    "Executive Summary",
    "Goals & Objectives",
    "Current State Assessment",
    "Gap Analysis",
    "Future State Vision",
    "Roadmap & Timeline",
    "Technology Architecture",
    "Data & Analytics Strategy",
    "Security & Privacy",
    "Change Management Plan",
    "Risks & Mitigations",
    "Key Recommendations"
}

def highlight_missing_sections(report_sections_dict):
    """
    Highlights missing sections in the consulting report based on expected sections.

    Purpose:
    This function checks the provided report sections against a predefined set of expected sections and identifies any missing sections.

    Parameters:
    report_sections_dict (dict): A dictionary where keys are section headers and values are the corresponding section contents.

    Workflow:
    1. Converts the keys of the provided report sections dictionary to a set.
    2. Calculates the difference between the expected sections and the found sections.
    3. If there are missing sections, returns a message listing the missing sections.
    4. If all expected sections are present, returns a message indicating that all sections are present.

    Returns:
    str: A message indicating the missing sections or confirming that all expected sections are present.
    """
    found = set(report_sections_dict.keys())
    missing = EXPECTED_SECTIONS - found
    if missing:
        return f"Missing report sections: {', '.join(sorted(missing))}."
    else:
        return "✅ All expected sections are present."


In [27]:
# Tool to check alignment between section and report goals
def check_alignment_with_goals(section_name, report_sections_dict, model="gpt-3.5-turbo", temperature=0.6):
    """
    Checks the alignment between the goals of the report and a specific section.

    Purpose:
    This function evaluates whether a specific section of the report aligns with the stated goals and objectives. It uses the OpenAI API to generate an evaluation of the alignment.

    Parameters:
    section_name (str): The name of the section to evaluate for alignment.
    report_sections_dict (dict): A dictionary where keys are section headers and values are the corresponding section contents.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.6.

    Workflow:
    1. Tries to find the "Goals & Objectives" section in the report.
    2. If the "Goals & Objectives" section is not found, searches for goals in other sections based on keywords.
    3. Retrieves the text of the specified section to evaluate.
    4. If either the goals or the section text is not found, returns a warning message.
    5. Constructs a prompt for the OpenAI API to evaluate the alignment between the goals and the specified section.
    6. Calls the OpenAI API with tracking to get the evaluation.
    7. Returns the evaluation or an error message if the API call fails.

    Returns:
    str: The evaluation of the alignment between the goals and the specified section, or an error message if the API call fails.
    """
    # Step 1: Try exact match first
    try:
        response = call_openai_with_tracking(messages, model=model, temperature=temperature)
        return response.strip()
    except Exception as e:
        return f"⚠️ Failed to check alignment: {str(e)}"


In [28]:
# Tool to compare two sections of a report for duplication, contradictions, or inconsistencies
# This tool compares two sections of an IT consulting report for duplication, contradictions, or inconsistencies.
# It also notes if one section covers content that the other should include.
def compare_with_other_section(section_a, section_b, report_sections_dict, model="gpt-3.5-turbo", temperature=0.6):
    """
    Compares two sections of an IT consulting report for duplication, contradictions, or inconsistencies.

    Purpose:
    This function compares two specified sections of an IT consulting report to identify any duplication, contradictions, or inconsistencies between them. It also notes if one section covers content that the other should include.

    Parameters:
    section_a (str): The name of the first section to compare.
    section_b (str): The name of the second section to compare.
    report_sections_dict (dict): A dictionary where keys are section headers and values are the corresponding section contents.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.6.

    Workflow:
    1. Retrieves the text of the specified sections from the report_sections_dict.
    2. If either section is not found, returns a warning message.
    3. Constructs a prompt for the OpenAI API to compare the two sections.
    4. Calls the OpenAI API with tracking to get the comparison.
    5. Returns the comparison or an error message if the API call fails.

    Returns:
    str: A summary of the comparison between the two sections, or an error message if the API call fails.
    """
    text_a = report_sections_dict.get(section_a)
    text_b = report_sections_dict.get(section_b)

    if not text_a or not text_b:
        return f"⚠️ One or both sections not found: '{section_a}' or '{section_b}'"

    prompt = (
        f"You are comparing two sections of an IT consulting report.\n"
        f"Identify any duplication, contradictions, or inconsistencies between them.\n"
        f"Also note if one section covers content the other should include.\n\n"
        f"Section A: {section_a}\n{text_a}\n\n"
        f"Section B: {section_b}\n{text_b}\n\n"
        "Provide a 3-5 sentence summary of your comparison:"
    )

    messages = [{"role": "user", "content": prompt}]
    try:
        response = call_openai_with_tracking(messages, model=model, temperature=temperature)
        return response.strip()
    except Exception as e:
        return f"⚠️ Failed to compare sections: {str(e)}"


In [29]:
# Show agent memory
def show_agent_memory(agent):
    """
    Displays a snapshot of the agent's memory, including section notes, cross-section observations, and tool usage history.

    Purpose:
    This function provides a detailed view of the agent's internal memory, which includes notes for each section, observations comparing different sections, and the history of tool usage. This is useful for understanding the agent's reasoning process and the actions it has taken.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, which contains the memory data to be displayed.

    Workflow:
    1. Prints a header "Agent Memory Snapshot".
    2. Prints the section notes stored in the agent's memory.
    3. Prints the cross-section observations stored in the agent's memory.
    4. Prints the tool usage history stored in the agent's memory.

    Returns:
    None
    """
    print("\n🧠 Agent Memory Snapshot\n")

    print("🔹 Section Notes:")
    for section, notes in agent.memory["section_notes"].items():
        print(f"- {section}:")
        for note in notes:
            print(f"  • {note}")

    print("\n🔹 Cross-Section Observations:")
    for a, b, obs in agent.memory["cross_section_flags"]:
        print(f"- {a} vs. {b}: {obs}")

    print("\n🔹 Tool History:")
    for step, action, section in agent.memory["tool_history"]:
        print(f"Step {step} | {action} | Section: {section}")


In [30]:
def generate_final_summary(agent, model="gpt-3.5-turbo", temperature=0.7):
    """
    Generates a final summary for the client based on the agent's memory of section insights and cross-section observations.

    Purpose:
    This function constructs a prompt using the agent's memory of section insights and cross-section observations to generate a final summary for the client. It uses the OpenAI API to create a concise summary covering strengths, issues, and overall alignment with goals.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, which contains the memory data to be used for generating the summary.
    model (str): The model to use for the API call. Default is "gpt-3.5-turbo".
    temperature (float): The sampling temperature to use. Higher values mean the model will take more risks. Default is 0.7.

    Workflow:
    1. Retrieves section notes and cross-section observations from the agent's memory.
    2. Constructs a prompt that includes the section insights and cross-section observations.
    3. Adds instructions to write a short, clear 4-6 sentence final summary covering strengths, issues, and overall alignment with goals.
    4. Creates a list of messages with the constructed prompt.
    5. Calls the OpenAI API with tracking using the call_openai_with_tracking function.
    6. If the API call is successful, returns the generated summary.
    7. If an exception occurs, returns a failure message with the exception details.

    Returns:
    str: The generated final summary or a failure message if the API call fails.
    """
    # Build a summary prompt using memory
    notes_by_section = agent.memory.get("section_notes", {})
    cross_section = agent.memory.get("cross_section_flags", [])

    prompt = "You are a senior consultant wrapping up your review of an IT strategy report.\n"
    prompt += "Use the following section insights and cross-section observations to write a final summary for the client.\n\n"

    for section, notes in notes_by_section.items():
        prompt += f"Section: {section}\n"
        for note in notes:
            prompt += f"- {note}\n"
        prompt += "\n"

    if cross_section:
        prompt += "Cross-Section Findings:\n"
        for a, b, obs in cross_section:
            prompt += f"- {a} vs. {b}: {obs}\n"
        prompt += "\n"

    prompt += "Write a short, clear 4-6 sentence final summary covering strengths, issues, and overall alignment with goals."

    messages = [{"role": "user", "content": prompt}]
    
    try:
        return call_openai_with_tracking(messages, model=model, temperature=temperature).strip()
    except Exception as e:
        return f"⚠️ Failed to generate final summary: {str(e)}"


In [31]:
def export_report_to_markdown(agent, filename="consultant_ai_report.md"):
    """
    Exports the consulting report review to a markdown file.

    Purpose:
    This function generates a markdown file summarizing the consulting report review, including the final summary, section insights, and cross-section findings.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, which contains the memory data to be exported.
    filename (str): The name of the markdown file to save. Default is "consultant_ai_report.md".

    Workflow:
    1. Creates an output directory if it does not exist.
    2. Opens the specified file in write mode.
    3. Writes the report title to the file.
    4. Retrieves the final summary from the agent's memory and writes it to the file.
    5. Iterates through the section notes in the agent's memory and writes each section's insights to the file.
    6. If there are cross-section findings, writes them to the file.
    7. Prints a confirmation message indicating the file has been saved.

    Returns:
    None
    """
    output_dir = "./outputs"
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, filename)
    with open(filename, "w") as f:
        f.write("# 🧾 AI-Powered Consulting Report\n\n")

        # Final summary
        final = agent.memory.get("final_summary", "No final summary generated.")
        f.write("## Final Summary\n\n")
        f.write(final + "\n\n")

        # Section Notes
        f.write("## Section Insights\n")
        for section, notes in agent.memory["section_notes"].items():
            f.write(f"\n### {section}\n")
            for note in notes:
                f.write(f"- {note}\n")

        # Cross-section
        if agent.memory["cross_section_flags"]:
            f.write("\n## Cross-Section Findings\n")
            for a, b, obs in agent.memory["cross_section_flags"]:
                f.write(f"- **{a} vs. {b}**: {obs}\n")

    print(f"✅ Markdown report saved as: {filename}")


In [None]:
def export_report_to_markdown_and_pdf(agent, markdown_file="consultant_ai_report.md", pdf_file="consultant_ai_report.pdf"):
    """
    Exports the consulting report review to both a markdown file and a PDF file.

    Purpose:
    This function generates a markdown file summarizing the consulting report review and then converts it to a PDF file. It ensures that the output directory exists, exports the report to markdown, converts the markdown to HTML, and finally renders the HTML to a PDF file.

    Parameters:
    agent (ReActConsultantAgent): An instance of the ReActConsultantAgent class, which contains the memory data to be exported.
    markdown_file (str): The name of the markdown file to save. Default is "consultant_ai_report.md".
    pdf_file (str): The name of the PDF file to save. Default is "consultant_ai_report.pdf".

    Workflow:
    1. Ensures the output directory exists.
    2. Exports the report to a markdown file using the export_report_to_markdown function.
    3. Reads the markdown file and converts its content to HTML.
    4. Optionally wraps the HTML content in a basic HTML page structure.
    5. Saves the HTML content to a temporary file.
    6. Uses Playwright to render the HTML file to a PDF file.
    7. Handles any exceptions that occur during the PDF rendering process and prints an appropriate message.

    Returns:
    None
    """
    # Ensure output directory exists
    output_dir = "../outputs/"
    os.makedirs(output_dir, exist_ok=True)
    markdown_file = os.path.join(output_dir, markdown_file)
    pdf_file = os.path.join(output_dir, pdf_file)
    
    # Step 1: Export to Markdown
    export_report_to_markdown(agent, filename=markdown_file)

    # Step 2: Convert to HTML
    with open(markdown_file, "r") as f:
        md_text = f.read()
    html_content = markdown(md_text)

    # Optional: wrap in basic HTML page
    full_html = f"""
    <html>
    <head>
        <meta charset="utf-8">
        <title>Consulting Report</title>
        <style>
            body {{ font-family: sans-serif; margin: 40px; line-height: 1.6; }}
            h1, h2, h3 {{ color: #003366; }}
            ul {{ margin-top: 0; }}
        </style>
    </head>
    <body>
    {html_content}
    </body>
    </html>
    """

    # Step 3: Save HTML and render to PDF
    temp_html_path = os.path.join(output_dir, "temp_report.html")
    with open("temp_report.html", "w") as f:
        f.write(full_html)

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch()
            page = browser.new_page()
            page.goto("file://" + os.path.abspath("temp_report.html"))
            page.pdf(path=pdf_file, format="A4")
            browser.close()
            print(f"✅ PDF report saved as: {pdf_file}")
    except Exception as e:
        print(f"⚠️ PDF export failed: {str(e)}")


# **Section 3: Load Data** <a id="3"></a>

In [33]:
# Sample IT consulting report (replace with real one later)
# You can later replace this with a real one or load from a file.
# We can add file upload later (open("report.txt").read())
sample_report = """
Client: HealthConnect Systems
Industry: Healthcare
Project: IT Modernization Assessment

Summary:
HealthConnect currently operates on-premise infrastructure for its core clinical systems. While some departments use SaaS tools, there is no centralized cloud strategy. Security policies are documented but not consistently followed. There is no formal data governance framework. Leadership has expressed interest in migrating systems to the cloud.

Key Recommendations:
1. Conduct a cloud readiness assessment.
2. Begin phased migration of CRM and EHR systems.
3. Establish a data governance committee.
4. Update security protocols to align with NIST standards.

Timeline: Estimated at 6–12 months for full migration planning and execution.
"""

# **Section 4: Pre-process Data** <a id="4"></a>

In [34]:
# Run it on the sample report
report_sections = split_report_into_sections(sample_report)

# Display for verification
for section, content in report_sections.items():
    print(f"📌 {section}:\n{content}\n{'-'*60}")


📌 Header:
Client: HealthConnect Systems
Industry: Healthcare
Project: IT Modernization Assessment
------------------------------------------------------------
📌 Summary:
HealthConnect currently operates on-premise infrastructure for its core clinical systems. While some departments use SaaS tools, there is no centralized cloud strategy. Security policies are documented but not consistently followed. There is no formal data governance framework. Leadership has expressed interest in migrating systems to the cloud.
------------------------------------------------------------
📌 Key Recommendations:
1. Conduct a cloud readiness assessment.
2. Begin phased migration of CRM and EHR systems.
3. Establish a data governance committee.
4. Update security protocols to align with NIST standards.
Timeline: Estimated at 6–12 months for full migration planning and execution.
------------------------------------------------------------


# **Section 5: Model - Basic: Single-Hop Reasoning + Static Action Loop** <a id="5"></a>

1. Iterate through sections of report
2. Send each section to ChatGPT for feedback
3. Summarize feedback 

## **5.1 Initialize Agent** <a id="5.1"></a> ##

In [35]:
# Initialize the reasoning agent
agent = ITReportReviewer(report_sections)

## **5.2 Run Agent** <a id="5.2"></a> ##

In [36]:
# Boolean to control cell execution
execute_cell = False

if execute_cell:
    # Define the order in which we want to review sections
    sections_to_review = list(report_sections.keys())

    # Loop through each section and generate a review
    for section in sections_to_review:
        agent.review_section(section)
else:
    print("Cell execution skipped.")

Cell execution skipped.


In [37]:
# Call the summarize_full_review function to get the final summary
final_summary = summarize_full_review(agent)
print("📋 Final Summary of the Report Review:\n")
print(final_summary)

🔢 Prompt: 54 tokens | Completion: 434 tokens | Total so far: 488 tokens
💰 Estimated cost so far: $0.0007 USD
📋 Final Summary of the Report Review:

Overall, the internal report assessment shows a comprehensive analysis of the IT strategy. Here is a summary based on the section reviews:

1. **Executive Summary**:
   - Strengths: The executive summary provides a clear and concise overview of the IT strategy assessment.
   - Gaps: It could benefit from including key findings and recommendations to provide a more holistic view right at the beginning.

2. **Current IT Landscape**:
   - Strengths: The section offers a detailed description of the existing IT infrastructure and systems.
   - Gaps: It lacks an evaluation of the current IT capabilities and how they align with business goals and industry standards.

3. **SWOT Analysis**:
   - Strengths: The SWOT analysis identifies internal strengths and weaknesses along with external opportunities and threats effectively.
   - Gaps: It would be 

# **Section 6: Model - ReAct** <a id="6"></a>

1. **Think** about each section (with ChatGPT)
2. Decide on and take an **action**
3. **Observe** the results and loop back to step 1 with new reasoning

## **6.1 Simple Agent - Predefined Actions** <a id="6.1"></a> ##

In [38]:
# Boolean to control cell execution
execute_cell = False

if execute_cell:
    # Choose a section to run ReAct on
    section_name = "Summary"
    section_text = report_sections.get(section_name, "")

    # Initialize the ReAct agent
    react_agent = ReActConsultantAgent(section_name, section_text)

    # Run the ReAct loop
    react_review_history = run_react_loop_static(react_agent)
else:
    print("Cell execution skipped.")


Cell execution skipped.


## **6.2 Agent 2 - Custom & Pre-Built Tools** <a id="6.2"></a> ##

In [39]:
## ReAct Loop with Tool Usage Tracking - one section at a time

# Boolean to control cell execution
execute_cell = False

if execute_cell:
    # Choose a section to run ReAct on
    section_name = "Key Recommendations"
    section_text = report_sections.get(section_name, "")

    # Initialize the ReAct agent
    react_agent = ReActConsultantAgent(section_name, section_text)

    # Run the ReAct loop
    react_review_history = run_react_loop_check_withTool(react_agent)

    # Print tool usage summary
    tool_usage = print_tool_usage(react_agent)
    plot_tool_usage(react_agent.tool_usage)
    
else:
    print("Cell execution skipped.")


Cell execution skipped.


In [40]:
## Run ReAct on the full report of all sections

# Boolean to control cell execution
execute_cell = True

if execute_cell:
    # Create a single agent to hold memory across sections
    agent = ReActConsultantAgent(section_name="Full Report", section_text="")

    # Loop through each report section and run ReAct
    for section_name, section_text in report_sections.items():
        agent.section_name = section_name
        agent.section_text = section_text
        run_react_loop_check_withTool(agent)

    # Generate the final summary based on the agent's memory
    summary = generate_final_summary(agent)
    print("\n🧾 Final Summary:\n")
    print(summary)
    agent.memory["final_summary"] = summary

    # Export the report to markdown and PDF
    export_report_to_markdown_and_pdf(agent)
    print("Report exported successfully.")
else:
    print("Cell execution skipped.")


🔁 Step 1
🧠 Prompt: [{'role': 'user', 'content': 'You are an expert IT strategy consultant reviewing a report section titled \'Header\'.\nYou are using ReAct (Reason + Act) to think through the review.\n\nFormat each response like this:\nThought: <your reasoning>\nAction: <one of: check_guideline, keyword_match_in_section, check_timeline_feasibility, search_report, ask_question, flag_risk, recommend_fix, summarize, tool_help, suggest_tool_for, search_web, check_for_jargon, generate_client_questions, highlight_missing_sections, check_alignment_with_goals, compare_with_other_section, final_summary\'\n\nAvailable tools:\n\n- check_guideline (vv1.0): Look up a best practice for a given topic\n  Usage: check_guideline["cloud security"]\n  Example: check_guideline["data governance"]\n  Example: check_guideline["migration strategy"]\n\n- keyword_match_in_section (vv1.0): Check if a keyword appears in the current section\n  Usage: keyword_match_in_section["encryption"]\n  Example: keyword_matc

FileNotFoundError: [Errno 2] No such file or directory: './outputs/./outputs/consultant_ai_report.md'