In [None]:
prompts = ["CHAIN_OF_THOUGHT", "ZERO_SHOT", "FEW_SHOT", "RAG"]
temperatures = [0.1,0.7]

output_data = []
for prompt in prompts:
    for temperature in temperatures:
        file_name = f"./../benchmarking_results/mistral-large-latest_{temperature}_PromptingStrategy.{prompt}.json"
        print(file_name)

        import json
        from collections import defaultdict
        import re

        # Load the JSON data
        try:
            with open(file_name, "r") as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"File not found: {file_name}. Skipping...")
            continue

        # Initialize stats dictionary
        stats = defaultdict(lambda: {"syntax_valid_count": 0, "total": 0, "pass_rate_sum": 0.0})

        # Helper to extract pass rate
        def extract_pass_rate(report):
            match = re.search(r"Pass Rate: ([0-9.]+)%", report)
            return float(match.group(1)) if match else 0.0

        # Process each entry
        for item in data:
            difficulty = item.get("Difficulty", "unknown")
            syntax_valid = item.get("Validation Summary", {}).get("syntax_valid", False)
            compliance_report = item.get("Compliance Report", "")

            stats[difficulty]["total"] += 1
            if syntax_valid:
                stats[difficulty]["syntax_valid_count"] += 1
            stats[difficulty]["pass_rate_sum"] += extract_pass_rate(compliance_report)

        # Print results
        temp = []
        for difficulty, values in sorted(stats.items()):
            total = values["total"]
            syntax_count = values["syntax_valid_count"]
            avg_pass_rate = values["pass_rate_sum"] / total if total else 0
            temp.append({
                "Difficulty": difficulty,
                "Syntax Valid Count": f"{syntax_count/total:.2f}%",
                "Avg Pass Rate": f"{avg_pass_rate:.2f}%"
            })
        output_data.append({
            "Prompt": prompt,
            "Temperature": temperature,
            "Results": temp
        })

output_file = f"summary_mistral_remaining.json"
with open(output_file, "w") as json_file:
    json.dump(output_data, json_file, indent=4)


./../benchmarking_results/mistral-large-latest_0.1_PromptingStrategy.CHAIN_OF_THOUGHT.json
File not found: ./../benchmarking_results/mistral-large-latest_0.1_PromptingStrategy.CHAIN_OF_THOUGHT.json. Skipping...
./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.CHAIN_OF_THOUGHT.json
File not found: ./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.CHAIN_OF_THOUGHT.json. Skipping...
./../benchmarking_results/mistral-large-latest_0.1_PromptingStrategy.ZERO_SHOT.json
./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.ZERO_SHOT.json
File not found: ./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.ZERO_SHOT.json. Skipping...
./../benchmarking_results/mistral-large-latest_0.1_PromptingStrategy.FEW_SHOT.json
./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.FEW_SHOT.json
File not found: ./../benchmarking_results/mistral-large-latest_0.7_PromptingStrategy.FEW_SHOT.json. Skipping...
./../benchmarking_r

In [4]:
import json

with open('./../aws_resources.json', 'r') as file:
    aws_resources = json.load(file)

with open('./../aws_data_sources.json', 'r') as file:
    aws_data_sources = json.load(file)


print(len(aws_resources))

1000


In [5]:
aws_resource_names = set()
aws_subcategories = set()
for resource in aws_resources:
    aws_resource_names.add(resource['resource_name'])
    aws_subcategories.add(resource['metadata']['subcategory'])

for source in aws_data_sources:
    aws_resource_names.add(source['resource_name'])
    aws_subcategories.add(source['metadata']['subcategory'])
    

In [7]:
len(aws_subcategories)

214

In [6]:
# Write aws_resource_names to a txt file
# with open("aws_resource_names.txt", "w") as file:
#     for name in aws_resource_names:
#         file.write(f"{name}\n")


# Write aws_subcategories to a txt file
with open("aws_subcategories.txt", "w") as file:
    for subcategory in aws_subcategories:
        file.write(f"{subcategory},")

In [9]:
aws_subcategories

['IAM Access Analyzer',
 'IAM Access Analyzer',
 'Account Management',
 'Account Management',
 'Account Management',
 'ACM (Certificate Manager)',
 'ACM (Certificate Manager)',
 'ACM PCA (Certificate Manager Private Certificate Authority)',
 'ACM PCA (Certificate Manager Private Certificate Authority)',
 'ACM PCA (Certificate Manager Private Certificate Authority)',
 'ACM PCA (Certificate Manager Private Certificate Authority)',
 'ACM PCA (Certificate Manager Private Certificate Authority)',
 'EC2 (Elastic Compute Cloud)',
 'EC2 (Elastic Compute Cloud)',
 'EC2 (Elastic Compute Cloud)',
 'EC2 (Elastic Compute Cloud)',
 'Amplify',
 'Amplify',
 'Amplify',
 'Amplify',
 'Amplify',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gateway',
 'API Gatew

Load Documents

In [17]:
import bs4
from langchain_community.document_loaders import WebBaseLoader


bs4_strainer = bs4.SoupStrainer(class_=('post-title',"post-header", "post-content"))

loader = WebBaseLoader(web_paths = ("https://lilianweng.github.io/posts/2023-06-23-agent/",), bs_kwargs = {"parse_only": bs4_strainer})

docs = loader.load()

assert len(docs) == 1
print(len(docs[0].page_content))

USER_AGENT environment variable not set, consider setting it to identify your requests.


43130


Splitting Documents

In [18]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200,
    add_start_index = True,
)

all_splits = text_splitter.split_documents(docs)

print(len(all_splits))

66


Storing Documents

In [21]:
import getpass
import os

if not os.environ.get("MISTRAL_API_KEY"):
  os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for MistralAI: ")

from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")

In [14]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("mistral-large-latest", model_provider="mistralai")

from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [20]:
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids)

An error occurred with MistralAI: Illegal header value b'Bearer '


LocalProtocolError: Illegal header value b'Bearer '

Retrieval and Generation

In [21]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_message =  prompt.invoke({
    "context": "(context goes here)",
    "question": "(question goes here)",
}).to_messages()

print(example_message)

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: (question goes here) \nContext: (context goes here) \nAnswer:", additional_kwargs={}, response_metadata={})]




In [26]:
question = "what is self reflection?"

retrieved_docs = vector_store.similarity_search(question)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
print(docs_content)
answer = llm.invoke(prompt.invoke({"question": question, "context": docs_content}))
answer.content

Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)
The heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.
Self-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.

Memory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language.

Each element is an observation, an event directly provided by the agent.
- Inter-agent communication can trigger new natural language statements.


Re

'Self-reflection is a process that allows autonomous agents to improve by reviewing and learning from past actions and mistakes. It is particularly useful in real-world tasks where trial and error are common. In the context of the Reflexion framework, self-reflection is facilitated by providing examples of failed trajectories and ideal reflections to a language model, which then guides future planning.'

In [23]:
answer.content

'Task decomposition in AI agents is done through methods like Chain of Thought (CoT), where the agent is prompted to "think step by step" to break down complex tasks into smaller, manageable ones. This process transforms large tasks into multiple simpler tasks, making them easier to handle and providing insight into the model\'s thinking process. The agent can then plan and execute these smaller tasks in a logical order.'

Query Analysis

In [29]:
total_documents = len(all_splits)

third = total_documents // 3

for i, doc in enumerate(all_splits):
    if i < third:
        doc.metadata["section"] = "beginning"
    elif i < 2 * third:
        doc.metadata["section"] = "middle"
    else:
        doc.metadata["section"] = "end"


all_splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 8,
 'section': 'beginning'}

In [None]:
vector_store = InMemoryVectorStore(embeddings)
_ = vector_store.add_documents(all_splits)

Using LangGraph

In [34]:
from typing_extensions import TypedDict, List, Annotated, Literal
from langchain_core.documents import Document



# search
class Search(TypedDict):
    query: Annotated[str, ..., "Search query to run"]
    section: Annotated[Literal["beginning", "middle", "end"], ..., "Section of the document to search in"]

# state
class State(TypedDict):
    question: str
    context: List[Document]
    query: Search
    answer: str

# steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["query"]["query"],
                                                    filter=lambda doc: doc.metadata.get("section") == state["query"]["section"])
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    answer = llm.invoke(prompt.invoke({"question": state["question"], "context": docs_content}))
    return {"answer": answer.content}

def analyze_query(state: State):
    structured_llm = llm.with_structured_output(Search)
    query = structured_llm.invoke(state["question"])
    return {"query": query}


# control flow
from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence([analyze_query,retrieve, generate])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

In [37]:
for step in graph.stream({"question": "what is self reflection?"}, stream_mode="updates"):
    print(step)

{'analyze_query': {'query': {'query': 'self reflection', 'section': 'beginning'}}}
{'retrieve': {'context': [Document(id='c5a72909-3486-456f-b89e-ba092cb4d46a', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 5221, 'section': 'beginning'}, page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying

Playground

In [4]:
import requests

response = requests.get(
    "https://api.github.com/repos/hashicorp/terraform-provider-aws/contents/website/docs/r"
)

doc_files = response.json()

In [2]:
doc_files

[{'name': 'accessanalyzer_analyzer.html.markdown',
  'path': 'website/docs/r/accessanalyzer_analyzer.html.markdown',
  'sha': '2de4c28ab27e2b29c5a1d74329fb295a2ffc6790',
  'size': 2869,
  'url': 'https://api.github.com/repos/hashicorp/terraform-provider-aws/contents/website/docs/r/accessanalyzer_analyzer.html.markdown?ref=main',
  'html_url': 'https://github.com/hashicorp/terraform-provider-aws/blob/main/website/docs/r/accessanalyzer_analyzer.html.markdown',
  'git_url': 'https://api.github.com/repos/hashicorp/terraform-provider-aws/git/blobs/2de4c28ab27e2b29c5a1d74329fb295a2ffc6790',
  'download_url': 'https://raw.githubusercontent.com/hashicorp/terraform-provider-aws/main/website/docs/r/accessanalyzer_analyzer.html.markdown',
  'type': 'file',
  '_links': {'self': 'https://api.github.com/repos/hashicorp/terraform-provider-aws/contents/website/docs/r/accessanalyzer_analyzer.html.markdown?ref=main',
   'git': 'https://api.github.com/repos/hashicorp/terraform-provider-aws/git/blobs/2de4

In [5]:
file = doc_files[0]
if file["name"].endswith(".markdown"):
    file_response = requests.get(file["download_url"])
    doc = file_response.text
    print(doc)

---
subcategory: "IAM Access Analyzer"
layout: "aws"
page_title: "AWS: aws_accessanalyzer_analyzer"
description: |-
  Manages an Access Analyzer Analyzer
---

# Resource: aws_accessanalyzer_analyzer

Manages an Access Analyzer Analyzer. More information can be found in the [Access Analyzer User Guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/what-is-access-analyzer.html).

## Example Usage

### Account Analyzer

```terraform
resource "aws_accessanalyzer_analyzer" "example" {
  analyzer_name = "example"
}
```

### Organization Analyzer

```terraform
resource "aws_organizations_organization" "example" {
  aws_service_access_principals = ["access-analyzer.amazonaws.com"]
}

resource "aws_accessanalyzer_analyzer" "example" {
  depends_on = [aws_organizations_organization.example]

  analyzer_name = "example"
  type          = "ORGANIZATION"
}
```

## Argument Reference

The following arguments are required:

* `analyzer_name` - (Required) Name of the Analyzer.

The following argume

In [7]:
import frontmatter

resource = frontmatter.loads(doc)
resource.metadata

{'subcategory': 'IAM Access Analyzer',
 'layout': 'aws',
 'page_title': 'AWS: aws_accessanalyzer_analyzer',
 'description': 'Manages an Access Analyzer Analyzer'}

In [8]:
import re


chunks = re.split(r"(##\s+.+)", doc)

print(chunks)

results = []
for i in range(1, len(chunks), 2):
    header = chunks[i].strip()
    body = chunks[i+1].strip()
    results.append({"header": header, "body": body})
    print({"header": header, "body": body})



['---\nsubcategory: "IAM Access Analyzer"\nlayout: "aws"\npage_title: "AWS: aws_accessanalyzer_analyzer"\ndescription: |-\n  Manages an Access Analyzer Analyzer\n---\n\n# Resource: aws_accessanalyzer_analyzer\n\nManages an Access Analyzer Analyzer. More information can be found in the [Access Analyzer User Guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/what-is-access-analyzer.html).\n\n', '## Example Usage', '\n\n#', '## Account Analyzer', '\n\n```terraform\nresource "aws_accessanalyzer_analyzer" "example" {\n  analyzer_name = "example"\n}\n```\n\n#', '## Organization Analyzer', '\n\n```terraform\nresource "aws_organizations_organization" "example" {\n  aws_service_access_principals = ["access-analyzer.amazonaws.com"]\n}\n\nresource "aws_accessanalyzer_analyzer" "example" {\n  depends_on = [aws_organizations_organization.example]\n\n  analyzer_name = "example"\n  type          = "ORGANIZATION"\n}\n```\n\n', '## Argument Reference', '\n\nThe following arguments are required:\n\

In [9]:
metadata = dict(resource.metadata)

# Extract main content
main_content = resource.content

# Separate sections
sections = {}
current_section = "main"
sections[current_section] = []

for line in main_content.split('\n'):
        if line.startswith('## '):
            current_section = line.replace('## ', '').strip()
            sections[current_section] = []
        else:
            sections[current_section].append(line)

structured_content = {
        "title": metadata.get("page_title", ""),
        "description": metadata.get("description", ""),
        "resource_name": re.search(r'# Resource: (.*)', main_content).group(1) if re.search(r'# Resource: (.*)', main_content) else "",
        "frontmatter": metadata,
        "sections": {k: '\n'.join(v) for k, v in sections.items()},
        "full_content": main_content
    }

# for k,v in structured_content['sections'].items():
#     print(f"{k}:",v)  

print(structured_content['full_content'])

# Resource: aws_accessanalyzer_analyzer

Manages an Access Analyzer Analyzer. More information can be found in the [Access Analyzer User Guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/what-is-access-analyzer.html).

## Example Usage

### Account Analyzer

```terraform
resource "aws_accessanalyzer_analyzer" "example" {
  analyzer_name = "example"
}
```

### Organization Analyzer

```terraform
resource "aws_organizations_organization" "example" {
  aws_service_access_principals = ["access-analyzer.amazonaws.com"]
}

resource "aws_accessanalyzer_analyzer" "example" {
  depends_on = [aws_organizations_organization.example]

  analyzer_name = "example"
  type          = "ORGANIZATION"
}
```

## Argument Reference

The following arguments are required:

* `analyzer_name` - (Required) Name of the Analyzer.

The following arguments are optional:

* `configuration` - (Optional) A block that specifies the configuration of the analyzer. [Documented below](#configuration-argument-referenc

In [12]:
for section_name, content in structured_content.get("sections", {}).items():
    # Extract code blocks and surrounding text
    code_blocks = re.findall(r"```terraform\n(.*?)\n```", content, re.DOTALL)
    text = re.sub(r"```terraform\n(.*?)\n```", "", content, flags=re.DOTALL).strip()

print(code_blocks)
print(text)

['import {\n  to = aws_accessanalyzer_analyzer.example\n  id = "example"\n}']
In Terraform v1.5.0 and later, use an [`import` block](https://developer.hashicorp.com/terraform/language/import) to import Access Analyzer Analyzers using the `analyzer_name`. For example:



Using `terraform import`, import Access Analyzer Analyzers using the `analyzer_name`. For example:

```console
% terraform import aws_accessanalyzer_analyzer.example example
```
