In [1]:
%load_ext autoreload
%autoreload 2

In [11]:
import os
from langchain.chat_models import ChatOpenAI
from talk_openai import MyOpenAI
from langchain.schema import HumanMessage
import os
from utils import *
import re
from langchain_core.tools import tool
from langchain.agents import initialize_agent, AgentType


In [14]:
import sys

In [20]:
subprocess.check_output(['python', '--version']).decode('utf-8').strip()

'Python 3.10.12'

In [21]:
subprocess.check_output([sys.executable, '--version']).decode('utf-8').strip()

'Python 3.10.12'

In [None]:
training_script_path = "iris_model_inference/inference.py"
MODEL_NAME = "gpt-4o-mini"
# Create a ChatOpenAI object 
inference_script_generator_llm = MyOpenAI(model=MODEL_NAME)
inference_script_validator_llm = MyOpenAI(model="gpt-4o-mini")
inference_script_dir = "/Users/neel/Developer/sagemaker_test/inference"
terraform_dir = "/Users/neel/Developer/sagemaker_test/terraform"
model_type = "MultiClass Classification"
data_shape = "(,4)"
python_version = get_python_version()
packages = get_installed_packages()

In [72]:
inference_script_dir="/Users/neel/Developer/sagemaker_test/inference"
terraform_dir = "/Users/neel/Developer/sagemaker_test/terraform"

In [43]:
## generaring inference script plus other artifacts

In [71]:
def generate_sagemaker_inference_bundle(training_script, model_type, data_shape, feedback=None):
    """Use an LLM to generate inference.py + serve for a SageMaker endpoint."""
    if feedback is None:
        messages = [
            {"role": "system", "content": "You are an expert Python developer."},
            {"role": "user", "content": f'''
        You are to generate *two* files for AWS SageMaker custom inference:

        1) **inference.py**  
        - From the training script identity the name of the model file and the format in which the model is saved. And use similar name and format for loading the model
        - Use FastAPI, listen on 0.0.0.0:8080  
        - **GET /ping** → return JSON {{ "status": "OK" }} with HTTP status code **200**  
        - **POST /invocations** → accept `application/json` with key `"instances"`,  
            preprocess into the shape {data_shape},  
            load the trained model from `/opt/ml/model`,  
            call `model.predict(...)`,  
            and return `{{"predictions": [...]}}`.  
        - Follow best practices: exception handling, logging, and specify `status_code=200` on ping.
        - The model will be saved is `/opt/ml/model/`, you will understand how the model is saved from the training script.

        2) **serve**  
        - A bash‐executable script that SageMaker will invoke to launch your FastAPI app  
        - e.g. `uvicorn inference:app --host 0.0.0.0 --port 8080`

        ### Context:
        The training script  used is:
        \"\"\"
        {training_script}
        \"\"\"
        Model type: {model_type}.

        Provide the *complete* contents of both files (with proper shebang for `serve`).
        ''' }
                ]
    else:
        messages = feedback
    response = inference_script_generator_llm.invoke(messages)
    return response
    
def evaluate_inference_script_with_llm(inference_script):
    """Use an LLM to evaluate the generated inference script and provide feedback."""
    messages = [
        {"role": "system", "content": "You are a code reviewer and expert in Python API development."},
        {"role": "user", "content": f"""
        You are reviewing an inference script generated for exposing a trained ML model as an API on AWS SageMaker.

        ### Task:
        - Analyze the script for correctness, best practices, and completeness. (it should have a /ping and /invocations endpoint)
        - Identify any missing components or improvements.
        - should load the model from /opt/ml/model/ and call model.predict(...)
        - If the script is perfect, respond with 'No changes needed.'
        - If changes are required, specify what needs to be improved.
        - there should be a serve script that is executable and launches the FastAPI app.

        ### Script to review:
        ```
        {inference_script}
        ```

        ### Expected Output:
        - Either 'No changes needed.' OR a detailed improvement plan.
        """}
    ]
    
    return inference_script_validator_llm.invoke(messages)

def actor_critic_inference_script(training_script, model_type, data_shape):
        """
        Runs the actor-critic loop:
        - Actor generates an inference script.
        - Critic evaluates the script.
        - If the script is sufficient, exit early.
        - Otherwise, Actor refines the script based on feedback.
        """
        feedback = None  # No feedback initially

        for _ in range(2):  # Max iterations: 1
            # Step 1: Actor generates inference script
            inference_script = generate_sagemaker_inference_bundle(training_script, model_type, data_shape, feedback)

            # Step 2: Critic evaluates the script
            critic_feedback = evaluate_inference_script_with_llm(inference_script)

            if "no changes needed" in critic_feedback.lower():
                print("✅ Inference script is satisfactory. Exiting early.")
                return inference_script  # Early exit if script is sufficient

            # Step 3: Actor refines script using critic's feedback
            feedback = [
                {"role": "assistant", "content": inference_script},
                {"role": "user", "content": f"Revise based on this feedback:\n{critic_feedback}"}
            ]

        print("🔄 Max iterations reached. Returning final script.")
        return inference_script

In [73]:
response = actor_critic_inference_script(training_script_path, model_type, data_shape)

🔄 Max iterations reached. Returning final script.


In [31]:
response

'Here’s the revised version of the `inference.py` and `serve` script based on your feedback:\n\n### 1) `inference.py`\n\n```python\nimport os\nimport json\nimport logging\nimport numpy as np\nimport joblib  # Corrected import for joblib\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n# Set up logging\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n# Create FastAPI app\napp = FastAPI()\n\n# Define the request model\nclass PredictionRequest(BaseModel):\n    instances: list\n\n# Load the model\nmodel_path = os.getenv(\'MODEL_PATH\', \'/opt/ml/model/model.joblib\')  # Make model path configurable\ntry:\n    model = joblib.load(model_path)\n    logger.info("Model loaded successfully.")\nexcept Exception as e:\n    logger.error(f"Failed to load model: {e}")\n    raise\n\n@app.get("/ping", status_code=200)\nasync def ping():\n    """Health check endpoint."""\n    return {"status": "OK"}\n\n@app.post("/invocations")\nasync def in

In [39]:
from langchain_core.tools import tool
from langchain.agents import initialize_agent, AgentType

from langchain_core.tools import StructuredTool
from pydantic import BaseModel
class SaveFileInput(BaseModel):
    file_path: str
    content: str

# Define the saving tool
@tool(args_schema=SaveFileInput)

def save_file_tool(file_path: str, content: str) -> str:
    """Save content to a file at the given path."""
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'w') as f:
        f.write(content)
    return f"Saved to {file_path}"

def extract_and_save_inference_files(response_str: str, save_dir: str = ''):
        llm = ChatOpenAI(model="gpt-4o-mini")

        tools = [save_file_tool]

        agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.OPENAI_FUNCTIONS,
            verbose=True,
        )

        response = agent.invoke(
            f"""
            1. Extract the inference script and serve script from the response string.
            2. For each extracted script:
                - If it is the inference script, save it as '{inference_script_dir}/inference.py'.
                - If it is the serve script, save it as '{inference_script_dir}/serve'.
                - Otherwise, save it as '{inference_script_dir}/<appropriate file_name and extension>'.
            3. Use the `save_file_tool` tool to save the content.
            4. Do not output anything else except confirming tool usage.

            ### Text:
            {response_str}
            """
        )
        return response

In [74]:
raw_files = extract_and_save_inference_files(response, inference_script_dir)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `save_file_tool` with `{'file_path': '/Users/neel/Developer/sagemaker_test/inference/inference.py', 'content': 'import os\nimport logging\nimport numpy as np\nimport joblib  # Updated import for joblib\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n# Set up logging\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n# Define the FastAPI app\napp = FastAPI()\n\n# Load the model\nmodel_path = os.path.join("/opt/ml/model", "iris_model.pkl")\ntry:\n    model = joblib.load(model_path)\n    logger.info("Model loaded successfully.")\nexcept Exception as e:\n    logger.error(f"Failed to load model: {str(e)}")\n    raise RuntimeError("Model loading failed.")\n\n# Define the request body for the POST /invocations endpoint\nclass PredictionRequest(BaseModel):\n    instances: list\n\n@app.get("/ping", status_code=200)\nasync def ping():\n    """Health check endpoint

## Generating the requirements

In [None]:
def extract_imported_libraries(python_script):
        """
        Extracts imported libraries from a Python script.

        Args:
            python_script (str): The Python script content.

        Returns:
            list: A list of unique libraries found in the script.
        """
        matches = re.findall(r"^\s*(?:import|from)\s+([\w\d_\.]+)", python_script, re.MULTILINE)
        return list(set(matches))  # Remove duplicates


def get_requirements_txt():
    inference_script_dir = "/Users/neel/Developer/sagemaker_test/inference"
    inference_script_path = os.path.join(inference_script_dir, "inference.py")
    inference_script = file_reader(inference_script_path)
    libraries_str = extract_imported_libraries(inference_script)
    messages = [
            {"role": "system", "content": "You are an expert Python package manager."},
            {"role": "user", "content": f"""
            for the inference file you generated earlier, I need to create a `requirements.txt` file.
            ### Task:
            - Generate a `requirements.txt` file based on the following libraries used in the script:
             - Also include known libraries which are not in the script but are commonly used for model inference.
            - Ensure the libraries are listed in a format suitable for `pip install`.
            {libraries_str}
            - Python version is {python_version}.
            - To check which version to install refer to the" this list of all installed packages in the environment: {packages}
    """}
        ]

    return inference_script_generator_llm.invoke(messages)



In [45]:
response = get_requirements_txt()

In [46]:
print(response)

Based on the libraries you provided and the common libraries used for model inference, here is a `requirements.txt` file that includes the necessary packages along with their versions:

```plaintext
fastapi==0.115.6
joblib==1.4.2
numpy==1.26.4
pydantic==2.9.2
requests==2.32.3
scikit-learn==1.0.2
scipy==1.10.1
uvicorn==0.22.0
python-dotenv==1.0.1
```

### Explanation:
- **fastapi**: For building APIs.
- **joblib**: For saving and loading models.
- **numpy**: For numerical operations.
- **pydantic**: For data validation and settings management.
- **requests**: For making HTTP requests, often used in inference scenarios.
- **scikit-learn**: For machine learning utilities.
- **scipy**: For scientific computing.
- **uvicorn**: ASGI server for running FastAPI applications.
- **python-dotenv**: For loading environment variables from a `.env` file, useful for configuration.

You can create a `requirements.txt` file with the above content and use it to install the necessary packages with `pip i

In [48]:
def extract_requirements_txt(llm_response):
    """
    Extracts the main contents of the `requirements.txt` file from an LLM response.

    Args:
        llm_response (str): The response text containing the `requirements.txt` section.

    Returns:
        str: The extracted `requirements.txt` content as a string.
    """
    match = re.search(r"```(?:\w+\n)?(.*?)\n```", llm_response, re.DOTALL)
    return match.group(1).strip() if match else ""

In [50]:
requirements_txt_path = os.path.join(inference_script_dir, "requirements.txt")
requirements_txt = extract_requirements_txt(response)
write_to_file(requirements_txt_path, requirements_txt)

## Dockerization

In [65]:
docker_generator_llm = MyOpenAI(model="gpt-4o")

In [52]:
options = {'python_version': python_version}

In [66]:
def generate_dockerfile(options, feedback=None):
    docker_template_file = "/Users/neel/Developer/deploy_wizard/templates/aws_sagemaker_endpoint/Dockerfile"
    docker_template = file_reader(docker_template_file)
    if feedback is None:
        messages = [
            {
                "role": "system",
                "content": "You are a Docker expert."
            },
            {
                "role": "user",
                "content": f'''
        You are an expert Dockerfile Generation Agent. Your task is to generate a **Dockerfile** for a containerized an inference script and its artifacts for AWS SageMaker Endpoint.

        
        ### Context:
        - **Working Directory**: The Dockerfile is located inside the project directory where the model inference script, requirements file, and a serve script are located.
        - **Python Version**: {options['python_version']}
        - I am providing you a template for the Dockerfile, you can use it as a reference.
        - {docker_template}


        ### Additional Constraints:
        - **Do not use absolute paths.**  
        - **Assume all files are within the same directory as the Dockerfile when running `docker build .`.**
        - **Ensure the COPY commands properly reflect this.**
        
        ### Expected Output:
        Provide a complete **Dockerfile** as a code block with Dockerfile syntax highlighting.
        No need to include entrypoint or command for copying the model file
        '''
            }
        ]
    else:
        messages = feedback
    response = docker_generator_llm.invoke(messages)
    return response


In [67]:
raw_dockerfile = generate_dockerfile(options)

In [68]:
print(raw_dockerfile)

```Dockerfile
# Use an appropriate base image
FROM python:3.10.12-slim

# Set environment variables
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"

# Copy the current directory contents into the container at /opt/program
COPY . /opt/program

# Set the working directory
WORKDIR /opt/program

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Make the serve script executable
RUN chmod +x serve

# Expose the port for the FastAPI application
EXPOSE 8080

# Note: The command to run the inference script is not included as per instructions
```


In [77]:
def extract_dockerfile(llm_response):
    """
    Extracts the main Python script from an LLM response.

    Args:
        llm_response (str): The response text containing a Python code block.

    Returns:
        str: The extracted Dockerfile, or an empty string if no script is found.
    """
    match = re.search(r"```Dockerfile\n(.*?)\n```", llm_response, re.DOTALL)
    return match.group(1) if match else ""


In [78]:
docker_file_path = os.path.join(inference_script_dir, "Dockerfile")
dockerfile = extract_dockerfile(raw_dockerfile)
write_to_file(docker_file_path, dockerfile)
print(f"Dockerfile saved to {docker_file_path}")    
print(dockerfile)

Dockerfile saved to /Users/neel/Developer/sagemaker_test/inference/Dockerfile
# Use an appropriate base image
FROM python:3.10.12-slim

# Set environment variables
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"

# Copy the current directory contents into the container at /opt/program
COPY . /opt/program

# Set the working directory
WORKDIR /opt/program

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Make the serve script executable
RUN chmod +x serve

# Expose the port for the FastAPI application
EXPOSE 8080

# Note: The command to run the inference script is not included as per instructions


In [79]:
docker_options = {
    'model_dir': '/Users/neel/Developer/models',
    'image_name': 'iris_model',
}
def docker_validation(options):
    print("Dockerfile validation")
    msg = f"""Now I want to validate the dockerfile you generated. 
    I need you help to provide me instrustion on 
    1. How to build the docker image, with the name {options['image_name']}, 
    2. How to run the docker image with 
        - by mountinng my model file present inside {options['model_dir']} to /opt/ml/model
        - providing entrypoint as the serve script
    3. Once that is done, I want to test the docker image by running a curl command to the /ping endpoint
    4. I also want to test the /invocations endpoint by providing a sample input
    5. If I face any issues, I will ask you for help
    Please provide me the instructions step by step."""
    while True:
        response = docker_generator_llm.invoke(msg)
        print("AI", response)
        input_text = input("Human (gg to exit): ")
        print("Human: ", input_text)
        if input_text.lower() == "gg":
            break

In [80]:
docker_validation(docker_options)

Dockerfile validation
AI Certainly! Here are the detailed instructions to build, run, and test your Docker image:

### Step 1: Build the Docker Image

1. **Open a terminal** and navigate to the directory containing your Dockerfile and other project files.

2. **Build the Docker image** with the name `iris_model` using the following command:

   ```bash
   docker build -t iris_model .
   ```

### Step 2: Run the Docker Image

1. **Run the Docker container** with the model file mounted and the `serve` script as the entry point. Use the following command, replacing `/Users/neel/Developer/models` with the path to your model file on your host machine:

   ```bash
   docker run -d -p 8080:8080 --name iris_container \
   -v /Users/neel/Developer/models:/opt/ml/model \
   --entrypoint /opt/program/serve iris_model
   ```

   - `-d` runs the container in detached mode.
   - `-p 8080:8080` maps port 8080 of the container to port 8080 on your host machine.
   - `-v /Users/neel/Developer/models:/o

## Pushing to AWS ECR

In [82]:
ecr_options = {
    'region': 'us-west-2',
    'account_id': '407683973276',
    'ecr_repository_name': 'sagemaker_models/iris',
    'local_image_name': 'sagemaker-test',
    'image_tag': 'latest'
}
def provide_commmands_to_push_ecr(options):
    print("Make sure you have the AWS CLI installed and configured with the necessary permissions.")
    print("Make sure ou have an ECR repository created. In the same region as your Sagemaker")
    print("Make sure you have Docker installed and running.")
    print("Login to ECR")
    print(f"aws ecr get-login-password --region {options['region']} | docker login --username AWS --password-stdin {options['account_id']}.dkr.ecr.{options['region']}.amazonaws.com")
    print("Tagging the locally build image for ECR")
    print(f"docker tag {options['local_image_name']}:latest {options['account_id']}.dkr.ecr.{options['region']}.amazonaws.com/{options['ecr_repository_name']}:latest")
    print("Pushing the image to ECR")
    print(f"docker push {options['account_id']}.dkr.ecr.{options['region']}.amazonaws.com/{options['ecr_repository_name']}:latest")

provide_commmands_to_push_ecr(ecr_options)

Make sure you have the AWS CLI installed and configured with the necessary permissions.
Make sure ou have an ECR repository created. In the same region as your Sagemaker
Make sure you have Docker installed and running.
Login to ECR
aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 407683973276.dkr.ecr.us-west-2.amazonaws.com
Tagging the locally build image for ECR
docker tag sagemaker-test:latest 407683973276.dkr.ecr.us-west-2.amazonaws.com/sagemaker_models/iris:latest
Pushing the image to ECR
docker push 407683973276.dkr.ecr.us-west-2.amazonaws.com/sagemaker_models/iris:latest


## Uploading compressing and uploading the model to s3 

In [86]:
import subprocess
import os

def compress_and_upload_to_s3(model_path, bucket_name, s3_key):
    """
    Compress a model file to tar.gz and upload it to an S3 bucket.

    Args:
        model_path (str): Path to the model file or directory.
        bucket_name (str): Name of the S3 bucket.
        s3_key (str): Key (path) in the S3 bucket where the file will be uploaded.

    Returns:
        str: The S3 URI of the uploaded file.
    """
    print("Uploading model to S3...")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"The specified model path does not exist: {model_path}")
    
    # Compress the model to a tar.gz file
    compressed_file = f"{os.path.splitext(model_path)[0]}.tar.gz"
    try:
        subprocess.run(['tar', '-czf', compressed_file, '-C', os.path.dirname(model_path), os.path.basename(model_path)],
                       check=True)
        print(f"Model compressed to: {compressed_file}")
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error compressing the model: {e}")
    
    # Upload the compressed file to S3
    try:
        s3_uri = f"s3://{bucket_name}/{s3_key}"
        subprocess.run(['aws', 's3', 'cp', compressed_file, s3_uri], check=True)
        print(f"File uploaded to S3: {s3_uri}")
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error uploading the file to S3: {e}")
    finally:
        # Clean up the compressed file
        if os.path.exists(compressed_file):
            os.remove(compressed_file)
            print(f"Temporary file removed: {compressed_file}")
    
    return s3_uri

In [87]:
s3_bucket_name = "neel-sagemaker-models"
s3_prefix = "iris_model"
s3_key = f"{s3_prefix}/model2.tar.gz"
model_path = "/Users/neel/Developer/models/iris_model.pkl"
s3_uri = compress_and_upload_to_s3(model_path, s3_bucket_name, s3_key)
print(f"Model uploaded to S3 at: {s3_uri}")


Uploading model to S3...
Model compressed to: /Users/neel/Developer/models/iris_model.tar.gz
upload: ../models/iris_model.tar.gz to s3://neel-sagemaker-models/iris_model/model2.tar.gz
File uploaded to S3: s3://neel-sagemaker-models/iris_model/model2.tar.gz
Temporary file removed: /Users/neel/Developer/models/iris_model.tar.gz
Model uploaded to S3 at: s3://neel-sagemaker-models/iris_model/model2.tar.gz


## Terraform

In [121]:
terrraform_generator_llm = MyOpenAI(model="gpt-4o")
terraform_dir = "/Users/neel/Developer/sagemaker_test/terraform"
terraform_critical_llm = MyOpenAI(model="gpt-4o")

In [122]:
saggemaker_terraform_options = {
    'region': 'us-west-2',
    's3_uri': s3_uri,
    'instance_type': 'ml.t2.mdeium',
    'model_name': 'iris-model',
    'ecr_image': '407683973276.dkr.ecr.us-west-2.amazonaws.com/sagemaker_models/iris:latest',
    "model_in_s3": s3_uri,
}


def terraform_generator_actor(options, feedback=None):
    terraform_template_file = "/Users/neel/Developer/deploy_wizard/templates/aws_sagemaker_endpoint/main.tf"
    terraform_template = file_reader(terraform_template_file)
    if feedback is None:
        messages = [
            {
                "role": "system",
                "content": "You are a Terraform expert."
            },
            {
                "role": "user",
                "content": f'''
        You are an expert Terraform Generation Agent. Your task is to generate a **Terraform** script for deploying a model on AWS SageMaker Endpoint.
        ### Context:
        I need a terraform file to deploy a model on AWS SageMaker Endpoint. The terraform code should be able to deploy the following:
        1. An IAM role with the necessary permissions for SageMaker and S3, this role will be be used by the model.
        2. A SageMaker model with name {options['model_name']} that uses the ECR image {options['ecr_image']} and S3 URI {options['model_in_s3']}.
        3. An endpoint configuration for the SageMaker model.
        4. An endpoint for the SageMaker model.
        5. For IAM role and its polcies, use what is there in the template. You have to give it a name that is similar to the model name.
        6. Use the instance type {options['instance_type']} for the endpoint.
        7. Use the region {options['region']} for the resources.
        8. Overwrite the values in the template with the values provided in the options.

        Here is a template for the terraform file, you can use it as a reference.
        {terraform_template}'''}
        ]
    else:
        messages = feedback
    response = terrraform_generator_llm.invoke(messages)
    return response

def terraform_critic_evaluator(options):
    messages = [
        {"role": "system", "content": "You are a Terraform expert."},
        {"role": "user", "content": f"""
        You are reviewing a terraform script generated for deploying a model on AWS SageMaker Endpoint.

        ### Task:
        - Analyze the script for correctness, best practices, and completeness.
        - Identify any missing components or improvements.
        - If the script is perfect, respond with 'No changes needed.'
        - If changes are required, specify what needs to be improved.
        - Dont menion the IAM role, it is already created and will be used by the model.
        

        ### Script to review:
        ```
        {response}
        ```

        ### Expected Output:
        - Either 'No changes needed.' OR a detailed improvement plan.
        """}
    ]
    
    return terraform_critical_llm.invoke(messages)

def actor_critic_terraform_script(options):
        """
        Runs the actor-critic loop:
        - Actor generates a terraform script.
        - Critic evaluates the script.
        - If the script is sufficient, exit early.
        - Otherwise, Actor refines the script based on feedback.
        """
        feedback = None  # No feedback initially

        for _ in range(1):  # Max iterations: 1
            # Step 1: Actor generates inference script
            terraform_script = terraform_generator_actor(options, feedback)

            # Step 2: Critic evaluates the script
            # critic_feedback = terraform_critic_evaluator(terraform_script)

            # if "no changes needed" in critic_feedback.lower():
            #     print("✅ Terraform script is satisfactory. Exiting early.")
            #     return terraform_script  # Early exit if script is sufficient

            # # Step 3: Actor refines script using critic's feedback
            # feedback = [
            #     {"role": "assistant", "content": terraform_script},
            #     {"role": "user", "content": f"Revise based on this feedback:\n{critic_feedback}"}
            # ]

        print("🔄 Max iterations reached. Returning final script.")
        return terraform_script



In [123]:
response = actor_critic_terraform_script(saggemaker_terraform_options)

🔄 Max iterations reached. Returning final script.


In [124]:
print(response)

Below is the Terraform script tailored to your requirements for deploying a model on AWS SageMaker Endpoint. The script includes the creation of an IAM role with necessary permissions, a SageMaker model, an endpoint configuration, and an endpoint. The values have been customized based on your specifications.

```hcl
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.45.0"
    }
  }
}

###############################################################################
# VARIABLES
###############################################################################

variable "aws_region" {
  description = "AWS region to deploy resources in"
  type        = string
  default     = "us-west-2"
}

variable "execution_role_name" {
  description = "IAM role name that SageMaker will assume"
  type        = string
  default     = "iris-model-execution-role"
}

variable "model_name" {
  description = "Name of the SageMaker Model resource"
  type        = st

In [None]:
@tool()
def save_file_tool(file_path: str, content: str) -> str:
    """Save content to a file at the given path."""
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'w') as f:
        f.write(content)
    return f"Saved to {file_path}"

def extract_and_save_inference_files(response_str: str, save_dir: str = ''):
        llm = ChatOpenAI(model="gpt-4o-mini")

        tools = [save_file_tool]

        agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.OPENAI_FUNCTIONS,
            verbose=True,
        )

        response = agent.invoke(
            f"""
            1. Extract all terraform code from the response string.
            2. For each extracted script:
                 - save it with appropriate name and extension like main.tf, etc. at {terraform_dir}
            3
            4. Use the `save_file_tool` tool to save the content.
            5. Do not output anything else except confirming tool usage.

            ### Text:
            {response_str}
            """
        )
        return response

In [120]:
extract_and_save_inference_files(response, terraform_dir)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `save_file_tool` with `{'file_path': '/Users/neel/Developer/sagemaker_test/terraform/main.tf', 'content': '###############################################################################\n# Terraform script for deploying a model on AWS SageMaker Endpoint\n###############################################################################\n\ntf {\n  required_providers {\n    aws = {\n      source  = "hashicorp/aws"\n      version = ">= 5.45.0"\n    }\n  }\n}\n\n###############################################################################\n# VARIABLES\n###############################################################################\n\nvariable "aws_region" {\n  description = "AWS region to deploy resources in"\n  type        = string\n  default     = "us-west-2"\n}\n\nvariable "execution_role_name" {\n  description = "IAM role name that SageMaker will assume"\n  type        = string\n  default     = "iris-model-executio

{'input': '\n            1. Extract all terraform code from the response string.\n            2. For each extracted script:\n                 - save it with appropriate name and extension like main.tf, etc. at /Users/neel/Developer/sagemaker_test/terraform\n            3. Use the `save_file_tool` tool to save the content.\n            4. Do not output anything else except confirming tool usage.\n\n            ### Text:\n            Below is the Terraform script tailored to your requirements for deploying a model on AWS SageMaker Endpoint:\n\n```hcl\n###############################################################################\n# Terraform script for deploying a model on AWS SageMaker Endpoint\n###############################################################################\n\nterraform {\n  required_providers {\n    aws = {\n      source  = "hashicorp/aws"\n      version = ">= 5.45.0"\n    }\n  }\n}\n\n###############################################################################\n# 

In [127]:
def instructions_to_run_terraform():
    message = f"Can you provide me the instructions to run the terraform script you generated? I want to know the commands to run in order to deploy the model on AWS SageMaker Endpoint. I also want to know how to destroy the resources once I am done."
    response=terrraform_generator_llm.invoke(message)
    print(response)

In [128]:
instructions_to_run_terraform()

Certainly! Below are the step-by-step instructions to run the Terraform script and deploy your model on AWS SageMaker Endpoint, as well as how to destroy the resources when you're done.

### Prerequisites

1. **Install Terraform**: Ensure that Terraform is installed on your machine. You can download it from the [Terraform website](https://www.terraform.io/downloads.html).

2. **AWS CLI Configuration**: Make sure you have the AWS CLI installed and configured with the necessary credentials. You can configure it using:
   ```bash
   aws configure
   ```
   This will prompt you to enter your AWS Access Key, Secret Key, region, and output format.

3. **Terraform Initialization**: Ensure you have a working directory where your Terraform script (`main.tf`) is saved.

### Running the Terraform Script

1. **Navigate to the Directory**: Open your terminal and navigate to the directory containing your Terraform script.
   ```bash
   cd path/to/your/terraform/directory
   ```

2. **Initialize Terr

## Integration testing

In [140]:
from aws_helper.sagemaker_endpoint import DeploySagemakerEndpoint

In [141]:
a = DeploySagemakerEndpoint(actor_critic_iterations=1)

In [None]:
a.get_inferece_script()

In [146]:
a.get_requirements_txt()

Generating requirements.txt...


In [148]:
a.dockerizing_the_model()

Generating Dockerfile...
Saving Dockerfile to: /Users/neel/Developer/sagemaker_test2/inference/Dockerfile
Dockerfile generated successfully.
Dockerfile validation
AI Certainly! Here are the step-by-step instructions to build, run, and test your Docker image:

### Step 1: Build the Docker Image

1. Open a terminal and navigate to the directory containing your Dockerfile and other project files.
2. Run the following command to build the Docker image with the name `iris_sagemaker`:

   ```bash
   docker build -t iris_sagemaker .
   ```

### Step 2: Run the Docker Image

1. Use the following command to run the Docker image, mounting your model file directory and setting the entrypoint to the `serve` script:

   ```bash
   docker run -d --name iris_container -p 8080:8080 -v /Users/neel/Developer/models:/opt/ml/model --entrypoint /opt/program/serve iris_sagemaker
   ```

   - `-d`: Runs the container in detached mode.
   - `--name iris_container`: Names the running container `iris_container`

In [149]:
a.ecr_image_full_name = "407683973276.dkr.ecr.us-west-2.amazonaws.com/sagemaker_models/iris:latest"

In [150]:
a.ecr_repo_name

In [151]:
a.upload_model_to_s3()

To upload the model to AWS S3, please provide the following information:
Uploading model to S3...
Model compressed to: /Users/neel/Developer/models/iris_model.tar.gz
upload: ../models/iris_model.tar.gz to s3://neel-sagemaker-models/iris_model2/model.tar.gz
File uploaded to S3: s3://neel-sagemaker-models/iris_model2/model.tar.gz
Temporary file removed: /Users/neel/Developer/models/iris_model.tar.gz
Model uploaded to S3 at: s3://neel-sagemaker-models/iris_model2/model.tar.gz


In [157]:
a.orchestrate_terraform_deployment()

To deploy the model on AWS SageMaker Endpoint, please provide the following information:
🔄 Max iterations reached. Returning final script.


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `save_file_tool` with `{'file_path': '/Users/neel/Developer/sagemaker_test2/terraform/main.tf', 'content': '###############################################################################\n# VARIABLES\n###############################################################################\n\nvariable "aws_region" {\n  description = "AWS region to deploy resources in"\n  type        = string\n  default     = "us-west-2"\n}\n\nvariable "execution_role_name" {\n  description = "IAM role name that SageMaker will assume"\n  type        = string\n  default     = "iris2-sagemaker-execution-role"\n}\n\nvariable "model_name" {\n  description = "Name of the SageMaker Model resource"\n  type        = string\n  default     = "iris2"\n}\n\nvariable "ecr_image_uri" {\n  description = "ECR image URI f