# RAS-Commander Code Assistant

Alpha, this only works with Claude 3.5 Sonnet for now

Future devlopement will include multi-turn support and ability to select between different models

Provide your own API key to make this work.


In [1]:
# User query
user_query = """   Make a table for each class file in the library with all functions, their arguments (with typing/expected input), and a short summary of the function's purpose.

"""

In [2]:
# Define files, folders and extensions to omit
omit_folders = [
    "Bald Eagle Creek", 
    "__pycache__", 
    ".git", 
    ".github", 
    "tests", 
    "build", 
    "dist", 
    "ras_commander.egg-info", 
    "venv", 
    "example_projects", 
    "llm_summary", "misc", "future", "ai_tools"
]

# Define file extensions to omit
omit_extensions = [
    # Common image file extensions
    '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg', '.ico',
    # Other binary file extensions
    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    '.zip', '.rar', '.7z', '.tar', '.gz',
    '.exe', '.dll', '.so', '.dylib',
    '.pyc', '.pyo', '.pyd',  # Python bytecode and compiled files
    '.class',  # Java bytecode
    '.log', '.tmp', '.bak', '.swp',  # Temporary and backup files
    '.bat', '.sh',  # Script files
]

# Define files to omit based on keywords
omit_files = [
    'FunctionList.md',
    'DS_Store',
    'Thumbs.db',
    'llmsummarize'
    'example_projects.zip',
    '11_accessing_example_projects.ipynb',
    'Example_Projects_6_5.zip'
    'github_code_assistant.ipynb',
    'example_projects.ipynb',
    '11_Using_RasExamples.ipynb',
    'example_projects.csv',
    'rascommander_code_assistant.ipynb',
    'RasExamples.py'
]

In [3]:
# Install necessary packages
def install_and_import(package_name, import_name=None):
    import subprocess
    import sys
    if import_name is None:
        import_name = package_name
    try:
        __import__(import_name)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        __import__(import_name)
    print(f"Successfully imported {import_name}")

install_and_import("pandas")
install_and_import("anthropic")
install_and_import("tiktoken")
install_and_import("IPython", "IPython.display")
install_and_import("astor")

import os
from pathlib import Path
import pandas as pd
import anthropic
import tiktoken
import astor
from IPython.display import display, clear_output

print("All required packages have been installed and imported successfully.")

Successfully imported pandas
Successfully imported anthropic
Successfully imported tiktoken
Successfully imported IPython.display
Successfully imported astor
All required packages have been installed and imported successfully.


In [4]:
# Set API Key
#os.environ["ANTHROPIC_API_KEY"] = 'YOUR KEY HERE'


# Alternately, you can set the key from the file Anthropic_API_Key.txt
with open('C:\SCRATCH\Anthropic_API_Key.txt', 'r') as file:
    os.environ["ANTHROPIC_API_KEY"] = file.read().strip()

In [5]:
# Define system message from the ras_commander .cursorrules file
from pathlib import Path

def read_system_message():
    # Get the current notebook's directory
    current_dir = Path.cwd()
    
    # Path to the .cursorrules file (assuming it's in the parent directory)
    cursor_rules_path = current_dir.parent / '.cursorrules'

    # Check if .cursorrules exists
    if not cursor_rules_path.exists():
        raise FileNotFoundError("This notebook expects to be in a directory within the ras_commander repo which has a .cursorrules file in its parent directory.")

    # Read the .cursorrules file as plain text
    with open(cursor_rules_path, 'r') as f:
        system_message = f.read().strip()

    if not system_message:
        raise ValueError("No system message found in .cursorrules file.")

    return system_message

# Read the system message from .cursorrules
system_message = read_system_message()

print("System message loaded successfully.")

System message loaded successfully.


In [6]:
# Define folder as the parent folder (since this notebook lives in the ai_tools folder)
# Get the current notebook's directory
current_dir = Path.cwd()

# Set the context folder to the parent of the current directory
context_folder = current_dir.parent

print(f"Context folder set to: {context_folder}")


Context folder set to: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7


In [7]:
# Function to compile codebase, omitting specified folders, extensions, and files

import tiktoken
from pathlib import Path

def strip_code_from_functions(content):
    """
    Strip the code from functions, leaving only function signatures and docstrings.
    
    Args:
    content (str): The content of a Python file.
    
    Returns:
    str: The content with function bodies removed.
    """
    import ast
    import astor

    class FunctionStripper(ast.NodeTransformer):
        def visit_FunctionDef(self, node):
            # Keep the function signature and docstring (if present)
            new_node = ast.FunctionDef(
                name=node.name,
                args=node.args,
                body=[ast.Pass()],  # Replace the body with a pass statement
                decorator_list=node.decorator_list,
                returns=node.returns
            )
            # If there's a docstring, keep it
            if (len(node.body) > 0 and isinstance(node.body[0], ast.Expr) and
                isinstance(node.body[0].value, ast.Str)):
                new_node.body = [node.body[0], ast.Pass()]
            return new_node

    try:
        tree = ast.parse(content)
        stripped_tree = FunctionStripper().visit(tree)
        return astor.to_source(stripped_tree)
    except SyntaxError:
        # If parsing fails, return the original content
        return content

def combine_files(summarize_subfolder, omit_folders, omit_extensions, omit_files, strip_code=False):
    combined_text = ""
    file_token_counts = {}
    
    # Get the name of this notebook
    this_notebook = Path.cwd().name
    print(f"Notebook name: {this_notebook}")

    # Ensure summarize_subfolder is a Path object
    summarize_subfolder = Path(summarize_subfolder)
    print(f"Subfolder to summarize: {summarize_subfolder}")

    # Define the output file name based on the folder name
    output_file_name = f"{summarize_subfolder.name}_code_only{'_stripped' if strip_code else ''}.txt"
    output_file_path = Path.cwd().parent / "llm_summary" / output_file_name
    print(f"Output file path: {output_file_path}")

    # Ensure the output directory exists
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ensured to exist: {output_file_path.parent}")

    # Initialize tokenizer
    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

    # Open the output file
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        print(f"Opened output file: {output_file_path}")
        # Iterate over all files and subfolders in the summarize_subfolder directory
        for filepath in summarize_subfolder.rglob('*'):
            # Check if the file is not this notebook, not in the omit_folders, not in omit_extensions, and not in omit_files
            if (filepath.name != this_notebook and 
                not any(omit_folder in filepath.parts for omit_folder in omit_folders) and
                filepath.suffix.lower() not in omit_extensions and
                not any(omit_file in filepath.name for omit_file in omit_files)):
                # Write the filename or folder name
                if filepath.is_file():
                    outfile.write(f"File: {filepath}\n")
                else:
                    outfile.write(f"Folder: {filepath}\n")
                outfile.write("="*50 + "\n")  # Separator
                
                # If it's a file, open and read the contents of the file
                if filepath.is_file():
                    try:
                        with open(filepath, 'r', encoding='utf-8') as infile:
                            content = infile.read()
                    except UnicodeDecodeError:
                        with open(filepath, 'rb') as infile:
                            content = infile.read()
                            content = content.decode('utf-8', errors='ignore')
                    
                    # Strip code if the option is enabled and it's a Python file
                    if strip_code and filepath.suffix.lower() == '.py':
                        content = strip_code_from_functions(content)
                    
                    # Write the contents to the output file
                    outfile.write(content)
                    
                    # Count tokens for this file
                    file_tokens = len(enc.encode(content))
                    file_token_counts[str(filepath)] = file_tokens
                
                # Write a separator after the file contents or folder name
                outfile.write("\n" + "="*50 + "\n\n")
            else:
                dummy = 0

    print(f"All files and folders have been combined into '{output_file_path}'")

    # Count total tokens
    with open(output_file_path, 'r', encoding='utf-8') as f:
        combined_text = f.read()
    token_count = len(enc.encode(combined_text))
    
    return combined_text, token_count, file_token_counts


# Combine files while keeping code
combined_text, token_count, file_token_counts = combine_files(context_folder, omit_folders, omit_extensions, omit_files)

# Combine files while stripping code
combined_text, token_count, file_token_counts = combine_files(context_folder, omit_folders, omit_extensions, omit_files, strip_code=True)

Notebook name: ai_tools
Subfolder to summarize: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7
Output file path: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\llm_summary\ras_commander workspace7_code_only.txt
Output directory ensured to exist: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\llm_summary
Opened output file: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\llm_summary\ras_commander workspace7_code_only.txt
All files and folders have been combined into 'c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\llm_summary\ras_commander workspace7_code_only.txt'
Notebook name: ai_tools
Subfolder to summarize: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7
Output file path: c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ra

In [8]:
# Sort files by token count and get top 20
top_20_files = sorted(file_token_counts.items(), key=lambda x: x[1], reverse=True)[:20]

print("\nTop 20 files by token count:")
for file, count in top_20_files:
    print(f"{file}: {count} tokens")


Top 20 files by token count:
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\ras_commander\RasPlan.py: 3796 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\README.md: 3099 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\Comprehensive_Library_Guide.md: 2391 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\ras_commander\RasPrj.py: 1711 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\ras_commander\RasUtils.py: 1507 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\STYLE_GUIDE.md: 1461 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\ras_commander\RasCommander.py: 1392 tokens
c:\Users\billk\Desktop\AWS Webinar AI for HEC-RAS\ras_commander\ras_commander workspace7\ras_co

In [9]:
# Check the total token count
print(f"Combined text token count: {token_count}")

Combined text token count: 22963


In [10]:
# Set up Anthropic client
def stream_response(client, full_prompt, max_tokens=4096):
    response_text = ""
    with client.messages.stream(
        max_tokens=max_tokens,
        messages=[
            {"role": "user", "content": full_prompt}
        ],
        model="claude-3-sonnet-20240229"
    ) as stream:
        for text in stream.text_stream:
            response_text += text
            clear_output(wait=True)
            print("Claude's response:")
            print(response_text)
            
    return response_text

def estimate_cost(input_tokens, output_tokens, pricing_df):
    model = "Claude 3.5 Sonnet"
    input_cost = (input_tokens / 1e6) * pricing_df.loc[pricing_df['Model'] == model, 'Input ($/MTok)'].values[0]
    output_cost = (output_tokens / 1e6) * pricing_df.loc[pricing_df['Model'] == model, 'Output ($/MTok)'].values[0]
    return input_cost + output_cost


# Set up Anthropic client
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# Create pricing dataframe
pricing_data = {
    "Model": ["Claude 3.5 Sonnet"],
    "Input ($/MTok)": [3],
    "Output ($/MTok)": [15],
    "Prompt Caching Write ($/MTok)": [3.75],
    "Prompt Caching Read ($/MTok)": [0.30],
    "Context Window": [200000]
}

pricing_df = pd.DataFrame(pricing_data)
print("Pricing DataFrame:")
display(pricing_df)

# Combine system message, context, and user query
full_prompt = f"{system_message}\n\nContext:\n{combined_text}\n\n\n\n\nUser Query: {user_query}"

# Estimate cost
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
input_tokens = token_count + len(enc.encode(user_query))
output_tokens = 8192 # assuming full response
estimated_cost = estimate_cost(input_tokens, output_tokens, pricing_df)

print(f"\nEstimated cost: ${estimated_cost:.4f}")

Pricing DataFrame:


Unnamed: 0,Model,Input ($/MTok),Output ($/MTok),Prompt Caching Write ($/MTok),Prompt Caching Read ($/MTok),Context Window
0,Claude 3.5 Sonnet,3,15,3.75,0.3,200000



Estimated cost: $0.1919


#### See the cost per message above, and add additional file/folder filters if desired

In [11]:
# raise error to prevent automatically querying model and incurring costs
raise ValueError("This is a test error to prevent automatically querying model and incurring costs")


ValueError: This is a test error to prevent automatically querying model and incurring costs

In [12]:
# Stream Claude's response
response_text = stream_response(client, full_prompt)

Claude's response:
Sure, here's a table for each class file in the ras_commander library, with all functions, their arguments (with typing/expected input), and a short summary of the function's purpose.

1. `RasCommander.py`

| Function | Arguments | Summary |
|----------|-----------|---------|
| `compute_plan` | `plan_number: str`, `compute_folder: Optional[Union[str, Path]] = None`, `ras_object: Optional[RasPrj] = None` | Execute a HEC-RAS plan. |
| `compute_test_mode` | `plan_numbers: Optional[List[str]] = None`, `folder_suffix: str = '[Test]'`, `clear_geompre: bool = False`, `max_cores: Optional[int] = None`, `ras_object: Optional[RasPrj] = None` | Execute HEC-RAS plans in test mode. |
| `compute_parallel` | `plan_numbers: Optional[List[str]] = None`, `max_workers: int = 2`, `cores_per_run: int = 2`, `ras_object: Optional[RasPrj] = None`, `dest_folder: Optional[Union[str, Path]] = None` | Execute HEC-RAS plans in parallel using multiple worker threads. |
| `worker_thread` | `worker

-----

# Example Response

Claude's response:
Sure, I'll review the code and identify any duplicate or redundant sections that can be removed or consolidated to reduce the overall size of the library.

1. `RasFolderInit.py`:
   - The `get_plan_entries`, `get_flow_entries`, `get_unsteady_entries`, and `get_geom_entries` functions follow a similar pattern. They can be consolidated into a single function that takes a file type as an argument.
   - The `find_ras_prj` function could potentially be merged with the `get_project_name` function, as they both operate on project file paths.

2. `RasPlan.py`:
   - The `get_plan_path`, `get_flow_path`, `get_unsteady_path`, and `get_geom_path` functions are very similar and could be combined into a single function that takes a file type and number as arguments.
   - The `set_geom`, `set_flow`, and `set_unsteady` functions have a lot of overlapping code, particularly for updating plan files. These could be consolidated into a single function that takes the file type and number as arguments.

3. `RasUtils.py`:
   - The `create_backup` and `restore_from_backup` functions have some overlap in their logic and could potentially be combined into a single function with an additional argument to specify whether to create or restore a backup.
   - The `remove_file` function could be merged with `remove_folder_with_retry` by adding an optional argument to handle file removal.

4. `RasCommander.py`:
   - The `compute_plan` and `compute_plan_from_folder` functions have a lot of overlapping code. These could be combined into a single function with an optional argument to specify the folder path.

5. `RasPrj.py`:
   - The `find_ras_prj` function is already present in `RasFolderInit.py`, so it could be removed from this module.
   - The `load_project_data` function is not used anywhere else in the codebase, so it could be removed unless it has a planned future use.

After consolidating these functions and removing any redundant code, the overall size of the library should be reduced. However, it's important to ensure that the functionality remains intact and that any changes are thoroughly tested.

Estimated cost: $0.2435
