In [3]:
import runpod #pip install runpod
import time
import os
import subprocess
from dotenv import load_dotenv
import pyperclip #pip install pyperclip==1.9.0

In [14]:
# Configuration: credentials and local paths, all relative to the parent of the working directory
parent_dir = os.path.dirname(os.getcwd())
load_dotenv(os.path.join(parent_dir, "env"))  # Load the file named "env" from the parent directory
runpod.api_key = os.getenv('RUNPOD_API_KEY')  # API key comes from the environment, never hardcoded
if not runpod.api_key:
    # Stop here with a clear message instead of failing later on the first RunPod API call
    raise RuntimeError("RUNPOD_API_KEY is not set - check the 'env' file in the parent directory")
files_path = parent_dir  # Local directory whose files are copied to the pod
save_path = os.path.join(parent_dir, "LLM_outputs")  # Local directory that receives the files copied back from the pod

Create Pod - The `runpod` library files had to be modified so that `create_pod` accepts `start_jupyter` as an input, as described in [this pull request commit](https://github.com/runpod/runpod-python/pull/328/commits/071484c10438f546666667c7a2f38ad143beb435).

In [18]:
# Create a pod - https://github.com/runpod/runpod-python/blob/main/runpod/api/ctl_commands.py
# All settings are collected in one dict and passed to create_pod as keyword arguments.
pod_settings = {
    "name": "testnew",  # Name shown for the pod
    "image_name": "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04",  # Container image
    "gpu_type_id": "NVIDIA A40",  # See runpod.get_gpus() for all types, e.g. "NVIDIA RTX A4500", "NVIDIA GeForce RTX 3080"
    "cloud_type": "COMMUNITY",  # One of "ALL", "COMMUNITY", "SECURE"
    "support_public_ip": True,  # This is the default
    # "country_code": "FR",  # Data should remain within the EU
    "container_disk_in_gb": 100,  # Container disk size
    "volume_in_gb": 200,  # Pod volume size
    "ports": "8888/http,22/tcp",  # Ports to expose (Jupyter over http, SSH over tcp)
    "volume_mount_path": "/workspace",  # If changed here, the /workspace paths used in later cells must be changed too
    "start_jupyter": True,  # Start Jupyter on the pod - not in the official documentation, found through issues
}
pod = runpod.create_pod(**pod_settings)

In [None]:
# Fetch the list of pods on the account
pods = runpod.get_pods()
pods  # Shown as the cell output to confirm the pod was created

In [None]:
# Wait for the pod to boot, then fetch the details (including port mappings) of THIS pod.
POD_BOOT_WAIT_S = 90        # Initial wait for the pod to be ready and get the http url
POD_POLL_INTERVAL_S = 10    # Pause between follow-up queries while the port mappings are still missing
POD_EXTRA_TIMEOUT_S = 300   # Give up this long after the initial wait

time.sleep(POD_BOOT_WAIT_S)
deadline = time.monotonic() + POD_EXTRA_TIMEOUT_S
while True:
    # Query by id: runpod.get_pods()[0] is simply the first pod on the account,
    # which is not necessarily the pod created above when several pods exist.
    exposed_pod = runpod.get_pod(pod['id'])
    if ((exposed_pod or {}).get('runtime') or {}).get('ports'):
        break  # Port mappings are available
    if time.monotonic() >= deadline:
        raise TimeoutError(f"Pod {pod['id']} did not report any port mappings in time")
    time.sleep(POD_POLL_INTERVAL_S)
exposed_pod

In [21]:
# Resolve the public ip/port that maps to the pod's SSH (tcp) port.
# exposed_pod['ports'] is the port spec string, e.g. '8888/http,22/tcp'; the tcp entry is
# selected by protocol rather than by its position in the string.
tcp_ports = [
    int(spec.split('/')[0])
    for spec in exposed_pod['ports'].split(',')
    if spec.strip().endswith('/tcp')
]
if not tcp_ports:
    raise ValueError(f"No tcp port found in the pod port spec {exposed_pod['ports']!r}")
port_number = tcp_ports[0]  # 22 for '8888/http,22/tcp'

# Each runtime port entry carries 'privatePort', 'publicPort' and 'ip'; look the mapping up once.
runtime_ports = (exposed_pod.get('runtime') or {}).get('ports') or []
ssh_mapping = next((entry for entry in runtime_ports if entry['privatePort'] == port_number), None)
if ssh_mapping is None:
    raise RuntimeError(f"Pod has no public mapping for private port {port_number} yet - re-fetch the pod details and retry")

ip, public_port = ssh_mapping['ip'], ssh_mapping['publicPort']

Connect via SSH to the Pod

In [None]:
# Open an SSH session that exits immediately, so the pod's host key gets accepted
# (StrictHostKeyChecking=accept-new) on this first connection.
ssh_command = " ".join([
    f"ssh -p {public_port}",
    "-i ~/.ssh/id_ed25519",
    "-o StrictHostKeyChecking=accept-new",
    f"root@{ip}",
    "exit",
])
subprocess.run(ssh_command, shell=True, check=True)

Copy files from local PC to the Pod

In [None]:
# Upload the project files to /workspace on the pod with scp.
source_dir = files_path.replace('\\', '/')  # Use forward slashes in the local path
remote_target = f"root@{ip}:/workspace/"

# Everything matched by <source_dir>/* (no -r flag on this command)
scp_command = f"scp -P {public_port} -i ~/.ssh/id_ed25519 {source_dir}/* {remote_target}"
# The llm_evaluation_framework folder, copied recursively
scp_framework_command = f"scp -r -P {public_port} -i ~/.ssh/id_ed25519 {source_dir}/llm_evaluation_framework {remote_target}"

# Run both uploads the same way; a failed upload is reported and the next one still runs.
for upload_command, success_label, error_label in (
    (scp_command, "all files", "files"),
    (scp_framework_command, "llm_evaluation_framework folder", "llm_evaluation_framework folder"),
):
    try:
        result = subprocess.run(upload_command, shell=True, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error copying {error_label}: {e.stderr}")
    else:
        print(f"Successfully copied {success_label}.")

Create environment and install dependencies inside the Pod

In [None]:
# Create the main virtual environment on the pod and install its dependencies - Takes ~16mins to complete
# NOTE: the commands are chained with ';', so a failing step does not stop the steps after it,
# and the exit status reported by ssh is that of the last command only.
ssh_commands = [
    "python -m venv /workspace/myenv",
    "source /workspace/myenv/bin/activate",
    "cd /workspace",
    "pip install --upgrade ipykernel",
    "python -m ipykernel install --name myenv --user --display-name 'Python (myenv)'",
    "pip install -r requirements_rag.txt && pip install flash-attn==2.6.3", #requirements.txt copied in the previous step
    "pip install autoawq==0.2.8", #Not compatible with colpali transformers version
    "pip install triton==3.2.0",
    "pip install transformers==4.49.0",
    "pip install langchain-google-genai==2.1.3",

    # #Use below for Visual RAG with colpali - Also copy pdf and img folder with imgs from pdf - deactivate above autoawq and transformers installation
    # "pip install cohere==5.15.0",
    # "pip install --upgrade byaldi",
    # "apt-get update",
    # "apt-get install -y poppler-utils",  # not working in macOS
    # "pip install -q pdf2image transformers==4.51.3 qwen-vl-utils",  # needed to get Qwen - git+https://github.com/huggingface/transformers.git
    # "pip uninstall flash-attn -y",
    # "pip install ninja",
    # "pip install packaging",
    # "git clone https://github.com/Dao-AILab/flash-attention",
    # "cd flash-attention",
    # "pip install .",
    # "pip uninstall torch torchvision -y",
    # "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121",
    # "pip install ipywidgets",
    # "cd ..",
]

# Join commands with semicolons for sequential execution
command_string = "; ".join(ssh_commands)
ssh_command = f"""ssh root@{ip} -p {public_port} -i ~/.ssh/id_ed25519 "{command_string}" """

# Stream the remote output live.
# stderr is merged into stdout on purpose: with two separate pipes and only stdout being read,
# the remote command blocks as soon as the unread stderr pipe fills up and the cell hangs.
with subprocess.Popen(ssh_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as process:
    for output in process.stdout:
        print(output.strip())

# Leaving the `with` block waits for the process, so returncode is set here
if process.returncode != 0:
    print("Error executing commands:")
    print(f"ssh exited with status {process.returncode} - see the output above for details")

Install environment and dependencies for executing code to evaluate water networks questions

In [None]:
def _run_remote_streaming(remote_command: str) -> int:
    """Run `remote_command` on the pod over SSH, print its output line by line and return the exit status.

    stderr is merged into stdout: with two separate pipes and only stdout being read, the
    remote command blocks once the unread stderr pipe fills up and the cell hangs.
    Uses the notebook-level `ip` and `public_port` of the pod.
    """
    ssh_command = f"ssh root@{ip} -p {public_port} -i ~/.ssh/id_ed25519 \"{remote_command}\""
    with subprocess.Popen(ssh_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as process:
        for line in process.stdout:
            print(line.strip())
    # Leaving the `with` block waits for the process, so returncode is set here
    return process.returncode


# Create a new virtual environment for testing
venv_name = "test_LLM"
command_install = f"ssh root@{ip} -p {public_port} -i ~/.ssh/id_ed25519 \"python -m venv /workspace/{venv_name}\""
print("Creating test environment...")

try:
    # Execute the command and capture the output (stderr included)
    result_install = subprocess.check_output(command_install, shell=True, stderr=subprocess.STDOUT, text=True)
    print("Venv installation:", result_install)
except subprocess.CalledProcessError as e:
    result_install = f"Error in installation: {e.output}"
    print(result_install)

# Activate the virtual environment and install requirements
activate_cmd = f"source /workspace/{venv_name}/bin/activate"
requirements_file = "requirements_code_execution.txt"

try:
    install_cmd = f"bash -c '{activate_cmd} && pip install -r /workspace/{requirements_file}'"
    exit_status = _run_remote_streaming(install_cmd)
    if exit_status != 0:
        print("Error installing requirements:")
        print(f"ssh exited with status {exit_status} - see the output above for details")
    else:
        print(f"Successfully installed requirements in {venv_name} environment \n")
except Exception as e:
    # Best effort: report the failure and carry on with the next install step
    print(f"Error in requirements installation: {str(e)} \n")

# Install WNTR and other required packages
try:
    install_cmd = f"bash -c '{activate_cmd} && pip install --upgrade pip setuptools wheel && pip install --upgrade --force-reinstall numpy && pip install --upgrade --force-reinstall wntr'"
    exit_status = _run_remote_streaming(install_cmd)
    if exit_status != 0:
        print("Error installing WNTR:")
        print(f"ssh exited with status {exit_status} - see the output above for details")
    else:
        print("Successfully installed WNTR and dependencies \n")
except Exception as e:
    print(f"Error in WNTR installation: {str(e)} \n")

In [None]:
# Build the Jupyter URL from the pod details.
# exposed_pod['env'] is a list of 'NAME=value' strings. Split on the first '=' only, so a
# password that itself contains '=' is kept whole, and match the exact variable name.
jupyter_password = next(
    (env.split('=', 1)[1] for env in exposed_pod['env'] if env.startswith('JUPYTER_PASSWORD=')),
    None,
)
if jupyter_password is None:
    raise LookupError("JUPYTER_PASSWORD was not found in the pod's environment variables")
jupyter_port = exposed_pod['ports'].split("/")[0]  # Extract 8888 from "8888/http,22/tcp"
jupyter_url = f"https://{exposed_pod['id']}-{jupyter_port}.proxy.runpod.net/?token={jupyter_password}"
# NOTE: the URL embeds the Jupyter token - clear this cell's output before sharing the notebook
jupyter_url

In [None]:
# Put the Jupyter URL on the clipboard - Best time to run until here should be 3.5mins
pyperclip.copy(jupyter_url)
print("Jupyter URL copied to clipboard!")

Copy files from Pod to local PC

In [None]:
# Download output files from /workspace on the pod into the local output folder.
file_patterns = ["*.xlsx*", "*.png", "*.json", "*.txt"]  # Remote file patterns to fetch
local_dest = save_path

# Make sure the local output folder exists before scp writes into it
os.makedirs(local_dest, exist_ok=True)

scp_options = f"-P {public_port} -i ~/.ssh/id_ed25519"
remote_root = f"root@{ip}:/workspace"

# One (label, command) pair per file pattern, followed by the recursive folder copy
downloads = [
    (f"{pattern} files", f"scp {scp_options} {remote_root}/{pattern} {local_dest}")
    for pattern in file_patterns
]
downloads.append(
    ("llm_evaluation_framework folder", f"scp -r {scp_options} {remote_root}/llm_evaluation_framework {local_dest}")
)

# A failed download is reported and the remaining ones still run
for label, scp_command in downloads:
    try:
        result = subprocess.run(scp_command, shell=True, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error copying {label}: {e.stderr}")
    else:
        print(f"Successfully copied {label} to {local_dest}")

Terminate the Pod

In [29]:
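# Uncomment and run to terminate the pod when finished.
# NOTE: pods[0] is the first pod returned by runpod.get_pods(), which is not necessarily the pod
# created above when the account has several pods - check the id before terminating.
# runpod.terminate_pod(pods[0]['id'])
# runpod.get_pods()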