In [None]:
# Import pieces from codeflare-sdk
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication

In [None]:
# Authenticate against the cluster with a bearer token.
# This step is optional: when it is skipped, the SDK first looks for the default
# kubeconfig and then for an in-cluster config. KubeConfigFileAuthentication is the
# alternative when a kubeconfig path has to be given manually.
# "TOKEN" and "SERVER" are placeholders -- replace them with the actual values.
# NOTE(review): skip_tls=True presumably disables TLS verification; confirm it is required.
auth = TokenAuthentication(token="TOKEN", server="SERVER", skip_tls=True)
auth.login()

In [None]:
# Describe the cluster we want (and appwrapper), then wrap the description in a Cluster object
cluster_name = "terrestrial-raytest"
cluster_config = ClusterConfiguration(
    name=cluster_name,
    image="quay.io/modh/ray:2.35.0-py311-cu121",
    # Head node sizing
    head_cpus=1,
    head_memory=4,
    # Worker sizing: two workers, min == max for both CPU and memory, no GPUs
    num_workers=2,
    min_cpus=1,
    max_cpus=1,
    min_memory=4,
    max_memory=4,
    num_gpus=0,
)
cluster = Cluster(cluster_config)

In [None]:
# Bring up the cluster
# wait_ready() is called immediately after up(); presumably it blocks until the
# cluster reports ready -- confirm against the SDK before relying on it.
cluster.up()
cluster.wait_ready()

In [None]:
# Show the current status of the cluster created above
cluster.status()

In [None]:
from codeflare_sdk import generate_cert
# Create the required TLS cert for this cluster, then export the environment
# variables that enable TLS. Both calls receive the cluster name and the
# namespace taken from the cluster's configuration.
generate_cert.generate_tls_cert(cluster_name, cluster.config.namespace)
generate_cert.export_env(cluster_name, cluster.config.namespace)

In [None]:
!pip install numpy
!pip install ray[default]==2.35.0
!pip install onnxruntime
!pip install --upgrade pyarrow fsspec

In [None]:
import ray

# Shut down Ray in this process first (presumably to clear any connection left
# over from an earlier run), then look up the URI of the cluster created above.
ray.shutdown()
ray_cluster_uri = cluster.cluster_uri()

In [None]:
# Extra pip packages requested for the Ray runtime environment
runtime_env = {
    "pip": [
        "ipython",
        "torch",
        "onnx",
        "ray[train]",
        "protobuf==3.20.1",
    ]
}

# Connect to the remote cluster.
# NOTE(review): the protobuf implementation setting is handed over through
# _system_config here; confirm this is honoured for a remote connection rather
# than needing to be passed as an environment variable of the runtime environment.
ray.init(
    address=ray_cluster_uri,
    runtime_env=runtime_env,
    _system_config={"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"},
)

print("Ray cluster is up and running: ", ray.is_initialized())

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from ray import tune
import time
import os
import tempfile

# Import ONNX-related libraries
import torch.onnx
import onnx
from onnxruntime import InferenceSession

# Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION to "python" for this process
# (presumably to select protobuf's pure-Python implementation).
# NOTE(review): onnx has already been imported above when this line runs; the
# variable presumably has to be set before protobuf is first imported to have
# any effect in this process -- confirm and move it earlier if so.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Define a simple neural network
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features of the first linear layer.
        hidden_size: Width of the hidden layer between the two linear layers.
        output_size: Number of output features of the second linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in the same order in which forward() applies them
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply fc1, the ReLU activation and fc2 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Define a function to train and evaluate the model
def train_evaluate(config):
    """Train a SimpleNet on random data and report a placeholder accuracy.

    Args:
        config: Mapping with the trial's hyperparameters. ``config["hidden_size"]``
            sets the hidden layer width and ``config["lr"]`` the Adam learning rate.

    Returns:
        A dict with two entries: ``"accuracy"`` (a random float, not a real
        evaluation metric) and ``"model"`` (the trained network).
    """
    input_size, output_size = 10, 1

    # Network whose hidden width comes from the trial's hyperparameters
    model = SimpleNet(input_size, config["hidden_size"], output_size)

    # Synthetic data for illustration: 100 random samples with random targets
    features = torch.randn(100, input_size)
    labels = torch.randn(100, output_size)
    loader = DataLoader(TensorDataset(features, labels), batch_size=8, shuffle=True)

    # Mean squared error loss, optimised with Adam
    loss_fn = nn.MSELoss()
    opt = optim.Adam(model.parameters(), lr=config["lr"])

    # Fixed 10 second pause before training (presumably so the trial stays
    # visible long enough to be observed -- confirm the intent)
    time.sleep(10)

    # Ten passes over the data; adjust as needed
    for _ in range(10):
        for batch_inputs, batch_targets in loader:
            opt.zero_grad()
            batch_loss = loss_fn(model(batch_inputs), batch_targets)
            batch_loss.backward()
            opt.step()

    # No real evaluation is done: the accuracy is a random number
    accuracy = torch.rand(1).item()

    return {"accuracy": accuracy, "model": model}

# Define the hyperparameter search space
search_space = {
    "hidden_size": tune.choice([5, 10, 20]),
    "lr": tune.loguniform(1e-4, 1e-1),
}

# Run the raytune
analysis = tune.run(
    train_evaluate,
    config=search_space,
    num_samples=2,  # Number of trials
    resources_per_trial={"cpu": 1},
    name="raytune_hyperparameter_tuning_example",
)

# Get the best configuration and result
best_trial = analysis.get_best_trial("accuracy", "max", "last")
best_config = best_trial.config
best_accuracy = best_trial.last_result["accuracy"]
best_model = best_trial.last_result["model"]

print(f"Best hyperparameters: {best_config}")
print(f"Best accuracy: {best_accuracy}")


In [None]:
## Export the best model to ONNX
# Location of the optimal HPO model; the folder is created when it does not exist yet
hpo_folder = "models/hpo/"
onnx_model_path = os.path.join(hpo_folder, "model.onnx")
os.makedirs(hpo_folder, exist_ok=True)

# Example input handed to the exporter: a single row of 10 values
dummy_input = torch.tensor([
    [0.3111400080477545, 1.9459399775518593, 1.0, 0.0, 0.0, 1.2, 3.4, -0.5, 0.8, -2.0],
])
torch.onnx.export(best_model, dummy_input, onnx_model_path, verbose=True)

print(f"Best model saved to {onnx_model_path} in ONNX format")


In [None]:
import os
import boto3
import botocore

# Connection settings are read from the environment; os.environ.get leaves a
# setting as None when its variable is not defined
bucket_name = os.environ.get('AWS_S3_BUCKET')
region_name = os.environ.get('AWS_DEFAULT_REGION')
endpoint_url = os.environ.get('AWS_S3_ENDPOINT')
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

# Session carrying the access key pair
session = boto3.session.Session(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# S3 resource for the configured endpoint and region, with signature version 's3v4'
s3_resource = session.resource(
    's3',
    region_name=region_name,
    endpoint_url=endpoint_url,
    config=botocore.client.Config(signature_version='s3v4'),
)

# Bucket handle used by the helper functions below; printed so it shows in the cell output
bucket = s3_resource.Bucket(bucket_name)
print(bucket)

def upload_directory_to_s3(local_directory, s3_prefix):
    """Upload every file below ``local_directory`` to the module-level ``bucket``.

    Args:
        local_directory: Directory that is walked recursively with ``os.walk``.
        s3_prefix: Key prefix; each file's path relative to ``local_directory``
            is appended to it to form the object key.

    Returns:
        True once all files have been handed to ``bucket.upload_file``.
    """
    # Local import so the block does not depend on a new top-of-notebook import
    import posixpath

    for root, _dirs, files in os.walk(local_directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)
            # Object keys are built with "/" regardless of the local platform:
            # os.path.join would insert backslashes on Windows.
            s3_key = posixpath.join(s3_prefix, relative_path.replace(os.sep, "/"))
            print(f"{file_path} -> {s3_key}")
            bucket.upload_file(file_path, s3_key)
    return True

def list_objects(prefix):
    """Print the key of each object returned by filtering ``bucket.objects`` with ``Prefix=prefix``.

    Args:
        prefix: Value passed as the ``Prefix`` filter on the module-level ``bucket``.
    """
    matching = bucket.objects.filter(Prefix=prefix)
    for s3_object in matching.all():
        print(s3_object.key)
In [None]:
# List the objects in the bucket whose keys match the "models" prefix
list_objects("models")

In [None]:
# Upload everything under the local "models" directory to the bucket, using "models" as the key prefix
upload_directory_to_s3("models", "models")

In [None]:
# Where the deployed model can be reached over REST.
# NOTE(review): the host in rest_url looks specific to one environment
# (presumably "pcelesti" is a namespace) -- adjust it to your deployment.
rest_url = "http://modelmesh-serving.pcelesti:8008"
deployed_model_name = "hpo"
# Inference endpoint for the deployed model, built from the two values above
infer_url = f"{rest_url}/v2/models/{deployed_model_name}/infer"

In [None]:
## Look up the names of the model's inputs
# Read the exported ONNX file back from disk
onnx_model = onnx.load("models/hpo/model.onnx")

# Collect the name of every input declared on the model graph and show them
input_names = [graph_input.name for graph_input in onnx_model.graph.input]
print("Input Names:", input_names)

In [None]:
import requests
import numpy as np

def onnx_rest_request(data, infer_url, input_name="onnx::Gemm_0", timeout=30):
    """Send one sample to a REST inference endpoint and return the prediction.

    Args:
        data: Ten numeric feature values; they are converted to float32 and
            reshaped to ``(1, 10)``.
        infer_url: Full URL of the inference endpoint the payload is POSTed to.
        input_name: Name of the model input placed in the payload. Defaults to
            ``"onnx::Gemm_0"``, the value that used to be hard-coded here; pass
            the name found on the ONNX graph when it differs.
        timeout: Seconds handed to ``requests.post`` as its timeout, so an
            unresponsive server cannot block the notebook indefinitely.

    Returns:
        The ``"data"`` field of the first entry of the response's ``"outputs"``
        list when the status code is 200; otherwise ``None`` (the status code
        is printed).

    Raises:
        Whatever ``requests.post`` raises (including on timeout) is not caught.
    """
    # Shape the sample as a single row of ten float32 values
    input_array = np.array(data, dtype=np.float32).reshape(1, 10)

    # JSON payload for the REST request; numpy values are converted to plain lists
    json_data = {
        "inputs": [
            {
                "name": input_name,
                "shape": list(input_array.shape),
                "datatype": "FP32",
                "data": input_array.tolist(),
            }
        ]
    }

    # Make the REST request; the timeout bounds how long the call may block
    response = requests.post(infer_url, json=json_data, timeout=timeout)
    print(response.content)

    # Anything other than 200 is reported and yields no prediction
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    # Extract and return the predictions from the response
    return response.json()['outputs'][0]['data']

In [None]:
# Send one sample (10 feature values) to the deployed model and show what comes back
data = [
    0.3111400080477545, 1.9459399775518593, 1.0, 2.0, 3.0,
    1.2, 0.4, 0.5, 0.8, 2.0,
]
prediction = onnx_rest_request(data, infer_url)
print("Model Prediction:", prediction)