From ecf874f1a7857d858e4baaadafa4ab8a7bb897c5 Mon Sep 17 00:00:00 2001 From: Donny Greenberg Date: Thu, 16 May 2024 00:09:58 -0400 Subject: [PATCH] Update cloud quickstart and notebook warning (#801) (cherry picked from commit ec9e9a0e17a4bdb903b4929bdedbdbe34aacab61) --- docs/tutorials/api-envs.rst | 2 +- docs/tutorials/api-modules.rst | 2 +- docs/tutorials/api-secrets.rst | 4 +- docs/tutorials/async.rst | 4 +- docs/tutorials/quick-start-cloud.rst | 135 ++++++++++++----------- docs/tutorials/quick-start-den.rst | 89 ++++++++++----- docs/tutorials/quick-start-local.rst | 2 +- runhouse/resources/functions/function.py | 7 +- 8 files changed, 147 insertions(+), 98 deletions(-) diff --git a/docs/tutorials/api-envs.rst b/docs/tutorials/api-envs.rst index 7d1404de8..c2366c74a 100644 --- a/docs/tutorials/api-envs.rst +++ b/docs/tutorials/api-envs.rst @@ -120,7 +120,7 @@ vars/secrets synced over, and cached on the cluster. .. parsed-literal:: :class: code-output - INFO | 2024-02-28 21:24:52.915177 | Writing out function to /Users/caroline/Documents/runhouse/notebooks/docs/np_sum_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2024-02-28 21:24:52.915177 | Because this function is defined in a notebook, writing it out to /Users/caroline/Documents/runhouse/notebooks/docs/np_sum_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). INFO | 2024-02-28 21:25:03.923658 | SSH tunnel on to server's port 32300 via server's ssh port 22 already created with the cluster. INFO | 2024-02-28 21:25:04.162828 | Server rh-cluster is up. 
INFO | 2024-02-28 21:25:04.166104 | Copying package from file:///Users/caroline/Documents/runhouse/notebooks to: rh-cluster diff --git a/docs/tutorials/api-modules.rst b/docs/tutorials/api-modules.rst index 9d9eec1a2..2dc3dffcd 100644 --- a/docs/tutorials/api-modules.rst +++ b/docs/tutorials/api-modules.rst @@ -66,7 +66,7 @@ are set up. .. parsed-literal:: :class: code-output - INFO | 2024-02-27 20:21:54.329646 | Writing out function to /Users/caroline/Documents/runhouse/notebooks/docs/np_sum_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2024-02-27 20:21:54.329646 | Because this function is defined in a notebook, writing it out to a file to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2024-02-27 20:21:55.378194 | Server rh-cluster is up. INFO | 2024-02-27 20:21:55.384844 | Copying package from file:///Users/caroline/Documents/runhouse/notebooks to: rh-cluster INFO | 2024-02-27 20:22:06.614361 | Calling base_env.install diff --git a/docs/tutorials/api-secrets.rst b/docs/tutorials/api-secrets.rst index 25477e188..b11f203d9 100644 --- a/docs/tutorials/api-secrets.rst +++ b/docs/tutorials/api-secrets.rst @@ -415,7 +415,7 @@ them as part of a ``rh.env()``. .. parsed-literal:: :class: code-output - INFO | 2023-12-20 17:43:16.529605 | Writing out function to /Users/caroline/Documents/runhouse/runhouse/docs/notebooks/api/_get_env_var_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2023-12-20 17:43:16.529605 | Because this function is defined in a notebook, writing it out to a file to make it importable. 
Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2023-12-20 17:43:16.540215 | Setting up Function on cluster. INFO | 2023-12-20 17:43:16.543037 | Copying package from file:///Users/caroline/Documents/runhouse/runhouse to: example-cluster INFO | 2023-12-20 17:43:16.544655 | Running command: ssh -T -i ~/.ssh/sky-key -o Port=22 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o IdentitiesOnly=yes -o ExitOnForwardFailure=yes -o ServerAliveInterval=5 -o ServerAliveCountMax=3 -o ConnectTimeout=30s -o ForwardAgent=yes -o ControlMaster=auto -o ControlPath=/tmp/skypilot_ssh_caroline/41014bb4d3/%C -o ControlPersist=300s ubuntu@44.201.245.202 'bash --login -c -i '"'"'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (mkdir -p ~/runhouse/)'"'"' 2>&1' @@ -541,7 +541,7 @@ from function and system calls running in the environment. .. parsed-literal:: :class: code-output - INFO | 2023-12-20 17:48:29.631094 | Writing out function to /Users/caroline/Documents/runhouse/runhouse/docs/notebooks/api/_get_env_var_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2023-12-20 17:48:29.631094 | Because this function is defined in a notebook, writing it out to a file to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2023-12-20 17:48:29.662722 | Setting up Function on cluster. 
INFO | 2023-12-20 17:48:29.664560 | Copying package from file:///Users/caroline/Documents/runhouse/runhouse to: example-cluster INFO | 2023-12-20 17:48:29.665912 | Running command: ssh -T -i ~/.ssh/sky-key -o Port=22 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o IdentitiesOnly=yes -o ExitOnForwardFailure=yes -o ServerAliveInterval=5 -o ServerAliveCountMax=3 -o ConnectTimeout=30s -o ForwardAgent=yes -o ControlMaster=auto -o ControlPath=/tmp/skypilot_ssh_caroline/41014bb4d3/%C -o ControlPersist=300s ubuntu@44.201.245.202 'bash --login -c -i '"'"'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (mkdir -p ~/runhouse/)'"'"' 2>&1' diff --git a/docs/tutorials/async.rst b/docs/tutorials/async.rst index bd89d5cfa..f6226443f 100644 --- a/docs/tutorials/async.rst +++ b/docs/tutorials/async.rst @@ -73,7 +73,7 @@ function: .. parsed-literal:: :class: code-output - INFO | 2024-04-30 18:50:35.023995 | Writing out function to /Users/rohinbhasin/work/notebooks/docs/async_test_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2024-04-30 18:50:35.023995 | Because this function is defined in a notebook, writing it out to a file to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2024-04-30 18:50:35.060478 | Sending module async_test of type to local Runhouse daemon @@ -130,7 +130,7 @@ of this: .. parsed-literal:: :class: code-output - INFO | 2024-04-30 18:57:00.533012 | Writing out function to /Users/rohinbhasin/work/notebooks/docs/synchronous_sleep_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). 
+ INFO | 2024-04-30 18:57:00.533012 | Because this function is defined in a notebook, writing it out to a file to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2024-04-30 18:57:00.577673 | Sending module synchronous_sleep of type to local Runhouse daemon diff --git a/docs/tutorials/quick-start-cloud.rst b/docs/tutorials/quick-start-cloud.rst index 5998ff3bf..298729e5a 100644 --- a/docs/tutorials/quick-start-cloud.rst +++ b/docs/tutorials/quick-start-cloud.rst @@ -6,13 +6,16 @@ Cloud Quick Start

Open In Colab

-Runhouse lets you quickly and easily deploy your Python code as -production-grade applications on your own infra. +Runhouse lets you easily deploy and run Python subroutines on remote +infrastructure, granting you access to boundless compute from inside +your Python interpreter. This tutorial demonstrates how to -- Start a cloud VM with the Runhouse API server running on it -- Send a locally defined function onto the VM to serve it as a service. +- Connect to an existing remote IP, fresh cloud VM, or fresh Kubernetes + pod in Python as a Runhouse cluster +- Send a locally defined function onto the remote compute and call it + as a service Installing Runhouse ------------------- @@ -32,6 +35,10 @@ for launching fresh VMs through your cloud provider. !pip install "runhouse[sky]" +.. code:: ipython3 + + import runhouse as rh + Local Python Function --------------------- @@ -41,9 +48,9 @@ wrappers, or configs are necessary. .. code:: ipython3 - def get_pid(a = 0): - import os - return os.getpid() + def get_platform(a = 0): + import platform + return platform.platform() Runhouse Cluster ---------------- @@ -52,10 +59,11 @@ In Runhouse, a “cluster” is a unit of compute, somewhere you can send code, data, or requests to execute. We define a Runhouse cluster using the ``rh.cluster`` factory function. -This requires having access to a cluster or a cloud provider account. If -you do not have access to a cluster, you can try the `local -version `__ of this -tutorial, which sets up and deploys the Python function to a local +This requires having access to an existing box (via SSH), a cloud +provider account, or a Kubernetes cluster (~/.kube/config). If you do +not have access to a cluster, you can try the `local +version `__ of +this tutorial, which sets up and deploys the Python function to a local server, rather than a remote cluster. 
To use a cluster that’s already running: @@ -69,10 +77,10 @@ To use a cluster that’s already running: ) If you do not have a cluster up, but have cloud credentials (e.g. aws, -gcp, azure) for launching clusters, you can set up and launch an -on-demand cluster with ``rh.ondemand_cluster``. This uses SkyPilot under -the hood, so run ``sky check`` on CLI first to set up the cloud -credentials locally. +gcp, azure) for launching clusters or a Kubernetes cluster, you can set +up and launch an on-demand cluster with ``rh.ondemand_cluster``. This +uses SkyPilot under the hood, so run ``sky check`` in a CLI first to +make sure credentials are set up properly. .. code:: ipython3 @@ -85,61 +93,56 @@ credentials locally. # terminate this cluster with `cluster.teardown()` in Python, or `sky down rh-cluster` in CLI +There are a number of options to specify the resources more finely, such +as GPUs (``instance_type="A10G:4"``), cloud-provider instance types +(``instance_type="m5.xlarge"``), ``num_instances=n`` for multiple +instances, ``memory``, ``disk_size``, ``region``, ``image_id``, +``open_ports``, ``spot``, and more. See the `ondemand_cluster docs `__. +You can also omit the ``provider`` argument to allocate from the cheapest +available source for which you have credentials. + Deployment ---------- -For the function, simply wrap it in ``rh.function``, then send it to the -cluster with ``.to``. This sets up the function on the cluster as a -proper service, by syncing over the code and setting up and specified -dependencies. Furthermore, it runs through SSH, and no additional auth, -port, or manual setup is necessary. - -Modules, or classes, are also supported. For finer control of where the -function/module runs, you will also be able to specify the environment -(a list of package requirements, a Conda env, or Runhouse env) where it -runs. These are covered in more detail in the API tutorials. +Simply wrap the function in ``rh.function`` and send it to the cluster +with ``.to``. 
This deploys the function to the cluster as a proper +service by syncing over the code, setting up any specified dependencies +(see ``Envs``), and importing and serving it in the Runhouse API server. +We’re connected via an SSH tunnel here, so the service is secure, but we +can also open ports and secure it with Runhouse’s out-of-the-box +authentication and HTTPS. + +Classes, or ``Modules`` are also supported, opening up a world of +possibilities through persistent state. ``Envs`` allow you to specify +the environment in which the function or class is served, such as +dependencies, environment variables, secrets, conda environments, and +more, and allow you to easily achieve powerful parallelism across the +cluster. These are covered in more detail in the API tutorials. .. code:: ipython3 - remote_fn = rh.function(get_pid).to(cluster) - - -.. parsed-literal:: - :class: code-output - - INFO | 2024-02-26 21:01:50.579156 | Writing out function to /Users/caroline/Documents/runhouse/notebooks/docs/get_pid_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). - INFO | 2024-02-26 21:01:50.584346 | Copying package from file:///Users/caroline/Documents/runhouse/notebooks to: rh-cluster - INFO | 2024-02-26 21:01:54.745264 | Calling base_env.install - - -.. parsed-literal:: - :class: code-output + remote_get_platform = rh.function(get_platform).to(cluster) - Installing Package: notebooks with method reqs. - reqs path: notebooks/requirements.txt - notebooks/requirements.txt not found, skipping -  .. parsed-literal:: :class: code-output - INFO | 2024-02-26 21:01:56.116714 | Time to call base_env.install: 1.37 seconds - INFO | 2024-02-26 21:02:04.892297 | Sending module get_pid to rh-cluster - - - -.. raw:: html - -
-    
- + INFO | 2024-05-16 03:20:53.066103 | Because this function is defined in a notebook, writing it out to /Users/donny/code/notebooks/docs/get_platform_fn.py to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). This restriction does not apply to functions defined in normal Python files. + INFO | 2024-05-16 03:20:53.079931 | Port 32300 is already in use. Trying next port. + INFO | 2024-05-16 03:20:53.081995 | Forwarding port 32301 to port 32300 on localhost. + INFO | 2024-05-16 03:20:54.215570 | Server rh-cluster is up. + INFO | 2024-05-16 03:20:54.224806 | Copying package from file:///Users/donny/code/notebooks to: rh-cluster + INFO | 2024-05-16 03:20:55.395007 | Calling _cluster_default_env.install + INFO | 2024-05-16 03:20:55.948421 | Time to call _cluster_default_env.install: 0.55 seconds + INFO | 2024-05-16 03:20:55.960756 | Sending module get_platform of type to rh-cluster -The function we defined above, ``get_pid``, now exists remotely on the -cluster, and can be called remotely using ``remote_fn``. You can call -this remote function just as you would any other Python function, with -``remote_fn()``, and it runs on the cluster and returns the result to -our local environment. +The function we defined above, ``get_platform``, now exists remotely on +the cluster, and can be called remotely using ``remote_get_platform``. You can +call this remote function just as you would any other Python function, +with ``remote_get_platform()``, and it runs on the cluster and returns the result +to our local environment. Below, we run both the local and remote versions of this function, which give different results and confirms that the functions are indeed being @@ -147,26 +150,32 @@ run on different processes. .. 
code:: ipython3 - print(f"Local PID {get_pid()}") - print(f"Remote PID {remote_fn()}") + print(f"Local Platform: {get_platform()}") + print(f"Remote Platform: {remote_get_platform()}") .. parsed-literal:: :class: code-output - INFO | 2024-02-26 21:02:43.117612 | Calling get_pid.call - INFO | 2024-02-26 21:02:44.228964 | Time to call get_pid.call: 1.11 seconds + INFO | 2024-05-16 03:21:03.941205 | Calling get_platform.call +.. parsed-literal:: + :class: code-output + + Local Platform: macOS-14.4.1-arm64-arm-64bit .. parsed-literal:: :class: code-output - Local PID 27818 - Remote PID 33366 + INFO | 2024-05-16 03:21:04.513689 | Time to call get_platform.call: 0.57 seconds +.. parsed-literal:: + :class: code-output + + Remote Platform: Linux-5.15.0-1049-aws-x86_64-with-glibc2.31 If you launched an on-demand cluster, you can terminate it by calling diff --git a/docs/tutorials/quick-start-den.rst b/docs/tutorials/quick-start-den.rst index 40c6e8789..bfea91708 100644 --- a/docs/tutorials/quick-start-den.rst +++ b/docs/tutorials/quick-start-den.rst @@ -6,12 +6,12 @@ Den Quick Start

Open In Colab

-`Runhouse Den `__ let’s you save and -keep track of your Runhouse resources (cluster, function, data, etc). -These resources can be easily reloaded from any environment, and ready -to be used without additional setup, or even shared with another user or -teammate. Then, in the Web UI, access, visualize, and manage your -resources, along with version history. +`Runhouse Den `__ lets you manage and +track your infra, services, and resources (clusters, functions, secrets, +etc.). These resources can be easily reloaded from any environment, and are +ready to be used without additional setup, or even shared with another +user or teammate. Then, in the Web UI, access, visualize, and manage +your resources, along with version history. Account Creation & Login ------------------------ @@ -22,28 +22,29 @@ website, or by calling the login command in Python or CLI. To login on your dev environment, call ``rh.login()`` in Python or ``runhouse login`` in CLI. +.. code:: ipython3 + + import runhouse as rh + .. code:: ipython3 rh.login() -As part of logging in, Runhouse also optionally offers secrets -management, where it can automatically detect locally set up provider -secrets, and gives you the option to upload them securely into your -account. For more information on Secrets management, refer to the -`Secrets +As you’ll see in the login prompts, Runhouse also optionally offers +secrets management, where it can automatically detect local AI provider +secrets (e.g. clouds, Hugging Face, OpenAI), and gives you the +option to upload them securely into your account to use on remote +clusters or in other environments. For more information on Secrets management, refer to the `Secrets Tutorial `__. Saving ------ Let’s start by constructing some runhouse resources that we’d like to -save down. These resources are taking from our `Cloud Quick Start +save down. These resources were first defined in our `Cloud Quick Start Tutorial `__. -.. 
code:: ipython3 - - import runhouse as rh - .. code:: ipython3 cluster = rh.ondemand_cluster( @@ -52,20 +53,57 @@ Tutorial `__. provider="aws" ) + + +.. parsed-literal:: + :class: code-output + + Output() + + + +.. raw:: html + +

+
+
+
+
+.. raw:: html
+
+    
+    
+ + + .. code:: ipython3 - def get_pid(a = 0): - import os - return os.getpid() + def get_platform(a = 0): + import platform + return platform.platform() + + remote_get_platform = rh.function(get_platform).to(cluster) + + +.. parsed-literal:: + :class: code-output + + INFO | 2024-05-16 03:51:58.483032 | Because this function is defined in a notebook, writing it out to /Users/donny/code/notebooks/docs/get_platform_fn.py to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). This restriction does not apply to functions defined in normal Python files. + INFO | 2024-05-16 03:51:58.493093 | Port 32300 is already in use. Trying next port. + INFO | 2024-05-16 03:51:58.494347 | Forwarding port 32301 to port 32300 on localhost. + INFO | 2024-05-16 03:51:59.587613 | Server rh-cluster is up. + INFO | 2024-05-16 03:51:59.595752 | Copying package from file:///Users/donny/code/notebooks to: rh-cluster + INFO | 2024-05-16 03:52:00.716693 | Calling _cluster_default_env.install + INFO | 2024-05-16 03:52:01.235732 | Time to call _cluster_default_env.install: 0.52 seconds + INFO | 2024-05-16 03:52:01.252665 | Sending module get_platform of type to rh-cluster - remote_fn = rh.function(get_pid).to(cluster) You can save the resources we created above with: .. code:: ipython3 cluster.save() - remote_fn.save() + remote_get_platform.save() Reloading --------- @@ -82,17 +120,18 @@ Python script: import runhouse as rh if __name__ == "__main__": - reloaded_fn = rh.function(name="get_pid") + reloaded_fn = rh.function(name="get_platform") print(reloaded_fn()) """ Sharing ------- -You can also share your resource with another user, and choose which +You can also share your resource with collaborators, and choose which level of access to give. Once shared, they will be able to see the resource in their dashboard as well, and be able to load and use the -shared resource. +shared resource. 
They’ll need to load the resource using its full name, +which includes your username (``/your_username/get_platform``). .. code:: ipython3 @@ -106,7 +145,7 @@ Web UI After saving your resources, you can log in and see them on your `Den dashboard `__, labeled as -``/rh-cluster`` and ``/get_pid``. +``/rh-cluster`` and ``/get_platform``. Clicking into the resource provides information about your resource. You can view the resource metadata, previous versions, and activity, or add diff --git a/docs/tutorials/quick-start-local.rst b/docs/tutorials/quick-start-local.rst index 554e7498a..94ed42c4f 100644 --- a/docs/tutorials/quick-start-local.rst +++ b/docs/tutorials/quick-start-local.rst @@ -84,7 +84,7 @@ API. Wrap the function with ``rh.function``, and then use .. parsed-literal:: :class: code-output - INFO | 2024-02-26 22:14:53.460361 | Writing out function to /Users/caroline/Documents/runhouse/notebooks/docs/get_pid_fn.py. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). + INFO | 2024-02-26 22:14:53.460361 | Because this function is defined in a notebook, writing it out to a file to make it importable. Please make sure the function does not rely on any local variables, including imports (which should be moved inside the function body). Functions defined in Python files can be used normally. INFO | 2024-02-26 22:14:53.523591 | Sending module get_pid to local Runhouse daemon diff --git a/runhouse/resources/functions/function.py b/runhouse/resources/functions/function.py index 678910f4c..b462843b8 100644 --- a/runhouse/resources/functions/function.py +++ b/runhouse/resources/functions/function.py @@ -274,9 +274,10 @@ def _handle_nb_fn(fn, fn_pointers, serialize_notebook_fn, name): else: module_path = Path.cwd() / (f"{name}_fn.py" if name else "sent_fn.py") logging.info( - f"Writing out function to {str(module_path)}. 
Please make " - f"sure the function does not rely on any local variables, " - f"including imports (which should be moved inside the function body)." + f"Because this function is defined in a notebook, writing it out to {str(module_path)} " + f"to make it importable. Please make sure the function does not rely on any local variables, " + f"including imports (which should be moved inside the function body). " + f"This restriction does not apply to functions defined in normal Python files." ) if not name: logging.warning(