|
89 | 89 | "source": [ |
90 | 90 | "# Create our cluster and submit\n", |
91 | 91 | "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", |
92 | | - "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", |
| 92 | + "cluster_name= \"hfgputest\"\n", |
| 93 | + "cluster = Cluster(ClusterConfiguration(name=cluster_name, \n", |
93 | 94 | " head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n", |
94 | 95 | " num_gpus=1,\n", |
95 | 96 | " num_workers=1,\n", |
|
287 | 288 | "ray_cluster_uri = cluster.cluster_uri()" |
288 | 289 | ] |
289 | 290 | }, |
| 291 | + { |
| 292 | + "cell_type": "markdown", |
| 293 | + "id": "64d65c3c", |
| 294 | + "metadata": {}, |
| 295 | + "source": [ |
| 296 | + "Now we can connect directly to our Ray cluster via the Ray Python client:" |
| 297 | + ] |
| 298 | + }, |
| 299 | + { |
| 300 | + "cell_type": "code", |
| 301 | + "execution_count": null, |
| 302 | + "id": "60276d86", |
| 303 | + "metadata": {}, |
| 304 | + "outputs": [], |
| 305 | + "source": [ |
| 306 | + "from codeflare_sdk import generate_cert\n", |
| 307 | + "# Create required TLS cert and export the environment variables to enable TLS\n", |
| 308 | + "generate_cert.generate_tls_cert(cluster_name, cluster.config.namespace)\n", |
| 309 | + "generate_cert.export_env(cluster_name, cluster.config.namespace)" |
| 310 | + ] |
| 311 | + }, |
290 | 312 | { |
291 | 313 | "cell_type": "markdown", |
292 | 314 | "id": "44dba6a0-8275-4726-8911-6b6ec467b6a3", |
|
432 | 454 | "\n", |
433 | 455 | " ray_trainer = TorchTrainer(\n", |
434 | 456 | " train_func,\n", |
435 | | - " scaling_config=ScalingConfig(num_workers=3, use_gpu=True),\n", |
| 457 | + " scaling_config=ScalingConfig(num_workers=2, use_gpu=True),\n", |
436 | 458 | " # Configure persistent storage that is accessible across \n", |
437 | 459 | " # all worker nodes.\n", |
438 | 460 | " # Uncomment and update the RunConfig below to include your storage details.\n", |
|
0 commit comments