In [None]:
# Proxy configuration for the notebook kernel.
# This assumes the notebook is executed behind a proxy; the cell is meant for
# the case where the proxy environment variables are not already set.
import os

PROXY_URL = "http://squid-proxy:3128"  # Change to your proxy

# Unfortunately no_proxy is not standardized and is handled differently by different tools,
# e.g. CIDR notation is not supported by most tools.
# Therefore this list might need to be tweaked depending on the tool and the network,
# e.g. 10.152.183.1 is the default IP of the Kubernetes API in microk8s, but it might be different in your cluster.
NO_PROXY_HOSTS = "10.0.0.0/8,10.152.183.1,192.168.0.0/16,127.0.0.1,172.16.0.0/16,.svc,localhost,.kubeflow"

# Both the lower-case and the upper-case spelling of every variable are set
# from the same constant, so the two spellings cannot drift apart.
for _name in ("http_proxy", "https_proxy"):
    os.environ[_name] = PROXY_URL
    os.environ[_name.upper()] = PROXY_URL
os.environ["no_proxy"] = NO_PROXY_HOSTS
os.environ["NO_PROXY"] = NO_PROXY_HOSTS

In [None]:
%pip install dask dask-kubernetes requests

In [None]:
import requests
from dask.distributed import Client, as_completed
from dask_kubernetes.operator import KubeCluster

In [None]:
def fetch_url(url, timeout=1):
    """Issue an HTTP GET for ``url`` and report the outcome as a 2-tuple.

    Args:
        url: Address to request.
        timeout: Value forwarded to ``requests.get`` as ``timeout``.
            Defaults to 1, the value that used to be hard-coded.

    Returns:
        ``(url, status_code)`` when the request produces a response, or
        ``(url, "error: <message>")`` when it raises. The URL is the first
        element in both cases so that a failure can be attributed to a site.
    """
    try:
        response = requests.get(url, timeout=timeout)
        return url, response.status_code
    except Exception as exc:
        # Deliberately broad: the function reports failures as data instead of
        # raising, so every submitted task yields a printable result.
        return url, f"error: {exc}"

In [None]:
# Plain-HTTP URLs to request, built from the bare domain names.
_domains = ("facebook.com", "google.com", "microsoft.com", "apple.com", "amazon.com")
websites = [f"http://{domain}" for domain in _domains]

In [None]:
# Container image passed to the Dask clusters created in this notebook.
# The tag targets Python 3.11, the version this example is written for;
# change the tag if your Python version is different.
_dask_image_tag = "2025.2.0-py3.11"
dask_image = f"ghcr.io/dask/dask:{_dask_image_tag}"

In [None]:
import sys

# [major, minor, micro] of the running interpreter as integers. Read from
# sys.version_info instead of splitting platform.python_version(): on
# pre-release builds the last component of that string carries a suffix
# (e.g. "0rc1"), which int() cannot parse.
version = list(sys.version_info[:3])
if tuple(version[:2]) != (3, 11):
    print("Your python version is not 3.11.x, make sure to update dask_image accordingly to avoid missmatch")

In [None]:

# Namespace in which both Dask clusters are created.
NAMESPACE = "kubeflow-user-example-com"

# Cluster with no proxy configuration
cluster_no_proxy = KubeCluster(name="no-proxy", namespace=NAMESPACE, image=dask_image)

# Proxy settings handed to the second cluster.
# Assuming the proxy settings are the same for every machine and pod in the
# cluster, the values from the notebook's own environment are reused.
# Variables that are not set are left out instead of being passed as None.
proxy_env = {
    name: os.environ[name]
    for name in ("http_proxy", "HTTP_PROXY", "https_proxy", "HTTPS_PROXY", "no_proxy", "NO_PROXY")
    if name in os.environ
}

# Cluster with proxy configuration by setting the environment
cluster_with_proxy = KubeCluster(name="proxy", namespace=NAMESPACE, image=dask_image, env=proxy_env)

In [None]:
# Call scale(5) on each cluster, the no-proxy one first.
for _cluster in (cluster_no_proxy, cluster_with_proxy):
    _cluster.scale(5)

In [None]:
# One client per cluster; the cluster object is given as the client's address.
client_no_proxy = Client(address=cluster_no_proxy)
client_with_proxy = Client(address=cluster_with_proxy)

In [None]:
def _submit_fetches(client):
    """Submit one ``fetch_url`` task per entry of ``websites`` on ``client`` and return the futures."""
    return [client.submit(fetch_url, url) for url in websites]


futures_no_proxy = _submit_fetches(client_no_proxy)
futures_with_proxy = _submit_fetches(client_with_proxy)

In [None]:
print("Result from workers with no proxy configuration")
# Print each future's result as as_completed hands the future back.
for _finished in as_completed(futures_no_proxy):
    _outcome = _finished.result()
    print(_outcome)

In [None]:
print("Result of workers with proxy configured")
# Print each future's result as as_completed hands the future back.
for _finished in as_completed(futures_with_proxy):
    _outcome = _finished.result()
    print(_outcome)

In [None]:
# Close both clients first, then both clusters.
for _resource in (client_no_proxy, client_with_proxy, cluster_no_proxy, cluster_with_proxy):
    _resource.close()