In [20]:
%env RAY_verbose_spill_logs=0
%env RAY_SESSION_DIR="/home/ray/"

env: RAY_verbose_spill_logs=0
env: RAY_SESSION_DIR="/home/ray/"


In [21]:
import ray
import os
import boto3
import botocore
import time
from concurrent.futures import ThreadPoolExecutor

# Number of workers to use
num_loops = 10

# Number to calculate sum of squares up to
N = 100

#access-details:
cnf_internal_object_source = {
  "access_key": str(os.environ.get("AUTH_TOKEN")),
  "secret_key": "s3",
  "region": "us-east-2",
  "PROXY_SERVICE_SUFFIX_URL": "-service.ezdata-system.svc.cluster.local:30000",
  "ON_PREM_S3_SERVICE_NAME": "local-s3",
  "on_prem_bucket_name": "mlflow",
  "S3_SERVICE_NAME": "local-s3",
  "bucket_name": "mlflow"
}

# Custom endpoint URL for S3-compatible service (e.g., MinIO)
on_prem_proxy_endpoint_url = 'http://' + cnf_internal_object_source.get('ON_PREM_S3_SERVICE_NAME') + cnf_internal_object_source.get('PROXY_SERVICE_SUFFIX_URL')
print("=====:", on_prem_proxy_endpoint_url)

# Replace with your actual endpoint URL
aws_proxy_endpoint_url = 'http://' + cnf_internal_object_source.get('S3_SERVICE_NAME') + cnf_internal_object_source.get('PROXY_SERVICE_SUFFIX_URL')
print("=====:", aws_proxy_endpoint_url)

s3_client = boto3.client('s3', aws_access_key_id=cnf_internal_object_source.get('access_key'), aws_secret_access_key=cnf_internal_object_source.get('secret_key'), endpoint_url=aws_proxy_endpoint_url)
print("=====:", s3_client)

#write the processed data
s3_client_onprem = boto3.client('s3', aws_access_key_id=cnf_internal_object_source.get('access_key'), aws_secret_access_key=cnf_internal_object_source.get('secret_key'), endpoint_url=on_prem_proxy_endpoint_url)
print("=====:", s3_client_onprem)

# Initialize the RAY cluster
ray.init(address="ray://kuberay-head-svc.kuberay:10001", ignore_reinit_error=True)

# Define the function that calculates the sum of squares
def calculate_sum_of_squares(n):
    return sum(i * i for i in range(1, n + 1))

# Function to distribute the computation and write results
def distributed_task(worker_id, n):
    try:
        print("=====:", worker_id, n)
        # Calculate the result
        result = calculate_sum_of_squares(n)
        # Save the result to a local file
        result_file = f"result_{worker_id}.txt"
        with open(result_file, "w") as f:
            f.write(str(result))
        # Upload the result file to S3
        # s3_key = f"{s3_bucket}/{result_file}"
        s3_key = f"{result_file}"
        print("result_file:", result_file, "||", "s3_bucket:", cnf_internal_object_source.get('bucket_name'), "||", "s3_key:", s3_key)
        
        with open(f"{result_file}", 'rb') as data:
            s3_client.upload_fileobj(data, s3_bucket, s3_key)   
        
        return s3_key
    except Exception as e:
        return str(e)

# Launch distributed tasks using ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=num_loops) as executor:
    task_futures = [executor.submit(distributed_task, i, N) for i in range(num_loops)]
    print("=====:task_futures:", list(task_futures))


def download_file_from_s3(bucket_name, file_key, local_file_path):
    start_time = time.time()
    try:
        s3_client.download_file(bucket_name, file_key, local_file_path)
        end_time = time.time()
        download_time = end_time - start_time
        print(f"File '{file_key}' downloaded to '{local_file_path}' in {download_time:.2f} seconds.")
        print("Downloading from AWS Bucket Complete...")
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print(f"The object '{file_key}' does not exist in the bucket '{bucket_name}'.")
        else:
            print(f"An error occurred while downloading '{file_key}': {str(e)}")
            
def upload_file_to_s3(local_file_path, bucket_name, s3_file_key):
    start_time = time.time()
    try:
        s3_client_onprem.upload_file(local_file_path, bucket_name, s3_file_key)
        end_time = time.time()
        upload_time = end_time - start_time
        print(f"File '{local_file_path}' uploaded to '{bucket_name}/{s3_file_key}' in {upload_time:.2f} seconds.")
        print("Uploading to On Prem S3 Bucket Complete...")
    except botocore.exceptions.ClientError as e:
        print(f"An error occurred while uploading '{local_file_path}': {str(e)}")
        
        
print("CUSTOM S3 PATH JOB ENDED")
# Shutdown the RAY cluster
ray.shutdown()


2023-10-26 10:43:35,397	INFO client_builder.py:237 -- Passing the following kwargs to ray.init() on the server: ignore_reinit_error


=====: http://local-s3-service.ezdata-system.svc.cluster.local:30000
=====: http://local-s3-service.ezdata-system.svc.cluster.local:30000
=====: <botocore.client.S3 object at 0x7f9c3f4235e0>
=====: <botocore.client.S3 object at 0x7f9c61612ee0>
=====: 0 100
=====: 1 100
=====: 2 100
=====: 3 100
=====: 4 100
=====: 5 100
=====: 6 100
=====: 7 100
=====: 8 100
=====: 9 100
=====:task_futures: [<Future at 0x7f9c3f618af0 state=running>, <Future at 0x7f9c3f773250 state=running>, <Future at 0x7f9c48242dc0 state=running>, <Future at 0x7f9c4825fb20 state=running>, <Future at 0x7f9c3f56ac40 state=running>, <Future at 0x7f9c4824cf10 state=running>, <Future at 0x7f9c483ad550 state=running>, <Future at 0x7f9c483ad2e0 state=running>, <Future at 0x7f9c483b0160 state=running>, <Future at 0x7f9c3f43c760 state=running>]
result_file: result_0.txt || s3_bucket: mlflow || s3_key: result_0.txt
result_file: result_6.txt || s3_bucket: mlflow || s3_key: result_6.txt
result_file: result_5.txt || s3_bucket: mlf