# Supporting notebook for submitting a native Spark app (watsonx.data SaaS) via the UI
The main steps are:
- The `spark-processing.py` script, which contains the Spark app, is copied to the COS bucket connected to the Iceberg catalog (`{WXD_BUCKET}`).
- A JSON payload is prepared; it must be provided when submitting the app in the watsonx.data UI.

## Import libraries and set files with env variables

In [None]:
import json
import os
from base64 import b64encode
from dotenv import load_dotenv
import getpass

import ibm_boto3
from ibm_botocore.client import ClientError, Config

## Load env.txt file with configuration

In [None]:
# Copy `env.txt` (loaded through wslib) into a local dotenv file and load it.
# The content is fetched *before* the local file is opened for writing, so a
# failed fetch cannot leave an empty or truncated `.env_all` behind.
env_payload = wslib.load_data('env.txt').read()
with open('.env_all', 'wb') as env_file:
    env_file.write(env_payload)
# environment variables store credentials and configuration
load_dotenv('.env_all')

## Reading credentials and configuration

In [None]:
# Ask for the API key interactively; getpass does not echo the typed value,
# so the key stays out of the notebook source and out of this cell's output.
api_key_prompt = "Enter watsonx.data backend Cloud API key: "
CLOUD_API = getpass.getpass(prompt=api_key_prompt)
print("Cloud API Key received")

In [None]:
# Look up the stored connection by name through the notebook's `wslib` helper.
# Its 'api_key' and 'resource_instance_id' entries are used later as the COS credentials.
cos_conn = wslib.get_connection('ATT_Enablement_cos_connection')

In [None]:
# File name of the Spark script (also used for the local copy)
spark_script_name = 'spark-processing.py'

# Object key of the Spark script inside the COS bucket
spark_script_path = f'spark-scripts/{spark_script_name}'

# wx.data credentials
# Required settings are read with os.environ[...] so that a missing variable
# raises KeyError right here instead of silently turning into the text "None"
# inside the credential string and the s3a:// path that are built from them.
WXD_USER = "ibmlhapikey"
CLOUD_USER_ID = os.environ["CLOUD_USER_ID"]

# connected COS bucket name
cos_bucket_name = os.environ["WXD_BUCKET"]

In [None]:
# Constants for IBM COS access
# Service endpoint; it has to match the endpoint of the buckets used in this notebook.
COS_ENDPOINT = "https://s3.ca-tor.cloud-object-storage.appdomain.cloud"
# Credentials taken from the connection object `cos_conn`.
COS_API_KEY_ID = cos_conn['api_key']
COS_INSTANCE_CRN = cos_conn['resource_instance_id']
# Bucket names are mandatory: os.environ[...] raises KeyError if either is unset.
HIVE_BUCKET = os.environ["HIVE_BUCKET"]
WXD_BUCKET = os.environ["WXD_BUCKET"]

### Make sure that `COS_ENDPOINT` matches the endpoint of your buckets (all buckets must share the same endpoint); if it does not, replace it in the next cell

In [None]:
# Show the endpoint currently in effect so it can be compared with the buckets' endpoint.
print(COS_ENDPOINT)
# To override it, uncomment the line below and replace the placeholder with the
# full endpoint URL (including the https:// prefix).
# COS_ENDPOINT = "enter https-prepended endpoint and uncomment"

In [None]:
# Create client
client = ibm_boto3.client("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT
)

#### Encode the API credentials string for the payload

In [None]:
# Credential string of the form "<WXD_USER>_<CLOUD_USER_ID>:<CLOUD_API>",
# base64-encoded and converted back to text; the payload embeds it after a
# "Basic " prefix.
wxd_credentials = f"{WXD_USER}_{CLOUD_USER_ID}:{CLOUD_API}"
bytes_string = wxd_credentials.encode('utf-8')
base64_bytes = b64encode(bytes_string)
base64_string = str(base64_bytes, 'utf-8')


#### Upload spark app script to COS bucket

In [None]:
# download py file locally
# The script is fetched through wslib *before* the local file is opened for
# writing, so a failed fetch cannot leave an empty local copy behind.
local_script_path = f'./{spark_script_name}'
script_payload = wslib.load_data(spark_script_name).read()
with open(local_script_path, 'wb') as script_file:
    script_file.write(script_payload)
# upload py file to watsonx.data bucket, stored under the key `spark_script_path`
client.upload_file(local_script_path, cos_bucket_name, spark_script_path)

### Generate payload for spark app

In [None]:
# Spark configuration passed to the application.
# Values read with os.getenv are None when the variable is unset, and
# json.dumps renders None as null.
spark_conf = {
    "spark.hadoop.wxd.apikey": f"Basic {base64_string}",
    "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    "spark.myenv.hive_bucket": os.getenv("HIVE_BUCKET"),
    "spark.myenv.iceberg_catalog": os.getenv("ICEBERG_CATALOG"),
    "spark.myenv.hive_catalog": "spark_catalog",
    "spark.myenv.schema_data_hive": os.getenv("SCHEMA_DATA_H"),
    "spark.myenv.schema_data_iceberg": os.getenv("SCHEMA_DATA_I"),
    "spark.myenv.schema_netezza_offload": os.getenv("SCHEMA_DWH_OFFLOAD"),
}

# Submission payload: the Spark conf plus the s3a:// location of the uploaded script.
data = {
    "application_details": {
        "conf": spark_conf,
        "application": f"s3a://{cos_bucket_name}/{spark_script_path}",
    }
}

In [None]:
# Pretty-print the payload so it can be supplied in the watsonx.data UI.
# NOTE: the output contains the base64-encoded (not encrypted) credentials under
# "spark.hadoop.wxd.apikey" — treat this cell's output as a secret.
print(json.dumps(data, indent=4))