## More CDE with Python

In [2]:
import os
import json
import sys
import re
import requests
from requests_toolbelt import MultipartEncoder

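#### You can use existing environment variables or set them at the session level. Note that os.environ["WORKLOAD_USER"] is already set for you as your CDP user and can be referenced directly. The token request below also reads WORKLOAD_PASSWORD, so make sure it is set before running it.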

In [4]:
# Session-level settings: the CDP workload user and the CDE Jobs API base URL.
os.environ.update(
    {
        "WORKLOAD_USER": "pauldefusco",
        "JOBS_API_URL": "https://tk5p4pn9.cde-6fr6l74r.go01-dem.ylcu-atmi.cloudera.site/dex/api/v1",
    }
)

#### Python Wrappers to CDE API 

In [6]:
# Set user token to interact with CDE Service remotely
def set_cde_token():
    """Fetch a CDE access token and export it as ``os.environ["ACCESS_TOKEN"]``.

    The token endpoint is derived from ``os.environ["JOBS_API_URL"]``: the
    first label of the host name is swapped for ``service`` and the
    ``dex/api/v1`` path is swapped for the Knox token path. The derived URL is
    stored in ``os.environ["GET_TOKEN_URL"]``. The request authenticates with
    HTTP basic auth using ``WORKLOAD_USER`` and ``WORKLOAD_PASSWORD``.

    Returns:
        str: the ``access_token`` field of the JSON response.

    Raises:
        KeyError: if a required environment variable or the ``access_token``
            field of the response is missing.
        requests.HTTPError: if the token endpoint answers with an error status.
    """
    url_parts = os.environ["JOBS_API_URL"].split("/")
    # url_parts[2] is the host. Only its first label is rewritten, so the same
    # text appearing elsewhere in the URL is left untouched.
    host_labels = url_parts[2].split(".")
    host_labels[0] = "service"
    url_parts[2] = ".".join(host_labels)
    token_url = "/".join(url_parts).replace(
        "dex/api/v1", "gateway/authtkn/knoxtoken/api/v1/token"
    )
    os.environ["GET_TOKEN_URL"] = token_url

    # Use requests rather than a `!curl` shell escape: the password never
    # appears on a command line, and the JSON body is parsed directly instead
    # of being picked out of a fixed line of the captured curl output.
    response = requests.get(
        token_url,
        auth=(os.environ["WORKLOAD_USER"], os.environ["WORKLOAD_PASSWORD"]),
    )
    response.raise_for_status()

    access_token = response.json()["access_token"]
    os.environ["ACCESS_TOKEN"] = access_token
    return access_token

In [59]:
# Create CDE Resource to upload Spark CDE Job files
def create_cde_resource(tok, resource_name):
    """POST a new resource named ``resource_name`` to the CDE Jobs API.

    Args:
        tok: bearer token placed in the ``Authorization`` header.
        resource_name: name of the resource; converted with ``str()``.

    Prints the response status code followed by the response body.
    Returns nothing.
    """
    endpoint = os.environ["JOBS_API_URL"] + "/resources"
    payload = json.dumps({"name": str(resource_name)}).encode("utf-8")
    request_headers = {
        'Authorization': f"Bearer {tok}",
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }

    response = requests.post(endpoint, data=payload, headers=request_headers)
    for detail in (response.status_code, response.text):
        print(detail)

In [13]:
#Upload Spark CDE Job file to CDE Resource
def put_files(resource_name, jobs_path, tok):
    """Upload every regular file in ``jobs_path`` to a CDE resource.

    Each file is sent as a multipart PUT to
    ``{JOBS_API_URL}/resources/{resource_name}/{file_name}``, and the status
    code and body of each response are printed.

    Args:
        resource_name: name of the target CDE resource.
        jobs_path: directory holding the job files; a trailing slash is
            optional and the directory may sit at any depth.
        tok: bearer token placed in the ``Authorization`` header.
    """
    for file_name in os.listdir(jobs_path):
        job = os.path.join(jobs_path, file_name)
        if not os.path.isfile(job):
            # os.listdir also returns sub-directories, which cannot be opened
            # and uploaded as a single file.
            continue

        print("Working on Job: {}".format(file_name.split(".")[0]))

        # Use the directory entry itself as the remote file name instead of a
        # fixed path-segment index, so any jobs_path depth works.
        upload_url = '{jobs_api_url}/resources/{resource_name}/{file_name}'.format(
            jobs_api_url=os.environ["JOBS_API_URL"],
            resource_name=resource_name,
            file_name=file_name,
        )

        # The encoder reads from the open handle while the request is sent, so
        # the PUT must happen inside the `with`; the handle is closed afterwards.
        with open(job, 'rb') as job_file:
            m = MultipartEncoder(
                fields={'file': ('filename', job_file, 'text/plain')}
            )
            x = requests.put(
                upload_url,
                data=m,
                headers={'Authorization': f"Bearer {tok}", 'Content-Type': m.content_type},
            )

        print("Response Status Code {}".format(x.status_code))
        print(x.text)

In [18]:
def create_jobs_from_resource(resource_name, jobs_path, tok):
    """Create one scheduled Spark job in CDE for every entry in ``jobs_path``.

    For each entry, the job name is the text before the first ``.`` in the
    file name and the Spark file is the file name itself. The job definition
    is POSTed to ``{JOBS_API_URL}/jobs``, and the status code and body of each
    response are printed.

    Args:
        resource_name: CDE resource mounted at ``/`` for the job.
        jobs_path: directory whose entries name the jobs to create.
        tok: bearer token placed in the ``Authorization`` header.
    """
    headers = {
        'Authorization': f"Bearer {tok}",
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }
    jobs_url = '{}/jobs'.format(os.environ["JOBS_API_URL"])

    for file_name in os.listdir(jobs_path):
        job_name = file_name.split(".")[0]
        print("Working on Job: {}".format(job_name))

        # Build the payload as a dict and serialize it once. Substituting
        # placeholders in a JSON string corrupts names that contain a
        # placeholder word and breaks on quotes or backslashes.
        job_definition = {
            "name": job_name,
            "type": "spark",
            "retentionPolicy": "keep_indefinitely",
            "mounts": [{"dirPrefix": "/", "resourceName": resource_name}],
            "spark": {
                "file": file_name,
                "conf": {"spark.pyspark.python": "python3"},
            },
            # NOTE(review): schedule user and start/end dates are hard-coded;
            # confirm they are still the intended values before reuse.
            "schedule": {
                "enabled": True,
                "user": "pdefusco",
                "cronExpression": "30 */1 * * *",
                "start": "2022-08-18",
                "end": "2022-08-18",
            },
        }

        x = requests.post(jobs_url, headers=headers, data=json.dumps(job_definition))

        print("Response Status Code {}".format(x.status_code))
        print(x.text)
        print("\n")
        

#### Execute the workflow in this order. A 201 status code means the request succeeded. For any other status code, check the printed response message for the reason (for example, a 409 when the resource already exists).

In [7]:
# Obtain the CDE access token once; the calls below pass it along as `tok`
tok = set_cde_token()

In [60]:
# Create the CDE resource that the upload and job-creation cells below target
create_cde_resource(tok, "python2cde")

409
{"status":"error","message":"resource with name already exists"}


In [15]:
# Upload each file under example_spark_jobs/jobs/ to the "python2cde" resource
put_files("python2cde", "example_spark_jobs/jobs/", tok)

Working on Job: movieALS
Response Status Code 201

Working on Job: auction-analysis
Response Status Code 201

Working on Job: kmeans
Response Status Code 201

Working on Job: pi
Response Status Code 201

Working on Job: wordcount
Response Status Code 201



In [19]:
# Create one scheduled Spark job per file in example_spark_jobs/jobs/, mounting the "python2cde" resource
create_jobs_from_resource("python2cde", "example_spark_jobs/jobs/", tok)

Working on Job: movieALS
Response Status Code 500
{"status":"error","message":"job with name already exists"}


Working on Job: auction-analysis
Response Status Code 500
{"status":"error","message":"job with name already exists"}


Working on Job: kmeans
Response Status Code 500
{"status":"error","message":"job with name already exists"}


Working on Job: pi
Response Status Code 500
{"status":"error","message":"job with name already exists"}


Working on Job: wordcount
Response Status Code 500
{"status":"error","message":"job with name already exists"}


