### OCI Data Science - ML Pipelines
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
import oci
import os
from os import environ
import ads

In [None]:
# Use the notebook session's compartment when running on an OCI Data Science
# Notebook Session; otherwise fall back to the placeholder, which you must
# replace with your compartment OCID when working locally.
compartment_id = os.environ.get('NB_SESSION_COMPARTMENT_OCID', '<YOUR_COMPARTMENT_OCID>')
print('compartment OCID = ', compartment_id)

In [None]:
# Choose the ADS auth mechanism from the environment: Resource Principal when
# the OCI_RESOURCE_PRINCIPAL_VERSION variable is present (OCI Notebook Session),
# otherwise the API key from the local config file.
if "OCI_RESOURCE_PRINCIPAL_VERSION" in os.environ:
    auth_message, auth_mode = "using Resource Principal for auth", "resource_principal"
else:
    auth_message, auth_mode = "using API key for auth", "api_key"

print(auth_message)
ads.set_auth(auth=auth_mode)

Fill in the details of your resources:

In [None]:
# project identifier placeholder; sent as "projectId" in the pipeline and pipeline run payloads below
project_id = "<YOUR_PROJECT_OCID>"

In [None]:
# log group placeholder; used in the pipeline's log configuration, the service log creation and the log search below
log_group_id = "<YOUR_LOGGROUP_OCID>"

In [None]:
# create a data science client to communicate with the service
# NOTE(review): oci.config.from_file() is called with no arguments, so it relies on a local
# OCI config file. Unlike the ads.set_auth cell above, there is no Resource Principal branch
# here -- confirm this works in a notebook session that has no config file.
# `config` is reused later for the logging and log-search clients.
config = oci.config.from_file()
data_science_client = oci.data_science.DataScienceClient(config)

In [None]:
# List the pipelines in the compartment (the call filters by compartment_id only).
# This tests that the new APIs are available and working. If you receive an error,
# check that you installed the latest Beta OCI Python SDK and CLI.
res = data_science_client.list_pipelines(compartment_id=compartment_id)

In [None]:
# show the data returned by the list_pipelines call
print(res.data)

In [None]:
# display name of the new pipeline; also used as the prefix of the service log name below
pipeline_name = "pipeline_sample_1"

## Create a new pipeline

In [None]:
# Settings shared by the pipeline-level defaults and by every step.
_PIPELINE_SHAPE = {
    "shapeName": "VM.Standard2.4",
    "blockStorageSizeInGBs": "50",
}
_PIPELINE_CONDA_ENV = {
    "CONDA_ENV_TYPE": "service",
    "CONDA_ENV_SLUG": "onnx110_p37_cpu_v1",
}


def _custom_script_step(step_name, description, entrypoint, depends_on=None):
    """Build the definition of one CUSTOM_SCRIPT pipeline step.

    Args:
        step_name: Name of the step, referenced by other steps' "dependsOn".
        description: Free-text description of the step.
        entrypoint: Script file set as PIPELINE_STEP_RUN_ENTRYPOINT.
        depends_on: Optional list of step names this step depends on; the
            "dependsOn" key is omitted when it is empty or None.

    Returns:
        A dict describing the step, ready to be placed in "stepDetails".
    """
    step = {
        "stepName": step_name,
        "description": description,
        "stepType": "CUSTOM_SCRIPT",
        "stepInfrastructureConfigurationDetails": dict(_PIPELINE_SHAPE),
        "stepConfigurationDetails": {
            "type": "DEFAULT",
            "maximumRuntimeInMinutes": 30,
            "environmentVariables": {
                "PIPELINE_STEP_RUN_ENTRYPOINT": entrypoint,
                **_PIPELINE_CONDA_ENV,
            },
        },
    }
    if depends_on:
        step["dependsOn"] = list(depends_on)
    return step


# Three steps chained one after another: step1 -> step2 -> step3.
pipeline_steps = [
    _custom_script_step("step1", "Pass data by value", "mlpipeline_step1.py"),
    _custom_script_step("step2", "pass data by reference", "mlpipeline_step2.py", depends_on=["step1"]),
    _custom_script_step("step3", "read data by reference", "mlpipeline_step3.py", depends_on=["step2"]),
]

pipeline_payload = {
    "projectId": project_id,
    "compartmentId": compartment_id,
    "displayName": pipeline_name,
    "infrastructureConfigurationDetails": dict(_PIPELINE_SHAPE),
    "logConfigurationDetails": {
        "enableLogging": True,
        "logGroupId": log_group_id,
        "enableAutoLogCreation": True,  # log will be automatically created
    },
    "configurationDetails": {
        "type": "DEFAULT",
        "maximumRuntimeInMinutes": 30,
        "environmentVariables": dict(_PIPELINE_CONDA_ENV),
    },
    "stepDetails": pipeline_steps,
    "freeformTags": {
        "freeTags": "testing pipeline",
    },
}

# Create the pipeline and keep its id for the artifact uploads and the run.
pipeline_res = data_science_client.create_pipeline(pipeline_payload)
pipeline_id = pipeline_res.data.id

In [None]:
# id of the pipeline, taken from the create_pipeline response
print(pipeline_id)

In [None]:
# the pipeline will be in CREATING state until all steps have their artifacts uploaded
# (this prints the state captured in the create_pipeline response; it is not re-fetched here)
print(pipeline_res.data.lifecycle_state)

In [None]:
# upload steps artifacts
# The archive is opened in a context manager so the file handle is closed as
# soon as the upload call returns (the original left it open).
artifact1 = "mlpipeline_step1.zip"
with open(artifact1, "rb") as file1:
    ret1 = data_science_client.create_step_artifact(
        pipeline_id,
        "step1",
        file1,
        content_disposition=f"attachment; filename={artifact1}",
    )
print("OK" if ret1.status == 204 else ret1.status)  # 204 is ok

In [None]:
# Upload the artifact for step2. The archive is opened in a context manager so
# the file handle is closed as soon as the upload call returns.
artifact2 = "mlpipeline_step2.zip"
with open(artifact2, "rb") as file2:
    ret2 = data_science_client.create_step_artifact(
        pipeline_id,
        "step2",
        file2,
        content_disposition=f"attachment; filename={artifact2}",
    )
print("OK" if ret2.status == 204 else ret2.status)  # 204 is ok

In [None]:
# Upload the artifact for step3. The archive is opened in a context manager so
# the file handle is closed as soon as the upload call returns.
artifact3 = "mlpipeline_step3.zip"
with open(artifact3, "rb") as file3:
    ret3 = data_science_client.create_step_artifact(
        pipeline_id,
        "step3",
        file3,
        content_disposition=f"attachment; filename={artifact3}",
    )
print("OK" if ret3.status == 204 else ret3.status)  # 204 is ok

In [None]:
# pipeline should be in ACTIVE state now
# re-fetch the pipeline by id so the printed state is current (note: this rebinds `res`)
res = data_science_client.get_pipeline(pipeline_id)
print(res.data.lifecycle_state)

In [None]:
# Enable service logs - to catch issues during step provisioning.
logging_client = oci.logging.LoggingManagementClient(config)
service_log_name = f"{pipeline_name}-service_log"

# Log source: the Data Science service, scoped to this pipeline.
service_log_source = oci.logging.models.OciService(
    source_type="OCISERVICE",
    service="datascience",
    resource=pipeline_id,
    category="pipelinerunlog",
)
service_log_configuration = oci.logging.models.Configuration(
    source=service_log_source,
    compartment_id=compartment_id,
)
service_log_details = oci.logging.models.CreateLogDetails(
    display_name=service_log_name,
    log_type="SERVICE",
    is_enabled=True,
    configuration=service_log_configuration,
)

create_log_response = logging_client.create_log(
    log_group_id=log_group_id,
    create_log_details=service_log_details,
)

# Check the response
print(create_log_response.status)

## Run the pipeline

In [None]:
# display name given to the pipeline run created below
pipeline_run_name = "pipeline-run-1"

In [None]:
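# Uncomment the next line and set it to the OCI Object Storage location to use for passing data. Make sure you have the proper permissions.
# The pipeline run payload below reads data_location, so that cell raises a NameError if this stays commented out.
#data_location = "<YOUR_OBJECT_STORAGE_BUCKET>"  # use: 'oci://<bucket>@<namespace>/'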

In [None]:

# Environment variables that override the pipeline configuration for this run;
# DATA_LOCATION is set to the data_location chosen above.
run_environment_overrides = {"DATA_LOCATION": data_location}

pipeline_run_payload = {
    "projectId": project_id,
    "displayName": pipeline_run_name,
    "pipelineId": pipeline_id,
    "compartmentId": compartment_id,
    "configurationOverrideDetails": {
        "type": "DEFAULT",
        "environmentVariables": run_environment_overrides,
    },
}

# Start the run; run_res.data.id identifies it in the cells that follow.
run_res = data_science_client.create_pipeline_run(pipeline_run_payload)

In [None]:
# check pipeline run status
# the run is fetched once here; re-run this cell to refresh `run_status`
run_status = data_science_client.get_pipeline_run(run_res.data.id)
print(run_status.data.lifecycle_state)

In [None]:
# Check the status of the step run named "step1".
# The original indexed step_runs with the expression "step_name" == "step1",
# which is simply False (i.e. index 0), so it always printed the first step
# run. Select the step run by its step_name instead.
step1_run = next(
    (
        step_run
        for step_run in (run_status.data.step_runs or [])
        if step_run.step_name == "step1"
    ),
    None,
)
print(step1_run.lifecycle_state if step1_run is not None else "step1 run not found")

## View logs

In [None]:
# wait a few seconds after the pipeline run is created to make sure the log was generated
# re-fetch the run and keep its log id, which is used to build the log search query below
run1 = data_science_client.get_pipeline_run(run_res.data.id)
log_id = run1.data.log_details.log_id

In [None]:
from datetime import datetime, timedelta, timezone

# Search the last 24 hours of the pipeline run's log and print up to 10 results.
# "now" is a timezone-aware UTC timestamp: a naive local datetime carries no
# offset, so on a machine that is not on UTC the search window could be shifted
# and miss the most recent log entries.
now = datetime.now(timezone.utc)
loggingsearch_client = oci.loggingsearch.LogSearchClient(config)
search_logs_response = loggingsearch_client.search_logs(
    search_logs_details=oci.loggingsearch.models.SearchLogsDetails(
        time_start=now - timedelta(days=1),
        time_end=now,
        # the query targets one log: <compartment>/<log group>/<log>
        search_query=f"search \"{compartment_id}/{log_group_id}/{log_id}\"",
        is_return_field_info=False,
    ),
    limit=10
)

# Get the data from response
print(search_logs_response.data)

## View the pipeline run from the OCI console UI

In [None]:
# Print a hint followed by the console URL of the pipeline run.
print("Ctrl-Click the hyperlink to open the pipeline run page in the OCI console UI")
run_console_url = f"https://cloud.oracle.com/data-science/pipeline-runs/{run_res.data.id}"
print(run_console_url)

## Delete the pipeline

In [None]:
# delete the pipeline when done
# (removes the pipeline created earlier in this notebook, identified by pipeline_id)
data_science_client.delete_pipeline(pipeline_id)