# Bert Pipeline

In [13]:
! pip uninstall -y kfp
! pip install kfp captum torchvision matplotlib pillow pytorch-lightning flask flask-compress ipywidgets minio

Found existing installation: kfp 1.6.3
Uninstalling kfp-1.6.3:
  Successfully uninstalled kfp-1.6.3
Collecting kfp
  Using cached kfp-1.6.3-py3-none-any.whl
Installing collected packages: kfp
Successfully installed kfp-1.6.3


In [46]:
# Print the Node.js version available in this environment.
# NOTE(review): presumably Node/yarn are needed for the notebook widgets
# installed in the following cells — confirm before removing.
! node --version

# Upgrade npm itself, install yarn globally, then print the yarn version.
! npm install -g npm
! npm install --global yarn
! yarn --version

v14.16.0
[K[?25h              [27m] - reify:npm: [32;40mtiming[0m [35mreify:createSparse[0m Completed in 0ms[0m[Kms[0m[K
changed 14 packages, and audited 255 packages in 2s

11 packages are looking for funding
  run `npm fund` for details

found [32m[1m0[22m[39m vulnerabilities
[K[?25h              [27m] \ reify:yarn: [7msill[0m [35maudit[0m bulk request { yarn: [ '1.22.10'[0m[K0'[0m[K
changed 1 package, and audited 2 packages in 766ms

found [32m[1m0[22m[39m vulnerabilities
1.22.10


In [47]:
# Install Jupyter Notebook Widgets
# First command: register the Captum Insights widget as a notebook extension
# (symlinked, under the sys-prefix).
# Second command: install the jupyter-matplotlib extension.
# NOTE(review): the recorded output of this cell ends in a traceback
# ("Jupyter command `jupyter-nbbextension` not found"), which does not match
# the commands below — the output may be stale; verify both commands succeed.
! jupyter nbextension install --py --symlink --sys-prefix captum.insights.attr_vis.widget
! jupyter nbextension install jupyter-matplotlib

Installing /opt/conda/lib/python3.8/site-packages/captum/insights/attr_vis/widget/static -> jupyter-captum-insights
- Validating: [32mOK[0m

    To initialize this nbextension in the browser every time the notebook (or other app) loads:
    
          jupyter nbextension enable captum.insights.attr_vis.widget --py --sys-prefix
    
Traceback (most recent call last):
  File "/opt/conda/bin/jupyter", line 8, in <module>
    sys.exit(main())
  File "/opt/conda/lib/python3.8/site-packages/jupyter_core/command.py", line 285, in main
    command = _jupyter_abspath(subcommand)
  File "/opt/conda/lib/python3.8/site-packages/jupyter_core/command.py", line 124, in _jupyter_abspath
    raise Exception(
Exception: Jupyter command `jupyter-nbbextension` not found.


In [48]:
# Enable Jupyter Notebook Extensions
# Turn on the ipywidgets extension and the Captum Insights widget that was
# installed in the previous cell.
! jupyter nbextension enable --py widgetsnbextension
! jupyter nbextension enable captum.insights.attr_vis.widget --py --sys-prefix

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m
Enabling notebook extension jupyter-captum-insights/extension...
      - Validating: [32mOK[0m


In [20]:
import kfp
import json
import os
from kfp.onprem import use_k8s_secret
from kfp import components
from kfp.components import load_component_from_file, load_component_from_url
from kfp import dsl
from kfp import compiler

import numpy as np
import logging

from PIL import Image
import torchvision.transforms as transforms

from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F

import captum
from captum.attr import IntegratedGradients, Occlusion, LayerGradCam, LayerAttribution
from captum.attr import visualization as viz

kfp.__version__

'1.6.3'

# Enter your gateway and the cookie
[Use this Chrome extension to get the session cookie (token)](https://chrome.google.com/webstore/detail/editthiscookie/fngmhnnpilhplaeedifhccceomclgfbg?hl=en)

![image.png](./image.png)

## Update values for the ingress gateway and auth session

In [4]:
# Connection settings for the Kubeflow deployment.
# INGRESS_GATEWAY: base URL of the Istio ingress gateway; the pipeline API and
# the prediction/explanation requests below are sent through it.
INGRESS_GATEWAY='http://istio-ingressgateway.istio-system.svc.cluster.local'
# AUTH: value of the `authservice_session` cookie. Prefer supplying it through
# the KF_AUTH_SESSION environment variable so the credential is not stored in
# the notebook; the literal below is only the sample fallback and must be
# replaced with your own session value.
AUTH = os.environ.get(
    "KF_AUTH_SESSION",
    "MTYyMzEzOTA4M3xOd3dBTkVNeU4xQTBVek5WUVVoYVVra3pNalEzU2paYVEwSTBRVU5FUkZSUFJFWkNRVTVMTTFreVZFVkNSRWhTTnpKR1UwNVdWRkU9fGKXic0yyJFpWhuQhq9LjvsTTQaBS_TnwSJfF4kSKCgE",
)
# NAMESPACE: Kubernetes namespace used for experiments and the inference service.
NAMESPACE="kubeflow-user-example-com"
# COOKIE: full cookie header value passed to kfp.Client and to curl.
COOKIE="authservice_session="+AUTH
# EXPERIMENT: name of the KFP experiment the run is filed under.
EXPERIMENT="Default"

## Set Log bucket and Tensorboard Image

In [5]:
# MinIO endpoint handed to the Tensorboard pod as S3_ENDPOINT.
MINIO_ENDPOINT="http://minio-service.kubeflow:9000"
# Bucket used for Tensorboard logs and confusion-matrix output.
LOG_BUCKET="mlpipeline"
# Container image used by the Tensorboard visualization step.
# NOTE(review): the `latest` tag is not pinned — consider a fixed tag for reproducibility.
TENSORBOARD_IMAGE="public.ecr.aws/y1x1p2u5/tboard:latest"

In [17]:
# Create the Kubeflow Pipelines client: it talks to the `/pipeline` path behind
# the ingress gateway and authenticates with the session cookie defined above.
client = kfp.Client(host=INGRESS_GATEWAY+"/pipeline", cookies=COOKIE)

In [18]:
# Get (or create) the experiment named EXPERIMENT in NAMESPACE and keep the
# object that create_experiment returns. Taking the first entry of
# list_experiments() is not guaranteed to be this experiment when the
# namespace contains more than one.
my_experiment = client.create_experiment(EXPERIMENT, namespace=NAMESPACE)
my_experiment

{'created_at': datetime.datetime(2021, 4, 22, 8, 44, 39, tzinfo=tzlocal()),
 'description': None,
 'id': 'aac96a63-616e-4d88-9334-6ca8df2bb956',
 'name': 'Default',
 'resource_references': [{'key': {'id': 'kubeflow-user-example-com',
                                  'type': 'NAMESPACE'},
                          'name': None,
                          'relationship': 'OWNER'}],
 'storage_state': 'STORAGESTATE_AVAILABLE'}

## Set Inference parameters

In [6]:
# Name given to the InferenceService created by the pipeline's deploy step.
DEPLOY_NAME="bertserve"
# Model name used in the `/v1/models/<name>:predict|explain` request paths.
MODEL_NAME="bert"

In [20]:
# Build one op factory per pipeline step from its component.yaml definition.
# Paths are relative to the notebook's working directory.
prepare_tensorboard_op = load_component_from_file(
    "common/tensorboard/component.yaml"
)
prep_op = load_component_from_file("bert/yaml/pre_process/component.yaml")
train_op = load_component_from_file("bert/yaml/train/component.yaml")
deploy_op = load_component_from_file("common/deploy/component.yaml")
minio_op = load_component_from_file("common/minio/component.yaml")

## Define pipeline

In [21]:
# BERT training pipeline.
#
# Steps, in execution order:
#   1. "Visualization"          - prepare Tensorboard against s3://<log_bucket>/<log_dir>
#   2. "Preprocess & Transform" - run the pre-processing component
#   3. "Training"               - train on the pre-processed data
#   4. three MinIO uploads      - Tensorboard events, the .mar archive and
#                                 config.properties
#   5. "Deployer"               - apply the InferenceService manifest
#
# Parameters:
#   minio_endpoint           - S3 endpoint passed to the Tensorboard pod
#   log_bucket               - bucket for logs and uploaded artifacts
#   log_dir                  - folder (inside log_bucket) for Tensorboard events
#   mar_path                 - folder for the model archive
#   config_prop_path         - folder for config.properties
#   model_uri                - storageUri written into the InferenceService
#   tf_image                 - Tensorboard container image
#   deploy                   - InferenceService name
#   namespace                - InferenceService namespace
#   confusion_matrix_log_dir - folder for confusion-matrix output
#   num_samples              - forwarded to the training component
#
# Documentation is kept in comments rather than a docstring so the compiled
# pipeline metadata stays exactly as declared in the decorator.
@dsl.pipeline(name="Training pipeline", description="Sample training job test")
def pytorch_bert(
    minio_endpoint=MINIO_ENDPOINT,
    log_bucket=LOG_BUCKET,
    log_dir=f"tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}",
    mar_path=f"mar/{dsl.RUN_ID_PLACEHOLDER}/model-store",
    config_prop_path=f"mar/{dsl.RUN_ID_PLACEHOLDER}/config",
    model_uri=f"s3://mlpipeline/mar/{dsl.RUN_ID_PLACEHOLDER}",
    tf_image=TENSORBOARD_IMAGE,
    deploy=DEPLOY_NAME,
    namespace=NAMESPACE,
    confusion_matrix_log_dir=f"confusion_matrix/{dsl.RUN_ID_PLACEHOLDER}/",
    num_samples=1000
):

    # Tensorboard pod: the pod template injects the MinIO credentials (read
    # from the `mlpipeline-minio-artifact` secret) and the S3 settings as
    # environment variables.
    prepare_tb_task = prepare_tensorboard_op(
        log_dir_uri=f"s3://{log_bucket}/{log_dir}",
        image=tf_image,
        pod_template_spec=json.dumps(
            {
                "spec": {
                    "containers": [
                        {
                            "env": [
                                {
                                    "name": "AWS_ACCESS_KEY_ID",
                                    "valueFrom": {
                                        "secretKeyRef": {
                                            "name": "mlpipeline-minio-artifact",
                                            "key": "accesskey",
                                        }
                                    },
                                },
                                {
                                    "name": "AWS_SECRET_ACCESS_KEY",
                                    "valueFrom": {
                                        "secretKeyRef": {
                                            "name": "mlpipeline-minio-artifact",
                                            "key": "secretkey",
                                        }
                                    },
                                },
                                {"name": "AWS_REGION", "value": "minio"},
                                {"name": "S3_ENDPOINT", "value": f"{minio_endpoint}"},
                                {"name": "S3_USE_HTTPS", "value": "0"},
                                {"name": "S3_VERIFY_SSL", "value": "0"},
                            ]
                        }
                    ]
                }
            }
        ),
    ).set_display_name("Visualization")

    prep_task = prep_op().after(prepare_tb_task).set_display_name("Preprocess & Transform")
    train_task = (
        train_op(
            input_data=prep_task.outputs["output_data"],
            profiler="pytorch",
            confusion_matrix_url=f"minio://{log_bucket}/{confusion_matrix_log_dir}",
            num_samples=num_samples,
            # For GPU set gpu count and accelerator type
            gpus=0,
            accelerator='None'
        )
        .after(prep_task)
        .set_display_name("Training")
    )
    # For GPU uncomment below line and set GPU limit and node selector
    # ).set_gpu_limit(1).add_node_selector_constraint('cloud.google.com/gke-accelerator','nvidia-tesla-p4')

    # Uploads go to `log_bucket` so they land in the same bucket that the
    # Tensorboard step (log_dir_uri above) reads from; with the default value
    # this is still "mlpipeline".
    minio_tb_upload = (
        minio_op(
            bucket_name=log_bucket,
            folder_name=log_dir,
            input_path=train_task.outputs["tensorboard_root"],
            filename="",
        )
        .after(train_task)
        .set_display_name("Tensorboard Events Pusher")
    )
    minio_mar_upload = (
        minio_op(
            bucket_name=log_bucket,
            folder_name=mar_path,
            input_path=train_task.outputs["checkpoint_dir"],
            filename="bert_test.mar",
        )
        .after(train_task)
        .set_display_name("Mar Pusher")
    )
    minio_config_upload = (
        minio_op(
            bucket_name=log_bucket,
            folder_name=config_prop_path,
            input_path=train_task.outputs["checkpoint_dir"],
            filename="config.properties",
        )
        .after(train_task)
        .set_display_name("Config Pusher")
    )

    # Render the CPU InferenceService manifest (name, namespace, storageUri).
    model_uri = str(model_uri)
    isvc_yaml = """
    apiVersion: "serving.kubeflow.org/v1beta1"
    kind: "InferenceService"
    metadata:
      name: {}
      namespace: {}
    spec:
      predictor:
        serviceAccountName: sa
        pytorch:
          storageUri: {}
          resources:
            limits:
              memory: 4Gi   
    """.format(
        deploy, namespace, model_uri
    )

    # For GPU inference use below yaml with gpu count and accelerator
    gpu_count = "1"
    accelerator = "nvidia-tesla-p4"
    isvc_gpu_yaml = """
    apiVersion: "serving.kubeflow.org/v1beta1"
    kind: "InferenceService"
    metadata:
      name: {}
      namespace: {}
    spec:
      predictor:
        serviceAccountName: sa
        pytorch:
          storageUri: {}
          resources:
            limits:
              memory: 4Gi   
              nvidia.com/gpu: {}
          nodeSelector:
            cloud.google.com/gke-accelerator: {}
""".format(
        deploy, namespace, model_uri, gpu_count, accelerator
    )
    # Update inferenceservice_yaml for GPU inference (pass isvc_gpu_yaml instead).
    # The deployer waits for both the model archive and config.properties
    # uploads, since both live under the mar/<run id> prefix that model_uri
    # points at by default.
    deploy_task = (
        deploy_op(action="apply", inferenceservice_yaml=isvc_yaml)
        .after(minio_mar_upload, minio_config_upload)
        .set_display_name("Deployer")
    )

    # Expose the MinIO secret keys as MINIO_SECRET_KEY / MINIO_ACCESS_KEY
    # environment variables on the pipeline's ops.
    dsl.get_pipeline_conf().add_op_transformer(
        use_k8s_secret(
            secret_name="mlpipeline-minio-artifact",
            k8s_secret_key_to_env={
                "secretkey": "MINIO_SECRET_KEY",
                "accesskey": "MINIO_ACCESS_KEY",
            },
        )
    )


In [22]:
# Compile pipeline
# Writes the compiled pipeline package to 'pytorch.tar.gz' in the working
# directory, with component type checking enabled.
compiler.Compiler().compile(pytorch_bert, 'pytorch.tar.gz', type_check=True)

In [23]:
# Execute pipeline
# Submit the compiled package as a run named 'pytorch-bert' under the
# experiment selected above.
run = client.run_pipeline(my_experiment.id, 'pytorch-bert', 'pytorch.tar.gz')

## Wait for the inference service below to reach the `READY True` state.

In [1]:
!kubectl get isvc $DEPLOY

NAME         URL                                                      READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION                 AGE
bertserve    http://bertserve.kubeflow-user-example-com.example.com   True           100                              bertserve-predictor-default-f5s8c   6m26s
torchserve                                                            False                                                                               173m


# Get the InferenceService host name

In [8]:
INFERENCE_SERVICE_LIST = ! kubectl get isvc $DEPLOY_NAME -n $NAMESPACE -o json | jq .status.url | tr -d '"'| cut -d "/" -f 3
INFERENCE_SERVICE_NAME = INFERENCE_SERVICE_LIST[0]
INFERENCE_SERVICE_NAME

'/bin/bash: jq: command not found'

# Prediction Request

In [24]:
# POST the sample text to the model's `:predict` endpoint through the ingress
# gateway and save the response body to bert_prediction_output.json.
# The Host header routes the request to the InferenceService.
# NOTE: `-v` echoes the request headers, including the session cookie, into
# the cell output — clear the output before sharing the notebook.
!curl -v -H "Host: $INFERENCE_SERVICE_NAME" -H "Cookie: $COOKIE" "$INGRESS_GATEWAY/v1/models/$MODEL_NAME:predict" -d @./bert/sample.txt > bert_prediction_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 10.100.251.14:80...
* TCP_NODELAY set
* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.251.14) port 80 (#0)
> POST /v1/models/bert:predict HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.68.0
> Accept: */*
> Cookie: authservice_session=MTYyMzEzOTA4M3xOd3dBTkVNeU4xQTBVek5WUVVoYVVra3pNalEzU2paYVEwSTBRVU5FUkZSUFJFWkNRVTVMTTFreVZFVkNSRWhTTnpKR1UwNVdWRkU9fGKXic0yyJFpWhuQhq9LjvsTTQaBS_TnwSJfF4kSKCgE
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-length: 33
< content-type: application/json; charset=UTF-8
< date: Tue, 08 Jun 202

In [25]:
# Print the raw prediction response saved by the previous cell.
! cat bert_prediction_output.json

{"predictions": ["\"Sci/Tech\""]}

# Explanation Request

In [17]:
# POST the same sample text to the model's `:explain` endpoint and save the
# response body to bert_explaination_output.json.
# NOTE: `-v` echoes the request headers, including the session cookie, into
# the cell output — clear the output before sharing the notebook.
!curl -v -H "Host: $INFERENCE_SERVICE_NAME" -H "Cookie: $COOKIE" "$INGRESS_GATEWAY/v1/models/$MODEL_NAME:explain" -d @./bert/sample.txt  > bert_explaination_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 10.100.251.14:80...
* TCP_NODELAY set
* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.251.14) port 80 (#0)
> POST /v1/models/bert:explain HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.68.0
> Accept: */*
> Cookie: authservice_session=MTYyMzEzOTA4M3xOd3dBTkVNeU4xQTBVek5WUVVoYVVra3pNalEzU2paYVEwSTBRVU5FUkZSUFJFWkNRVTVMTTFreVZFVkNSRWhTTnpKR1UwNVdWRkU9fGKXic0yyJFpWhuQhq9LjvsTTQaBS_TnwSJfF4kSKCgE
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
100    84    0     0  100    84      0      1  0:01:24  0:00:44  0:00:40     0* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-leng

In [18]:
# Print the raw explanation response saved by the previous cell.
! cat bert_explaination_output.json

{"explanations": [{"words": ["[CLS]", "bloomberg", "has", "reported", "on", "the", "economy", "[SEP]"], "importances": [0.49803253502586686, -0.042289041470624116, -0.2269101439114476, 0.15573707990586028, 0.0867725310070807, 0.17919607383818434, 0.5255456841947312, -0.5988271940782108], "delta": 0.12081503337965546}]}

In [21]:
# Parse the saved explanation response; the context manager closes the file
# handle as soon as the JSON has been read.
with open("./bert_explaination_output.json", "r") as explanation_file:
    explanations_json = json.load(explanation_file)
explanations_json

{'explanations': [{'words': ['[CLS]',
    'bloomberg',
    'has',
    'reported',
    'on',
    'the',
    'economy',
    '[SEP]'],
   'importances': [0.49803253502586686,
    -0.042289041470624116,
    -0.2269101439114476,
    0.15573707990586028,
    0.0867725310070807,
    0.17919607383818434,
    0.5255456841947312,
    -0.5988271940782108],
   'delta': 0.12081503337965546}]}

In [26]:
# Parse the saved prediction response; the context manager closes the file
# handle as soon as the JSON has been read.
with open("./bert_prediction_output.json", "r") as prediction_file:
    prediction_json = json.load(prediction_file)

In [91]:
# Unpack the first (only) explanation returned by the service.
# `torch` is already imported in the notebook's import cell.
explanation = explanations_json["explanations"][0]
attributions = torch.tensor(explanation["importances"])
tokens = explanation["words"]
delta = explanation["delta"]

# The prediction response carries only the label, so the probability shown in
# the visualisation is a fixed placeholder.
pred_prob = 0.75
# The returned label is itself wrapped in double quotes ("\"Sci/Tech\"");
# strip them so the table shows the bare class name.
pred_class = prediction_json["predictions"][0].strip('"')
true_class = "Business"
attr_class = "world"

In [92]:
from captum.attr import visualization

# Wrap the attribution data in a single Captum record. Arguments are passed
# positionally, in the order: attributions, predicted probability, predicted
# class, true class, attribution class, summed attribution score, tokens, delta.
vis_data_records = [
    visualization.VisualizationDataRecord(
        attributions,
        pred_prob,
        pred_class,
        true_class,
        attr_class,
        attributions.sum(),
        tokens,
        delta,
    )
]

In [93]:
# visualize_text renders the table itself and also returns the HTML object;
# the trailing semicolon stops the notebook from rendering that return value a
# second time (the recorded output shows the table twice).
visualization.visualize_text(vis_data_records);

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,


## Cleanup Script

In [56]:
# WARNING: deletes every InferenceService in NAMESPACE, not only the one
# created by this notebook (DEPLOY_NAME).
! kubectl delete --all isvc -n $NAMESPACE

In [None]:
# Remove all pods in NAMESPACE whose phase is Succeeded.
! kubectl delete pod --field-selector=status.phase==Succeeded -n $NAMESPACE