In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Gemma (Deployment)

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
Open in Vertex AI Workbench
    </a> (A Python-3 CPU notebook is recommended)
  </td>
</table>

## Overview

This notebook demonstrates deploying Gemma models
 * on TPU using **Hex-LLM**, a **H**igh-**E**fficiency **L**arge **L**anguage **M**odel serving solution built with **XLA** that is being developed by Google Cloud, and
 * on GPU using [vLLM](https://github.com/vllm-project/vllm), the state-of-the-art open source LLM serving solution on GPU.

This notebook also showcases how to use the [Text moderation API](https://cloud.google.com/natural-language/docs/moderating-text) to analyze model predictions against a predefined list of safety attributes.


### Objective

- Deploy Gemma with Hex-LLM on TPU
- Deploy Gemma with [vLLM](https://github.com/vllm-project/vllm) on GPU

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage
* Cloud NL APIs

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing) and [Cloud NL API pricing](https://cloud.google.com/natural-language/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

### Install dependencies

Run the following commands to install dependencies.

In [20]:
!pip install --upgrade google-cloud-aiplatform

In [20]:
!pip3 install --upgrade google-cloud-aiplatform
!pip3 install ipython pandas[output_formatting] google-cloud-language==2.10.0

# Restart the notebook kernel after installing dependencies.
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.47.0-py2.py3-none-any.whl.metadata (30 kB)
Downloading google_cloud_aiplatform-1.47.0-py2.py3-none-any.whl (4.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: google-cloud-aiplatform
  Attempting uninstall: google-cloud-aiplatform
    Found existing installation: google-cloud-aiplatform 1.46.0
    Uninstalling google-cloud-aiplatform-1.46.0:
      Successfully uninstalled google-cloud-aiplatform-1.46.0
Successfully installed google-cloud-aiplatform-1.47.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




{'status': 'ok', 'restart': True}

### Colab only
Run the following commands for Colab and skip this section if you are using Workbench or Colab Enterprise.

In [None]:
# import sys

# if "google.colab" in sys.modules:
#     from google.colab import auth as google_auth

#     google_auth.authenticate_user()

### Setup Google Cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI API, Compute Engine API and Cloud Natural Language API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component,language.googleapis.com).

1. [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs.

1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console) with `Vertex AI User` and `Storage Object Admin` roles for deploying fine tuned model to Vertex AI endpoint.

### Request for TPU quota

By default, the quota for TPU deployment `Custom model serving TPU v5e cores per region` is 0. If you would like to use Hex-LLM TPU deployment, please request TPU quota following the instructions at ["Request a higher quota"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota) if you haven't done so already. Please note that vLLM GPU deployment does not need this step.

### Import the necessary packages

In [3]:
import os
import sys
from datetime import datetime
from typing import Tuple

import pandas as pd
from google.cloud import aiplatform, language

Set the following variables for the experiment environment. The specified Cloud Storage bucket (`BUCKET_URI`) should be located in the specified region (`REGION`). Note that a multi-region bucket (eg. "us") is not considered a match for a single region covered by the multi-region range (eg. "us-central1").

In [4]:
# Cloud project ID.
PROJECT_ID = "1024619852873"  # @param {type:"string"}

# Region for launching jobs.
# TPU deployment is only supported in us-west1.
REGION = "us-central1"  # @param {type:"string"}

# Cloud Storage bucket for storing experiment outputs.
# Start with gs:// prefix, e.g. gs://foo_bucket.
BUCKET_URI = "gs://t1sbucket"  # @param {type:"string"}
assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "gemma")

# Initialize Vertex AI API.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# The service account looks like:
# '@.iam.gserviceaccount.com'
# Please visit https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# and create a service account with `Vertex AI User` and `Storage Object Admin` roles.
SERVICE_ACCOUNT = "1024619852873-compute@developer.gserviceaccount.com"  # @param {type:"string"}

# Provision GCS bucket permissions to the SERVICE_ACCOUNT.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

! gcloud config set project $PROJECT_ID
! gcloud services enable language.googleapis.com

# @markdown ### Access Gemma Models

# @markdown #### Hex-LLM TPU deployment
# @markdown Kaggle credentials required for downloading Kaggle weights for Hex-LLM TPU deployment. The credentials are not needed if you have the PyTorch model checkpoint and tokenizer in a GCS bucket.
# @markdown Generate the Kaggle username and key by following [these instructions](https://github.com/Kaggle/kaggle-api?tab=readme-ov-file#api-credentials).
# @markdown You will need to review and accept the model license.
#KAGGLE_USERNAME = ""  # @param {type:"string", isTemplate:true}
#KAGGLE_KEY = ""  # @param {type:"string", isTemplate:true}

# @markdown ---

# @markdown #### vLLM GPU deployment
# @markdown If you already obtained access to Gemma models on [Hugging Face](https://huggingface.co/), you can load models from there.
# @markdown Alternatively, you can also load the original Gemma models for serving from Vertex AI after accepting the agreement.
# @markdown **Please only select and fill one of the two following sections.**
#LOAD_MODEL_FROM = "Hugging Face"  # @param ["Hugging Face", "Google Cloud"] {isTemplate:true}

# @markdown ##### Access Gemma models on HuggingFace
# @markdown You must provide a Hugging Face User Access Token (read) to access the Gemma models. You can follow the [Hugging Face documentation](https://huggingface.co/docs/hub/en/security-tokens) to create a **read** access token and put it in the `HF_TOKEN` field below.
#HF_TOKEN = ""  # @param {type:"string", isTemplate:true}
#if LOAD_MODEL_FROM == "Hugging Face":
#    assert (
#        HF_TOKEN
#    ), "Please provide a read HF_TOKEN to load models from Hugging Face, or select a different model source."


# @markdown *--- Or ---*
# @markdown ##### Access Gemma models on Vertex AI
# @markdown Accept the model agreement to access the models:
# @markdown 1. Open the [Gemma model card](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/335) from [Vertex AI Model Garden](https://cloud.google.com/model-garden).
# @markdown 2. Review the agreement on the model card page.
# @markdown 3. After accepting the agreement of Gemma, a `https://` link containing Gemma pretrained and finetuned models will be shared.
# @markdown 4. Paste the link in the `VERTEX_MODEL_GARDEN_GEMMA` field below.
# @markdown **Note:** This will unzip and copy the Gemma model artifacts to your Cloud Storage bucket, which will take around 1 hour.

#"https://storage.googleapis.com/vertex-ai/generative-ai/model-garden/gemma.tar.gz?GoogleAccessId=service-689411112969@gcp-sa-aiplatform.iam.gserviceaccount.com&Expires=1712193008&Signature=WPShl1bPFSS6QWgM7XwD9RPvVWiOGhHx%252ByrJK17NwDX6EyTBNJTxb9pUofAieaJa4yhSKN3qSD3fgh7%252FckX1ev9fqQl%252BE7dZyP%252FtkB0ZSqyxEWnAyJcuvOP8CCkiknLT19MoaIbLI5Llk2%252B7KW9YXe%252Bb4i2sdbqKjqqvC5V%252BX65udHRVajRhkv%252B0McHo8pqMxC8i8Rawf%252BPQR7xM96il6thA4tGdRcAIodW4I%252BF%252Fp0X9ngQRFUkcFdywZkWaj7H67SKcYy3E8xhSnHuxbHxnBR9%252FjlBMaqHkhLJ%252Fm29kshrxfFT6naHpkvGA6C5nAzwmMDCuAc83BaqXmTAklQm%252FYA%253D%253D"
#"https://storage.googleapis.com/vertex-ai/generative-ai/model-garden/gemma.tar.gz?GoogleAccessId=service-689411112969@gcp-sa-aiplatform.iam.gserviceaccount.com&amp;Expires=1712372138&amp;Signature=mFTPRJcLNCnDKXSrQ539d0AedHFjaNxGNXbg7%252Ff9qjcU1ovND%252Bkp1TKl9aAQpToag5LlUyzq7iYMRvcJgICd8HCoKR2C1F8DwdO7O6BsfvN6F4GDdqU8D8KmGvvB503qPX41LOfu4zAr8o4jtROw09DSwTFpAEiCuxdUW45hjMcCx4XLZ3mTk%252FXdk0LIZiHArXOWxdRAnTJEdqFO8mDDCab7%252FLcWzuSYu6GpWeH%252FzI0PjXO3Q60fQ4VvfcwEaF651%252FEBeJ8U8TTLYeJTxDKn1poCk8ILsAo1u9S7XCJOjIcyBxaohTCMIbaSijCJZR%252BChswLHMXos9XVyiJCD3XGyg%253D%253D"


VERTEX_MODEL_GARDEN_GEMMA = "https://storage.googleapis.com/vertex-ai/generative-ai/model-garden/gemma.tar.gz?GoogleAccessId=service-689411112969@gcp-sa-aiplatform.iam.gserviceaccount.com&Expires=1712193008&Signature=WPShl1bPFSS6QWgM7XwD9RPvVWiOGhHx%252ByrJK17NwDX6EyTBNJTxb9pUofAieaJa4yhSKN3qSD3fgh7%252FckX1ev9fqQl%252BE7dZyP%252FtkB0ZSqyxEWnAyJcuvOP8CCkiknLT19MoaIbLI5Llk2%252B7KW9YXe%252Bb4i2sdbqKjqqvC5V%252BX65udHRVajRhkv%252B0McHo8pqMxC8i8Rawf%252BPQR7xM96il6thA4tGdRcAIodW4I%252BF%252Fp0X9ngQRFUkcFdywZkWaj7H67SKcYy3E8xhSnHuxbHxnBR9%252FjlBMaqHkhLJ%252Fm29kshrxfFT6naHpkvGA6C5nAzwmMDCuAc83BaqXmTAklQm%252FYA%253D%253D"
  # @param {type:"string", isTemplate:true}

    
    
# Duplicating below, but removing the 'if' AND just using the url directly 
# ----------------------------------------------------------------------------------
# LOAD_MODEL_FROM = ""    
# if LOAD_MODEL_FROM == "Google Cloud":
#     assert (
#         VERTEX_MODEL_GARDEN_GEMMA
#     ), "Please click the agreement of Gemma in Vertex AI Model Garden, and get the URL to Gemma model artifacts."

#     # Only use the last part in case a full command is pasted.
#     signed_url = VERTEX_MODEL_GARDEN_GEMMA.split(" ")[-1].strip('"')

#     ! mkdir -p ./gemma
#     ! curl -X GET "{signed_url}" | tar -xzvf - -C ./gemma/
#     ! gsutil -m cp -R ./gemma/* {MODEL_BUCKET}

#     model_path_prefix = MODEL_BUCKET
# else:
#     model_path_prefix = "google/"
# ----------------------------------------------------------------------------------


#if LOAD_MODEL_FROM == "Google Cloud":
assert (
    VERTEX_MODEL_GARDEN_GEMMA
), "Please click the agreement of Gemma in Vertex AI Model Garden, and get the URL to Gemma model artifacts."

# Only use the last part in case a full command is pasted.
#signed_url = VERTEX_MODEL_GARDEN_GEMMA.split(" ")[-1].strip('"')
signed_url = "https://storage.googleapis.com/vertex-ai/generative-ai/model-garden/gemma.tar.gz?GoogleAccessId=service-689411112969@gcp-sa-aiplatform.iam.gserviceaccount.com&amp;Expires=1712372138&amp;Signature=mFTPRJcLNCnDKXSrQ539d0AedHFjaNxGNXbg7%252Ff9qjcU1ovND%252Bkp1TKl9aAQpToag5LlUyzq7iYMRvcJgICd8HCoKR2C1F8DwdO7O6BsfvN6F4GDdqU8D8KmGvvB503qPX41LOfu4zAr8o4jtROw09DSwTFpAEiCuxdUW45hjMcCx4XLZ3mTk%252FXdk0LIZiHArXOWxdRAnTJEdqFO8mDDCab7%252FLcWzuSYu6GpWeH%252FzI0PjXO3Q60fQ4VvfcwEaF651%252FEBeJ8U8TTLYeJTxDKn1poCk8ILsAo1u9S7XCJOjIcyBxaohTCMIbaSijCJZR%252BChswLHMXos9XVyiJCD3XGyg%253D%253D"



! mkdir -p ./gemma
! curl -X GET "{signed_url}" | tar -xzvf - -C ./gemma/
! gsutil -m cp -R ./gemma/* {MODEL_BUCKET}

model_path_prefix = MODEL_BUCKET

No changes made to gs://t1sbucket/
Updated property [core/project].
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0gemma/
gemma/gemma-2b-it/
gemma/gemma-2b-it/config.json
gemma/gemma-2b-it/tokenizer.model
gemma/gemma-2b-it/generation_config.json
gemma/gemma-2b-it/model.safetensors.index.json
gemma/gemma-2b-it/model-00001-of-00002.safetensors
  0 32.7G    0  146M    0     0  42.4M      0  0:13:08  0:00:03  0:13:05 42.4M^C
Copying file://./gemma/gemma/gemma-7b-it/README.md [Content-Type=text/markdown]...
Copying file://./gemma/gemma/gemma-7b-it/special_tokens_map.json [Content-Type=application/json]...
Copying file://./gemma/gemma/gemma-7b-it/tokenizer.model [Content-Type=application/octet-stream]...
Copying file://./gemma/gemma/gemma-7b-it/config.json [Content-Type=application/json]...
Copying file://

### Define docker images

In [6]:
# Serving docker images.
#HEXLLM_DOCKER_URI = "us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20240220_0936_RC01"
VLLM_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20240220_0936_RC01"

### Define common functions

In [7]:
def get_job_name_with_datetime(prefix: str) -> str:
    """Gets the job name with date time when triggering deployment jobs."""
    return prefix + datetime.now().strftime("_%Y%m%d_%H%M%S")


def deploy_model_hexllm(
    model_name: str,
    model_id: str,
    service_account: str,
    machine_type: str = "ct5lp-hightpu-1t",
    max_num_batched_tokens: int = 11264,
    tokens_pad_multiple: int = 1024,
    seqs_pad_multiple: int = 32,
) -> Tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Deploys models with Hex-LLM on TPU in Vertex AI."""
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-endpoint")

    num_tpu_chips = int(machine_type[-2])
    hexllm_args = [
        "--host=0.0.0.0",
        "--port=7080",
        "--log_level=INFO",
        f"--model={model_id}",
        f"--tensor_parallel_size={num_tpu_chips}",
        "--num_nodes=1",
        "--use_ray",
        "--batch_mode=continuous",
        f"--max_num_batched_tokens={max_num_batched_tokens}",
        f"--tokens_pad_multiple={tokens_pad_multiple}",
        f"--seqs_pad_multiple={seqs_pad_multiple}",
    ]

    env_vars = {
        "MODEL_ID": model_id,
        "PJRT_DEVICE": "TPU",
        "RAY_DEDUP_LOGS": "0",
        "RAY_USAGE_STATS_ENABLED": "0",
    }
    if KAGGLE_USERNAME and KAGGLE_KEY:
        env_vars["KAGGLE_USERNAME"] = KAGGLE_USERNAME
        env_vars["KAGGLE_KEY"] = KAGGLE_KEY

    model = aiplatform.Model.upload(
        display_name=model_name,
        serving_container_image_uri=HEXLLM_DOCKER_URI,
        serving_container_command=["python", "-m", "hex_llm.entrypoints.api_server"],
        serving_container_args=hexllm_args,
        serving_container_ports=[7080],
        serving_container_predict_route="/generate",
        serving_container_health_route="/ping",
        serving_container_environment_variables=env_vars,
        serving_container_shared_memory_size_mb=(16 * 1024),  # 16 GB
        serving_container_deployment_timeout=7200,
    )

    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        deploy_request_timeout=1800,
        service_account=service_account,
    )
    return model, endpoint


def deploy_model_vllm(
    model_name: str,
    model_id: str,
    service_account: str,
    machine_type: str = "g2-standard-12",
    accelerator_type: str = "NVIDIA_L4",
    accelerator_count: int = 1,
    max_model_len: int = 8192,
    dtype: str = "bfloat16",
) -> Tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Deploys models with vLLM on GPU in Vertex AI."""
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-endpoint")

    vllm_args = [
        "--host=0.0.0.0",
        "--port=7080",
        f"--model={model_id}",
        f"--tensor-parallel-size={accelerator_count}",
        "--swap-space=16",
        "--gpu-memory-utilization=0.9",
        f"--max-model-len={max_model_len}",
        f"--dtype={dtype}",
        "--disable-log-stats",
    ]

    env_vars = {
        "MODEL_ID": model_id,
    }
    # if HF_TOKEN:
    #     env_vars["HF_TOKEN"] = HF_TOKEN

    model = aiplatform.Model.upload(
        display_name=model_name,
        serving_container_image_uri=VLLM_DOCKER_URI,
        serving_container_command=["python", "-m", "vllm.entrypoints.api_server"],
        serving_container_args=vllm_args,
        serving_container_ports=[7080],
        serving_container_predict_route="/generate",
        serving_container_health_route="/ping",
        serving_container_environment_variables=env_vars,
        serving_container_shared_memory_size_mb=(16 * 1024),  # 16 GB
        serving_container_deployment_timeout=7200,
    )

    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=service_account,
    )
    return model, endpoint


def moderate_text(text: str) -> language.ModerateTextResponse:
    """Performs text moderation using Vertex AI."""
    client = language.LanguageServiceClient()
    document = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )
    return client.moderate_text(document=document)


def show_text_moderation(
    text: str,
    response: language.ModerateTextResponse,
) -> None:
    """Shows text moderation results."""

    def confidence(category: language.ClassificationCategory) -> float:
        return category.confidence

    categories = sorted(
        response.moderation_categories,
        key=confidence,
        reverse=True,
    )
    data = ((category.name, category.confidence) for category in categories)
    df = pd.DataFrame(columns=["category", "confidence"], data=data)

    print(f"Text analyzed:\n{text}\n")
    print("Text moderation results:")
    print(df.to_markdown(index=False, tablefmt="presto", floatfmt=".0%"))

## Deploy Gemma models with Hex-LLM on TPU and apply Google Cloud Text Moderation

**Hex-LLM** is a **H**igh-**E**fficiency **L**arge **L**anguage **M**odel (LLM) TPU serving solution built with **XLA**, which is being developed by Google Cloud.

To request TPU quota, please follow the instructions at [Request a higher quota](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).

Set the model ID. Model weights can be loaded from Kaggle [google/gemma](https://www.kaggle.com/models/google/gemma/frameworks/pyTorch) or from a GCS bucket.

In [None]:
# # Select one of the four model variations.
# MODEL_ID = "google/gemma-2b-it"  # @param ["google/gemma-2b", "google/gemma-2b-it", "google/gemma-7b", "google/gemma-7b-it"]

# # Alternatively, you can specify a GCS folder that contains the original or
# # customized PyTorch model checkpoint and tokenizer. In this case, the GCS
# # folder is expected to contain "gemma-2b" or "gemma-7b" in either the GCS
# # folder path or the name of the model checkpoint file. The model checkpoint
# # file is expected to have the suffix ".ckpt" and the tokenizer file is
# # is expected to have the name "tokenizer.model" (same as the Kaggle files).
# # An example structure for the GCS folder gs://my-deployment-bucket/pytorch is:
# # - gs://my-deployment-bucket/pytorch-files/gemma-2b.ckpt
# # - gs://my-deployment-bucket/pytorch-files/tokenizer.model
# # Specify the GCS folder below:
# # MODEL_ID = "gs://"  # @param {type:"string"}
# # import re
# # MODEL_BUCKET_URI = re.search("gs://(.*?)/", MODEL_ID).group()
# # ! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $MODEL_BUCKET_URI

In [None]:
# # Finds Vertex AI prediction TPUv5e machine types in
# # https://cloud.google.com/vertex-ai/docs/predictions/use-tpu#deploy_a_model.

# # More machines as the default one doesn't work
# #https://cloud.google.com/tpu/docs/tpus-in-gke
# if "2b" in MODEL_ID:
#     # Sets ct5lp-hightpu-1t (1 TPU chip) to deploy Gemma 2B models.
#     #machine_type = "ct5lp-hightpu-1t"
#     #machine_type = "ct5lp-hightpu-4t"
#     #machine_type = "ct5lp-hightpu-8t"
#     machine_type = "tpu-v4-16"
# else:
#     # Sets ct5lp-hightpu-4t (4 TPU chips) to deploy Gemma 7B models.
#     machine_type = "ct5lp-hightpu-4t"

# # Note that a larger max_num_batched_tokens will require more TPU memory.
# max_num_batched_tokens = 11264
# # Multiple of tokens for padding alignment. A higher value can reduce
# # re-compilation but can also increase the waste in computation.
# tokens_pad_multiple = 1024
# # Multiple of sequences for padding alignment. A higher value can reduce
# # re-compilation but can also increase the waste in computation.
# seqs_pad_multiple = 32

# model_hexllm, endpoint_hexllm = deploy_model_hexllm(
#     model_name=get_job_name_with_datetime(prefix="gemma-serve-hexllm"),
#     model_id=MODEL_ID,
#     service_account=SERVICE_ACCOUNT,
#     machine_type=machine_type,
#     max_num_batched_tokens=max_num_batched_tokens,
#     tokens_pad_multiple=tokens_pad_multiple,
#     seqs_pad_multiple=seqs_pad_multiple,
# )

Once deployment succeeds, you can send requests to the endpoint with text prompts. Note that the first few prompts will take longer to execute.

Example:

```
> What is a car?
> A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.
```

In [None]:
# #ct5lp-hightpu-1t
# #ct5lp-hightpu-4t
# # Loads an existing endpoint instance using the endpoint name:
# # - Using `endpoint_name = endpoint_hexllm.name` allows us to get the endpoint
# #   name of the endpoint `endpoint_hexllm` created in the cell above.
# # - Alternatively, you can set `endpoint_name = "1234567890123456789"` to load
# #   an existing endpoint with the ID 1234567890123456789.
# # You may uncomment the code below to load an existing endpoint:
# # endpoint_name = endpoint_without_peft.name
# # # endpoint_name = ""  # @param {type:"string"}
# # aip_endpoint_name = (
# #     f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_name}"
# # )
# # endpoint_hexllm = aiplatform.Endpoint(aip_endpoint_name)

# instances = [
#     {
#         "prompt": "What is a car?",
#         "max_tokens": 50,
#         "temperature": 1.0,
#         "top_p": 1.0,
#         "top_k": 10,
#     },
# ]
# response = endpoint_hexllm.predict(instances=instances)

# prediction = response.predictions[0]
# print(prediction)

### Moderate model predictions

Text moderation analyzes a document against a list of safety attributes, which include "harmful categories" and topics that may be considered sensitive.

In [None]:
# # Sends a request to the text moderation API.
# response = moderate_text(prediction)
# # Shows text moderation results.
# show_text_moderation(prediction, response)

### Build chat applications with Gemma

You can build chat applications with the instruction finetuned Gemma models.

The instruction tuned Gemma models were trained with a specific formatter that annotates instruction tuning examples with extra information, both during training and inference. The annotations (1) indicate roles in a conversation, and (2) delineate tunes in a conversation. Below we show a sample code snippet for formatting the model prompt using the user and model chat templates for a multi-turn conversation. The relevant tokens are:
- `user`: user turn
- `model`: model turn
- `<start_of_turn>`: beginning of dialogue turn
- `<end_of_turn>`: end of dialogue turn

An example set of dialogues is:
```
<start_of_turn>user
knock knock<end_of_turn>
<start_of_turn>model
who is there<end_of_turn>
<start_of_turn>user
LaMDA<end_of_turn>
<start_of_turn>model
LaMDA who?<end_of_turn>
```
where `<end_of_turn>\n` is the turn separator and `<start_of_turn>model\n` is the prompt prefix. This means if we would like to prompt the model with a question like, `What is Cramer's Rule?`, we should use:
```
<start_of_turn>user
What is Cramer's Rule?<end_of_turn>
<start_of_turn>model
```

In [None]:
# # Chat templates.
# USER_CHAT_TEMPLATE = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
# MODEL_CHAT_TEMPLATE = "<start_of_turn>model\n{prompt}<end_of_turn>\n"

# # Sample formatted prompt.
# prompt = (
#     USER_CHAT_TEMPLATE.format(prompt="What is a good place for travel in the US?")
#     + MODEL_CHAT_TEMPLATE.format(prompt="California.")
#     + USER_CHAT_TEMPLATE.format(prompt="What can I do in California?")
#     + "<start_of_turn>model\n"
# )
# print("Chat prompt:\n", prompt)

# instances = [
#     {
#         "prompt": "What is a car?",
#         "max_tokens": 50,
#         "temperature": 1.0,
#         "top_p": 1.0,
#         "top_k": 10,
#     },
# ]
# response = endpoint_hexllm.predict(instances=instances)

# prediction = response.predictions[0]
# print(prediction)

## Deploy Gemma models with vLLM on GPU

[vLLM](https://github.com/vllm-project/vllm) is a high-throughput GPU Large Language Model (LLM) serving library which implements a number of optimizations including paged attention and continuous batching.

Note that V100 GPUs generally offer better throughput and latency performance than L4 GPUs, while L4 GPUs are generally more cost efficient than V100 GPUs. The serving efficiency of L4, V100 and T4 GPUs is inferior to that of A100 GPUs, but L4, V100 and T4 GPUs are nevertheless good serving solutions if you do not have A100 quota.

Gemma model weights are stored in bfloat16 precision. L4 and A100 GPUs are needed for vLLM serving at bfloat16 precision. V100 and T4 GPUs can support vLLM serving at float32 and float16 precision, and they are also meaningful deployment configurations.

Set the model ID. Model weights will be loaded from HuggingFace.

In [8]:
MODEL_ID = "gemma-7b-it"  # @param ["gemma-2b", "gemma-2b-it", "gemma-7b", "gemma-7b-it"]
model_id = os.path.join(model_path_prefix, MODEL_ID)

In [12]:
#HF_TOKEN = "" 
# Finds Vertex AI prediction supported accelerators and regions in
# https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.

if "2b" in MODEL_ID:
    # Sets 1 L4 (24G) to deploy Gemma 2B models.
    machine_type = "g2-standard-8"
    accelerator_type = "NVIDIA_L4"
    accelerator_count = 1
    vllm_dtype = "bfloat16"
else:
    # Sets 1 L4 (24G) to deploy Gemma 7B models.
    machine_type = "g2-standard-12"
    accelerator_type = "NVIDIA_L4"
    accelerator_count = 1
    vllm_dtype = "bfloat16"

# Alternative hardware configurations:

# Sets 1 V100 (16G) to deploy Gemma 2B models.
# machine_type = "n1-standard-8"
# accelerator_type = "NVIDIA_TESLA_V100"
# accelerator_count = 1
# vllm_dtype = "float32"

# Sets 1 T4 (16G) to deploy Gemma 2B models.
# machine_type = "n1-standard-8"
# accelerator_type = "NVIDIA_TESLA_T4"
# accelerator_count = 1
# vllm_dtype = "float32"

# Sets 1 A100 (40G) to deploy Gemma 2B and Gemma 7B models.
# machine_type = "a2-highgpu-1g"
# accelerator_type = "NVIDIA_TESLA_A100"
# accelerator_count = 1
# vllm_dtype = "bfloat16"

# Note that a larger max_model_len will require more GPU memory.
max_model_len = 2048

model_vllm, endpoint_vllm = deploy_model_vllm(
    model_name=get_job_name_with_datetime(prefix="gemma-serve-vllm"),
    model_id=model_id,
    service_account=SERVICE_ACCOUNT,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    max_model_len=max_model_len,
    dtype=vllm_dtype,
)

Creating Endpoint
Create Endpoint backing LRO: projects/1024619852873/locations/us-central1/endpoints/6631486679628644352/operations/4275288821001617408
Endpoint created. Resource name: projects/1024619852873/locations/us-central1/endpoints/6631486679628644352
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/1024619852873/locations/us-central1/endpoints/6631486679628644352')
Creating Model
Create Model backing LRO: projects/1024619852873/locations/us-central1/models/2031933772013764608/operations/2033059156524531712
Model created. Resource name: projects/1024619852873/locations/us-central1/models/2031933772013764608@1
To use this Model in another session:
model = aiplatform.Model('projects/1024619852873/locations/us-central1/models/2031933772013764608@1')
Deploying model to Endpoint : projects/1024619852873/locations/us-central1/endpoints/6631486679628644352
Deploy Endpoint model backing LRO: projects/1024619852873/locations/us-central1/endpoints/663148

Once deployment succeeds, you can send requests to the endpoint with text prompts. Sampling parameters supported by vLLM can be found [here](https://github.com/vllm-project/vllm/blob/2e8e49fce3775e7704d413b2f02da6d7c99525c9/vllm/sampling_params.py#L23-L64). Setting `raw_response` to `True` allows you to obtain raw outputs.

## POC (inferences from endpoint) --

In [13]:
# Loads an existing endpoint instance using the endpoint name:
# - Using `endpoint_name = endpoint_vllm.name` allows us to get the endpoint
#   name of the endpoint `endpoint_vllm` created in the cell above.
# - Alternatively, you can set `endpoint_name = "1234567890123456789"` to load
#   an existing endpoint with the ID 1234567890123456789.
# You may uncomment the code below to load an existing endpoint:
# endpoint_name = endpoint_without_peft.name
# # endpoint_name = ""  # @param {type:"string"}
# aip_endpoint_name = (
#     f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_name}"
# )
# endpoint_vllm = aiplatform.Endpoint(aip_endpoint_name)

instances = [
    {
        "prompt": "What is a car?",
        "max_tokens": 50,
        "temperature": 1.0,
        "top_p": 1.0,
        "top_k": 10,
        "raw_response": False,
    },
]
response = endpoint_vllm.predict(instances=instances)

prediction = response.predictions[0]
print(prediction)

Prompt:
What is a car?
Output:
A car is a wheeled vehicle propelled by an engine that is used primarily for transportation on roads.


In [None]:
instances = [
    {
        "prompt": "How to win a taekwondo tournament?",
        "max_tokens": 50,
        "temperature": 1.0,
        "top_p": 1.0,
        "top_k": 10,
        "raw_response": False,
    },
]
response = endpoint_vllm.predict(instances=instances)

prediction = response.predictions[0]
print(prediction)

In [None]:
corpus_text = "John Cena aka the flying dutchman, was best known for his sporting career at made up FC (a lesser known league \
the australian football league). His career was cut short, when he was attacked by Gru and his minons who had mistakenly \
identified him as a super villan. Following the attack, John relocated his family to the southern most tip of jumaji, where \
they operataed a successfull turtle farm that made shell fish. GPU are really helpful for AI and Nvidia is making a stack of \
moola, with this whole AI thing that kimd of kicked off Nov22. Fish live in the sea and birds in the sky, but birds can also. \
go in the sea. Billy bob was a young man and had 7 cats, 4 hats, 2 bats and often enjoyed sitting on mats. Jenny was a business woman \
who sold fish. On a good day Jenny could make $100 selling fish. Randys 4 favourite pizza toppings are lamb, apple, steak & chilly"

#question = "who is John Cena"
#question = "what is John Cena known for"
#question = "did Billy have any cats?"
#question = "how much money did Jenny make?"
question = "What were Randys favourite pizza topings?"

#prompt = corpus_text + "\n" + question
#prompt = f"Directly answer the question using only information from the provided text:\n{question}\n{corpus_text}"
#prompt = f"directly answer the {question}, using only information provided in:\n{corpus_text}.\
#           do not include information unrelated to the question the answer"
prompt = f"Answer the {question} using {corpus_text}"

# prompt = f"""Answer the question as precise as possible using the provided context. If the answer is
#               not contained in the context, say "answer not available in context" \n\n
#             Context: \n {corpus_text}?\n
#             Question: \n {question} \n
#             Answer:
#           """
           
#prompt = f"Answer the {question} using only information from {corpus_text}"


instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)


prediction = response.predictions[0]
output_index = prediction.find("Output:")


# ------ Shouldn't need any of the below, but using it to fix format of answer atm
# Check if "Output" was found in the text
if output_index != -1:
    # Extract the text after "Output" (excluding "Output" itself)
    text_after_output = prediction[output_index + len("Output:"):]
    
import re
def clean_text(text):
    # Remove leading whitespace (spaces, tabs, newlines)
    text = text.lstrip()
    # Remove any non-alphanumeric characters and spaces at the beginning of the text
    text = re.sub(r"^[^\w\s]+", "", text)
    # Remove any blank lines at the beginning
    text = re.sub(r"^\s*\n+", "", text)
    return text

# Example usage
text = text_after_output
cleaned_text = clean_text(text)
print(cleaned_text)  # Output: This is some text

In [None]:
context = "Brenda adjusted her phone, frustration simmering. The bank's app was frozen again.\
It had been happening more and more lately, leaving her scrambling to pay bills on time. Brenda, a freelance graphic designer, relied on a steady\
flow of payments. This digital hiccup could mean late fees and a tarnished reputation with clients. Fuming, Brenda called the customer service \
number. After navigating a labyrinth of automated prompts, she finally reached a representative named Kevin. His voice, though polite, lacked \
any real empathy for her predicament. He aplogiesed and suggested restarting the app, a solution Brenda had tried countless times. \
The call ended with Kevin promising a technician would look into it, a vague statement that offered little comfort. Disheartened, Brenda decided\
to visit her local branch. The once familiar space felt sterile and unwelcoming. Gone were the friendly faces she recognized; replaced by sleek \
terminals and impatient security guards. After a long wait, she finally spoke with a young teller who, while courteous, seemed utterly powerless\
to help. These app issues are beyond our control the teller mumbled, his eyes glued to his computer screen."

#question = "why was Brenda frustrated?"
question = "why was Brenda frustrated?"

prompt = f"Answer the {question} using {context}"

# prompt = f"""Answer the question as precise as possible using the provided context. If the answer is
#               not contained in the context, say "answer not available in context" \n\n
#             Context: \n {corpus_text}?\n
#             Question: \n {question} \n
#             Answer:
#           """
           
#prompt = f"Answer the {question} using only information from {corpus_text}"


instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)


prediction = response.predictions[0]
print(prediction)
#output_index = prediction.find("Output:")


# ------ Shouldn't need any of the below, but using it to fix format of answer atm
# # Check if "Output" was found in the text
# if output_index != -1:
#     # Extract the text after "Output" (excluding "Output" itself)
#     text_after_output = prediction[output_index + len("Output:"):]
    
# import re
# def clean_text(text):
#     # Remove leading whitespace (spaces, tabs, newlines)
#     text = text.lstrip()
#     # Remove any non-alphanumeric characters and spaces at the beginning of the text
#     text = re.sub(r"^[^\w\s]+", "", text)
#     # Remove any blank lines at the beginning
#     text = re.sub(r"^\s*\n+", "", text)
#     return text

# # Example usage
# text = text_after_output
# cleaned_text = clean_text(text)
# print(cleaned_text)  # Output: This is some text

## RAG

#### - Lib

In [None]:
# --- Install lib
#!pip3 install --upgrade google-cloud-aiplatform
#!pip3 install ipython pandas[output_formatting] google-cloud-language==2.10.0
#!pip install langchain
#!pip install PyPDF2
#!pip install pypdf
#!pip install sentence_transformers
#!pip install torch
#!pip install pandas
#!pip install pdfplumber

#### - RAG (POC)

In [None]:
import langchain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import PyPDF2


# Load the PDF
loader = PyPDFLoader("/home/jupyter/cba.pdf")
#loader = PyPDFDirectoryLoader("/home/jupyter/imported/cba.pdf")
#text = loader.load()

docs = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=75))

data = []

for doc in docs:
    data.append({
        'page_content': doc.page_content,
        'metadata': doc.metadata
    })
df = pd.DataFrame(data)
df['len'] = df['page_content'].str.len()

df1 = df.copy()
df1['page_content'] = df1['page_content'].str.replace('\n', '')
df2 = pd.json_normalize(df1.metadata)
df1 = pd.concat([df1, df2], axis=1)
#df1['source'] = df1['source'].str.replace('/home/jupyter/imported/', '')
df1.drop(['metadata'], axis=1, inplace=True)
display(df1.head())

In [None]:
extracted_value = df1.iloc[0, 0]  # Using iloc for indexing by position
print(extracted_value)

In [None]:
context = extracted_value

#question = "is interest paid?"
#question = "what is the target market determination?"
question = "how do i make a decision about this product?"

#prompt = f"Answer the {question} using {context}"
#prompt = f"Answer the {question} using content from this {context}. \
#         Do NOT print the prompt or question. ONLY print the actual final answer"

prompt = f"Answer the {question} using content from this {context}.\
         [INST]Directly & concisley answer the question, without any preamble[/INST]"

# prompt = f"Answer the {question} as precise as possible using content from this {context}.\
#          If the answer is not contained in the context, say '"'answer not available in context'"'\
#          [INST]Directly & concisley answer the question, without any preamble[/INST]"


# prompt = f"""Answer the question as precise as possible using the provided context. If the answer is
#               not contained in the context, say "answer not available in context" \n\n
#             Context: \n {corpus_text}?\n
#             Question: \n {question} \n
#             Answer:
#           """
           
#prompt = f"Answer the {question} using only information from {corpus_text}"


instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)


prediction = response.predictions[0]
print(prediction)


In [None]:
# As per above, but using new prompt from here: https://github.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain.ipynb


context = extracted_value

#question = "what is the target market determination?"
question = "how do i make a decision about this product?"

prompt = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context, say "answer not available in context" \n\n
                    Context: \n {context}?\n
                    Question: \n {question} \n
                    Answer:
                 """
instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)


prediction = response.predictions[0]
print(prediction)


#### - RAG (WIP)

### V1: the pilot RAG. Set up and running
Works well, but saw it had an issue returning results from tables within the PDF. So experimented with new doc ingestion that had improved results. Will make a V2 with the new ingestion set up properly to compare with this

In [None]:
#!pip install pdfplumber
# -- New ingestion (pilot format)
# (this was the pilot way of using this (2 cells below) is a more standard way of using)
# Can see the text here is in different format to the original doc ingestion
import os as os
print(os.getcwd())
os.chdir('/home/jupyter/pdf/pdftable')

import pdfplumber

# Open the PDF document
with open("bendigo-payment-facilities-terms-conditions.pdf", "rb") as pdf_file:
    pdf = pdfplumber.open(pdf_file)

# # Check the number of pages (optional)
    num_pages = len(pdf.pages)
    print("Number of pages in the PDF:", num_pages)

# # Access content of a specific page (replace 2 with the desired page number)
    page_content = pdf.pages[3].extract_text()
    print("Content of page 3:", page_content[1200:1600])

In [None]:
# -- Current doc ingestion
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("bendigo-payment-facilities-terms-conditions.pdf")
doc = loader.load()[3]
#print(doc.page_content)
print("Content of page 3:", doc.page_content[1150:1400])

In [None]:
# -- New ingestion (in standard format)
from langchain_community.document_loaders import PDFPlumberLoader
loader = PDFPlumberLoader("bendigo-payment-facilities-terms-conditions.pdf")
data = loader.load()[3]
print("Content of page 3:", data.page_content[1200:1600])


In [None]:
# -- Below is current best path (above is looking for new ingestion approaches

In [None]:
# --- Ingest and chunk data
import langchain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import PyPDF2
from langchain.document_loaders import PyPDFDirectoryLoader


# Load the PDF
#loader = PyPDFLoader("/home/jupyter/cba.pdf")
loader = PyPDFDirectoryLoader("/home/jupyter/pdf")
#text = loader.load()

docs = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50))

data = []

for doc in docs:
    data.append({
        'page_content': doc.page_content,
        'metadata': doc.metadata
    })
df = pd.DataFrame(data)
df['len'] = df['page_content'].str.len()

df1 = df.copy()
df1['page_content'] = df1['page_content'].str.replace('\n', '')
df2 = pd.json_normalize(df1.metadata)
df1 = pd.concat([df1, df2], axis=1)
df1['source'] = df1['source'].str.replace('/home/jupyter/pdf/', '')

df1['pagestr'] = df1['page'].astype(str)

df1['page_content_plus'] = df1['page_content'] + '[' + 'Source: ' + df1['source'] + ' page: ' + df1['pagestr'] + ']'

df1.drop(['metadata'], axis=1, inplace=True)
display(df1.head())

In [None]:
# Convert all text to lower case
df1['page_content'] = df1['page_content'].map(str.lower)
df1.head()

In [None]:
# --- Embedd and store (once RAG working, replace storage from df to cloudSQL)
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# Embed the chunked text
embeddings = model.encode(df1['page_content'])

# Convert embedding vectors into one-dimensional arrays
one_dimensional_embeddings = []
for embedding in embeddings:
    one_dimensional_embedding = np.ravel(embedding)
    one_dimensional_embeddings.append(one_dimensional_embedding)

# Combine the original text and embeddings into a DataFrame
data = {
    "original_text": df1['page_content'],
    "embeddings": one_dimensional_embeddings
}

dfe = pd.DataFrame(data)
df3 = pd.concat([df1, dfe], axis=1)
df3.drop(['original_text'], axis=1, inplace=True)
display(df3.head())

In [None]:
# Substring to search for
substring = 'mastercard'

# Filter the DataFrame using boolean indexing
filtered_df = df3[df3['page_content'].str.contains(substring)]

# Print the filtered DataFrame
display(filtered_df)

In [None]:
# --- Embed question (once all running, convert this and most other code to functions (or a class))
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd

# Initialize the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# -- CBA questions --
#question = "what are your distribution channels?"
#question = "does this document replace terms and conditions?"
#question = "can you list the elibibility criteria for the product?"
#question = "does the document talk about 'review triggers'?"
#question = "how are your brokers paid?"

# -- Ben questions --
#question = "can you list your Payment Facilities?" # Perfect answer
question = "what is the eligibility for the Easy Money Card?" # Failed to retrieve answer
#question = "what is the eligibility for the Bendigo Easy Money Card?" # Retrieves terrible results
#question = "can the Easy Money Card be used overseas?"


# Embed the question
embeddings = model.encode([question])

# Flatten the embedding list into a one-dimensional array
#ne_dimensional_embeddings = np.concatenate(embeddings).ravel()
one_dimensional_embeddings = np.ravel(embeddings)

# Convert array to a single-row dataframe
df_question = pd.DataFrame({'embeddings': [one_dimensional_embeddings]})

df_question['question'] = question
df_question = df_question[['question','embeddings']]
display(df_question)

In [None]:
# ---- Semantic search
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load your pre-trained sentence transformer model
#model = SentenceTransformer('all-mpnet-base-v2')  # Replace with your desired model

# Access your dataframe with embedded text column
#df = pd.read_csv('your_dataframe.csv')  # Replace with your data source

# Question to embed
#question = "How to build a sentence embedding model?"

# Embed the question
question_embedding = model.encode(question, convert_to_tensor=True)

# Calculate cosine similarities using dot product
#similarities = df3['embeddings'].apply(lambda x: torch.dot(question_embedding, x))
similarities = df3['embeddings'].apply(lambda x: np.dot(question_embedding, x))


# Identify the 3 most similar rows
most_similar_indices = similarities.nlargest(4).index
most_similar_rows = df3.iloc[most_similar_indices]

display(most_similar_rows)

In [None]:
#cont = most_similar_rows['page_content_plus'].tolist()
cont = most_similar_rows['page_content'].tolist()
cont

In [None]:
# ---- Retrival and response
# -- Testing the new doc ingestion approach v the current... 'cont' = current --
context = cont
#context = page_content
# ------------------------

#question = "is interest paid?"
#question = "what is the target market determination?"
#question = "how do i make a decision about this product?"

#prompt = f"Answer the {question} using {context}"
#prompt = f"Answer the {question} using content from this {context}. \
#         Do NOT print the prompt or question. ONLY print the actual final answer"

# prompt = f"Answer the {question} using content from this {context}.\
#          [INST]Directly & concisley answer the question, without any preamble[/INST]"


# prompt source: https://www.kaggle.com/code/gpreda/exploring-eu-ai-act-with-gemma/
prompt = f"""
You are an AI Agent specialized to answer to questions about the context provided.
In order to create the answer, please only use the information from the
context provided (Context). Do not include other information.
Answer with simple words.
If needed, include also explanations.
Question: {question}
Context: {context}
Answer:
"""

# prompt = f"Answer the {question} as precise as possible using content from this {context}.\
#          If the answer is not contained in the context, say '"'answer not available in context'"'\
#          [INST]Directly & concisley answer the question, without any preamble[/INST]"


# prompt = f"""Answer the question as precise as possible using the provided context. If the answer is
#               not contained in the context, say "answer not available in context" \n\n
#             Context: \n {corpus_text}?\n
#             Question: \n {question} \n
#             Answer:
#           """
           
#prompt = f"Answer the {question} using only information from {corpus_text}"


instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)


prediction = response.predictions[0]
print(prediction)

In [None]:
# -- Can i boost the performance of RAG (through advanced rag)
# Going to get LLM to reword question 3 times, then retrive, then compare retrived and pick the most frequent results as context.
prompt = f"""
You are an AI Agent specialized to answer to questions about the context provided.
In order to create the answer, please only use the information from the
context provided (Context). Do not include other information.
Answer with simple words.
Once the most answer as been located in the context, find the text contained in the '[]'
that immediately follows the relevant context, and include that in the answer
If needed, include also explanations.
Question: {question}
Context: {context}
Answer:
"""

instances = [
    {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.15,
        "top_p": 1.0,
        "top_k": 10,
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)

prediction = response.predictions[0]
print(prediction)

### V2: WIP
Does this prove the new doc ingestion approach performs better to V1?

A key learning. When using the data in below format, it gets the right answer for 'easy mc eli', but once this data is in a dataframe, 
gives the wrong answer. So, it may be less to do with the doc loader and MORE to do with how the data moves from ingestion to storage (df).
Suspect if the data in the df row was on seperate lines (not \), then it might work with either loader.

In [14]:
# The one version that works, uses the below approach (page_content), try to work that into bigger code 
## -- Validates new approach and data in different format to other loader. (only good for single doc)
os.chdir('/home/jupyter/pdf/')
from langchain_community.document_loaders import PDFPlumberLoader
loader = PDFPlumberLoader("bendigo-payment-facilities-terms-conditions.pdf")
data = loader.load()[3]
print("Content of page 3:", data.page_content[1200:1600])
test = data.page_content
#print(test)
test

Content of page 3: n connection with your Account.
The types of Debit Cards we offer are:
Debit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit
Mastercard Card Mastercard
Eligibility 16 years of age or older 12 years of age or older 12-15 years of age
Access Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is
accepted, including ATM accepted, including ATM accepted, including ATM
withdraw


'Fees and charges that apply to your Payment Facilities are set out in our Schedule of Fees, Charges\nand Transaction Account Rebates. We may debit applicable fees and charges to your Account\nwhenever they become payable.\nCopies of each of the documents described above are available on request and can be accessed on\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes 

In [51]:
# Trialing new approach to preserve text format (not using)
import PyPDF2
import pdfplumber

# Open the PDF file
with open('bendigo-payment-facilities-terms-conditions.pdf', 'rb') as file:
    # Create a PDF reader object
    pdf_reader = PyPDF2.PdfReader(file)

    # Get the number of pages in the PDF
    num_pages = len(pdf_reader.pages)

    # Initialize an empty string to store the extracted text
    extracted_text = ''

    # Loop through each page and extract the text
    for page_num in range(num_pages):
        # Get the current page
        page = pdf_reader.pages[page_num]

        # Use pdfplumber to extract the text with formatting
        with pdfplumber.open('bendigo-payment-facilities-terms-conditions.pdf') as pdf:
            current_page = pdf.pages[page_num]
            page_text = current_page.extract_text(x_tolerance=1, y_tolerance=1)

        # Append the extracted text to the overall text
        extracted_text += page_text + '\n'

# Print the extracted text
print(extracted_text[3800:4800])
test3 = extracted_text[3800:4800]

sue to you or at your request or
allow you to use in connection with your Account.
The types of Debit Cards we offer are:
Debit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit
Mastercard Card Mastercard
Eligibility
16 years of age or older 12 years of age or older 12-15 years of age
Access
Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is
accepted, including ATM accepted, including ATM accepted, including ATM
withdrawals. withdrawals. withdrawals.
Overseas access
Overseas ATM access No overseas access Overseas ATM access
available at any ATM available. available at any ATM
bearing the Mastercard, bearing the Mastercard,
CIRRUS, or MAESTRO CIRRUS, or MAESTRO
symbol. symbol.
General / other
Restrictions apply to
criteria, features or
Transactions at certain
restrictions
types of merchants (see
‘Youth Debit Mastercard’
below).
Debit Cards are not available on all Accounts. See the terms and conditions for your Account to
determine whether a Debit Card may be 

In [54]:
# Updated to use plumber + character text splitter (now being trialed)
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
from langchain.document_loaders import PDFPlumberLoader

def load_pdfs_from_directory(directory_path):
    docs = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory_path, filename)
            loader = PDFPlumberLoader(file_path)
            loaded_docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=25)
            for doc in loaded_docs:
                texts = text_splitter.split_text(doc.page_content)
                for text in texts:
                    docs.append({
                        'page_content': text,
                        'metadata': doc.metadata
                    })
    return docs

directory_path = "/home/jupyter/pdf"
all_pdf_docs = load_pdfs_from_directory(directory_path)

data = all_pdf_docs
data
#df = pd.DataFrame(data)
#df.head()

[{'page_content': 'Bendigo Payment Facilities\nTerms and Conditions\n9 November 2023',
  'metadata': {'source': '/home/jupyter/pdf/bendigo-payment-facilities-terms-conditions.pdf',
   'file_path': '/home/jupyter/pdf/bendigo-payment-facilities-terms-conditions.pdf',
   'page': 0,
   'total_pages': 50,
   'Creator': 'Power PDF Create',
   'Producer': 'Power PDF Create',
   'CreationDate': "D:20231107131823+10'00'",
   'ModDate': "D:20231108123535+10'00'",
   'Author': 'Bianca Pisoni',
   'Title': 'Bendigo-payment-facilities-terms-and-conditions-4-october-2022;',
   'ContentTypeId': '0x010100E809092CA74C4245B60BDAEA41DE29DE',
   '_dlc_DocIdItemGuid': 'da16ccf0-5d9b-4890-8741-29ad99354354',
   'PADocID': '39175925v1',
   'MediaServiceImageTags': '',
   'Subject': 'personal-payment-facilities-terms-and-conditions-4-october-2022;',
   'Keywords': 'personal-payment-facilities-terms-and-conditions-4-october-2022'}},
 {'page_content': 'Contents\nBendigo Payment Facilities Terms and Conditions 3

In [1]:
# -- Retired
# Below is replaced with above. The 'loader.load_and_split...' was wrong (as loader not defined in this code)
# Testing a new process to add the ingested text to dataframe, but maintaining row counts, over compressions data with \

# --- Ingest and chunk data (now using new doc ingestion)

# Uses same loader as above, but now works for multiple docs
#from langchain.document_loaders import PDFPlumberLoader
import os

def load_pdfs_from_directory(directory_path):
    docs = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory_path, filename)
            loader = PDFPlumberLoader(file_path)
            docs.extend(loader.load())
    return docs

# Example usage
directory_path = "/home/jupyter/pdf/pdftable"
all_pdf_docs = load_pdfs_from_directory(directory_path)

# -- Below works, but, probably need to put into dataframe
# Now you can work with the list of Document objects
# for doc in all_pdf_docs:
#     print(doc.page_content)


# -- 16.04.24 trying to add splitter in here --------------------------------
#all_pdf_docs = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=75))
# -- 16.04.24 trying to add splitter in here --------------------------------

# -- Trying to get above into dataframe.
data = []

# -- Not part of this code, only here to compare --
# data = loader.load()[3]
# print("Content of page 3:", data.page_content[1200:1600])
# test = data.page_content
# -------------------------------------------------


for doc in all_pdf_docs:
    data.append({
        'page_content': doc.page_content,
        'metadata': doc.metadata
    })
df = pd.DataFrame(data)
df['len'] = df['page_content'].str.len()

df1 = df.copy()
#df1['page_content'] = df1['page_content'].str.replace('\n', '')
df2 = pd.json_normalize(df1.metadata)
df1 = pd.concat([df1, df2], axis=1)
df1['source'] = df1['source'].str.replace('/home/jupyter/pdf/', '')

df1['pagestr'] = df1['page'].astype(str)

df1['page_content_plus'] = df1['page_content'] + '[' + 'Source: ' + df1['source'] + ' page: ' + df1['pagestr'] + ']'

#df1.drop(['metadata'], axis=1, inplace=True)
# df1.drop(['metadata', 'total_pages', 'Creator', 'Producer', 'CreationDate', 'ModDate', 'Author', 'ContentTypeId', '_dlc_DocIdItemGuid',\
#            'PADocID', 'MediaServiceImageTags', 'file_path', 'source', 'Subject', 'Keywords'], axis=1, inplace=True)
#'Company', 

df1['page_content2'] = df1['page_content'].astype(object)
    
    
display(df1.head(2))

NameError: name 'loader' is not defined

In [16]:
# lets have a look at 'data' (the list) before it goes in to dataframe.
#data

#df1_test = df1[(df1['source']=='bendigo-payment-facilities-terms-conditions.pdf') & (df1['page']==3)]
#df1_test

In [17]:
#pd.set_option('display.max_colwidth', None)
#testrows = df1_test.head(2)
#testrows = df1_test.loc[[2]]
#testrows = df1.loc[[3]]
#testrows = testrows[['page_content']]
#display(testrows)

In [18]:
#testrows = df1_test.iloc[1:2, 0:1]
#testrows

In [None]:
# -- Back to 1st end to end path on this approach

In [None]:
## -- Validates new approach and data in different format to other loader. (only good for single doc)
os.chdir('/home/jupyter/pdf/')
from langchain_community.document_loaders import PDFPlumberLoader
loader = PDFPlumberLoader("bendigo-payment-facilities-terms-conditions.pdf")
data = loader.load()[3]
print("Content of page 3:", data.page_content[1200:1600])
test = data.page_content
#print(test)
test

In [None]:
df_ = pd.DataFrame([test])
df_

#### Back to path

In [62]:
# --- Ingest and chunk data (now using new doc ingestion)

# Uses same loader as above, but now works for multiple docs
#from langchain.document_loaders import PDFPlumberLoader
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
from langchain.document_loaders import PDFPlumberLoader

def load_pdfs_from_directory(directory_path):
    docs = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory_path, filename)
            loader = PDFPlumberLoader(file_path)
            loaded_docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            for doc in loaded_docs:
                texts = text_splitter.split_text(doc.page_content)
                for text in texts:
                    docs.append({
                        'page_content': text,
                        'metadata': doc.metadata
                    })
    return docs

directory_path = "/home/jupyter/pdf"
all_pdf_docs = load_pdfs_from_directory(directory_path)

data = all_pdf_docs
df = pd.DataFrame(data)


df['len'] = df['page_content'].str.len()

df1 = df.copy()

df1['page_content_data'] = df1['page_content']
df1['page_content'] = df1['page_content'].str.replace('\n', '')

df2 = pd.json_normalize(df1.metadata)
df1 = pd.concat([df1, df2], axis=1)
df1['source'] = df1['source'].str.replace('/home/jupyter/pdf/', '')

df1['pagestr'] = df1['page'].astype(str)

df1['page_content_plus'] = df1['page_content'] + '[' + 'Source: ' + df1['source'] + ' page: ' + df1['pagestr'] + ']'

#df1.drop(['metadata'], axis=1, inplace=True)
# df1.drop(['metadata', 'total_pages', 'Creator', 'Producer', 'CreationDate', 'ModDate', 'Author', 'ContentTypeId', '_dlc_DocIdItemGuid',\
#            'PADocID', 'MediaServiceImageTags', 'Company', 'file_path', 'source', 'Subject', 'Keywords'], axis=1, inplace=True)
 
display(df1.head(2))

Unnamed: 0,page_content,metadata,len,page_content_data,source,file_path,page,total_pages,Creator,Producer,...,Title,ContentTypeId,_dlc_DocIdItemGuid,PADocID,MediaServiceImageTags,Subject,Keywords,Company,pagestr,page_content_plus
0,Bendigo Payment FacilitiesTerms...,{'source': '/home/jupyter/pdf/b...,63,Bendigo Payment Facilities\nTer...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,0,50,Power PDF Create,Power PDF Create,...,Bendigo-payment-facilities-term...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,0,Bendigo Payment FacilitiesTerms...
1,ContentsBendigo Payment Facilit...,{'source': '/home/jupyter/pdf/b...,493,Contents\nBendigo Payment Facil...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,1,50,Power PDF Create,Power PDF Create,...,Bendigo-payment-facilities-term...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,1,ContentsBendigo Payment Facilit...


In [63]:
# --- Embedd and store (once RAG working, replace storage from df to cloudSQL)
pd.set_option('display.max_colwidth', 35)
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# Embed the chunked text
embeddings = model.encode(df1['page_content'])

# Convert embedding vectors into one-dimensional arrays
one_dimensional_embeddings = []
for embedding in embeddings:
    one_dimensional_embedding = np.ravel(embedding)
    one_dimensional_embeddings.append(one_dimensional_embedding)

# Combine the original text and embeddings into a DataFrame
data = {
    "original_text": df1['page_content'],
    "embeddings": one_dimensional_embeddings
}

dfe = pd.DataFrame(data)
df3 = pd.concat([df1, dfe], axis=1)
df3.drop(['original_text'], axis=1, inplace=True)
display(df3.head(3))

Unnamed: 0,page_content,metadata,len,page_content_data,source,file_path,page,total_pages,Creator,Producer,...,ContentTypeId,_dlc_DocIdItemGuid,PADocID,MediaServiceImageTags,Subject,Keywords,Company,pagestr,page_content_plus,embeddings
0,Bendigo Payment FacilitiesTerms...,{'source': '/home/jupyter/pdf/b...,63,Bendigo Payment Facilities\nTer...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,0,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,0,Bendigo Payment FacilitiesTerms...,"[-0.1684852, 0.1925771, 0.03708..."
1,ContentsBendigo Payment Facilit...,{'source': '/home/jupyter/pdf/b...,493,Contents\nBendigo Payment Facil...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,1,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,1,ContentsBendigo Payment Facilit...,"[-0.14589557, 0.039279297, 0.00..."
2,14. Records of transactions 351...,{'source': '/home/jupyter/pdf/b...,480,14. Records of transactions 35\...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,1,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,1,14. Records of transactions 351...,"[0.024490027, 0.12706426, 0.013..."


#### Embed question

In [129]:
# --- Embed question (once all running, convert this and most other code to functions (or a class))
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd

# Initialize the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# -- CBA questions --
#question = "what are your distribution channels?"
#question = "does this document replace terms and conditions?"
#question = "can you list the elibibility criteria for the product?"
#question = "does the document talk about 'review triggers'?"
#question = "how are your brokers paid?"

# -- Ben questions --
#question = "can you list your Payment Facilities?" # Perfect answer
#question = "what is the eligibility for the Easy Money Card?" # Failed to retrieve answer
#question = "what is the eligibility for the youth debit Card?" # Failed to retrieve answer
#question = "what is the eligibility for the debit mastercard?" # Failed to retrieve answer
#question = "can i use my debit card at a non bendigo atm?"
#question = "what happens if i used my debit card after its expired?"
#question = "What is the maximum I can withdraw from an atm?"
#question = "why did my youth debit master card not let me buy cigarettes?"
#question = "can i add my debit card to multiple digital wallets?"
#question = "what do i do if someone has added my debit card to their digital wallet?"
#question = "Can i used my card for international transactions?"
#question = "What is the maximum pay anyone I can set over the phone?"
question = "How do you process pay anyone payments?"

#question = "what is the eligibility for the Bendigo Easy Money Card?" # Retrieves terrible results
#question = "can the Easy Money Card be used overseas?"


# Embed the question
embeddings = model.encode([question])

# Flatten the embedding list into a one-dimensional array
#ne_dimensional_embeddings = np.concatenate(embeddings).ravel()
one_dimensional_embeddings = np.ravel(embeddings)

# Convert array to a single-row dataframe
df_question = pd.DataFrame({'embeddings': [one_dimensional_embeddings]})

df_question['question'] = question
df_question = df_question[['question','embeddings']]
display(df_question)

Unnamed: 0,question,embeddings
0,How do you process pay anyone p...,"[0.024945913, 0.101148486, 0.00..."


In [130]:
# ---- Semantic search
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load your pre-trained sentence transformer model
#model = SentenceTransformer('all-mpnet-base-v2')  # Replace with your desired model

# Access your dataframe with embedded text column
#df = pd.read_csv('your_dataframe.csv')  # Replace with your data source

# Question to embed
#question = "How to build a sentence embedding model?"

# Embed the question
question_embedding = model.encode(question, convert_to_tensor=True)

# Calculate cosine similarities using dot product
#similarities = df3['embeddings'].apply(lambda x: torch.dot(question_embedding, x))
similarities = df3['embeddings'].apply(lambda x: np.dot(question_embedding, x))


# Identify the 3 most similar rows
most_similar_indices = similarities.nlargest(8).index
most_similar_rows = df3.iloc[most_similar_indices]

display(most_similar_rows)

Unnamed: 0,page_content,metadata,len,page_content_data,source,file_path,page,total_pages,Creator,Producer,...,ContentTypeId,_dlc_DocIdItemGuid,PADocID,MediaServiceImageTags,Subject,Keywords,Company,pagestr,page_content_plus,embeddings
80,payment at the time it is to be...,{'source': '/home/jupyter/pdf/b...,440,payment at the time it is to be...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,11,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,11,payment at the time it is to be...,"[0.07403093, 0.10757666, 0.0167..."
94,a Pay Anyone payment has not be...,{'source': '/home/jupyter/pdf/b...,495,a Pay Anyone payment has not be...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,13,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,13,a Pay Anyone payment has not be...,"[0.1179436, 0.09825538, 0.05006..."
79,"to be made and, if relevant, de...",{'source': '/home/jupyter/pdf/b...,416,"to be made and, if relevant, de...",bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,11,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,11,"to be made and, if relevant, de...","[-0.07513793, 0.03444433, -0.06..."
93,receive the returned funds; or•...,{'source': '/home/jupyter/pdf/b...,473,receive the returned funds; or\...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,13,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,13,receive the returned funds; or•...,"[0.08317044, 0.08395549, 0.0072..."
85,Once you have requested a Pay A...,{'source': '/home/jupyter/pdf/b...,474,Once you have requested a Pay A...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,12,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,12,Once you have requested a Pay A...,"[0.12634808, 0.046889484, -0.00..."
84,When you instruct us to make a ...,{'source': '/home/jupyter/pdf/b...,485,When you instruct us to make a ...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,12,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,12,When you instruct us to make a ...,"[0.110002905, 0.0126431445, 0.0..."
86,We may choose which option to u...,{'source': '/home/jupyter/pdf/b...,443,We may choose which option to u...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,12,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,12,We may choose which option to u...,"[0.090245575, 0.06374071, -0.00..."
88,we have processed the payment w...,{'source': '/home/jupyter/pdf/b...,431,we have processed the payment w...,bendigo-payment-facilities-term...,/home/jupyter/pdf/bendigo-payme...,12,50,Power PDF Create,Power PDF Create,...,0x010100E809092CA74C4245B60BDAE...,da16ccf0-5d9b-4890-8741-29ad993...,39175925v1,,personal-payment-facilities-ter...,personal-payment-facilities-ter...,,12,we have processed the payment w...,"[0.047835246, 0.14793652, -0.01..."


In [131]:
#cont = most_similar_rows['page_content_plus'].tolist()
#cont = most_similar_rows['page_content'].tolist()
cont = most_similar_rows['page_content_data'].tolist()
cont

['payment at the time it is to be processed.\nWe are not required to process a Pay Anyone payment if you do not give us all of the information we\nrequire or if any of the information you give us is inaccurate and we are not liable for any loss you\nsuffer as a result of you giving us incomplete or inaccurate instructions.\n7.3 Payment descriptions\nWhere we allow you to provide a payment description or reference with a Pay Anyone payment, you',
 'a Pay Anyone payment has not been processed in accordance with your instructions;\n• you become aware that a Pay Anyone payment made to a PayID from your Account was directed\nto an incorrect recipient; or\n• you were fraudulently induced to make a Pay Anyone payment.\nWhere we consider it appropriate and we are reasonably able to do so, we may request that the\nfinancial institution to whom the funds were transferred returns the funds to us, on your behalf.\nHowever, this is not always possible.',
 'to be made and, if relevant, details about

In [133]:
cont_plus = most_similar_rows['page_content_plus'].tolist()
cont_plus

['payment at the time it is to be processed.We are not required to process a Pay Anyone payment if you do not give us all of the information werequire or if any of the information you give us is inaccurate and we are not liable for any loss yousuffer as a result of you giving us incomplete or inaccurate instructions.7.3 Payment descriptionsWhere we allow you to provide a payment description or reference with a Pay Anyone payment, you[Source: bendigo-payment-facilities-terms-conditions.pdf page: 11]',
 'a Pay Anyone payment has not been processed in accordance with your instructions;• you become aware that a Pay Anyone payment made to a PayID from your Account was directedto an incorrect recipient; or• you were fraudulently induced to make a Pay Anyone payment.Where we consider it appropriate and we are reasonably able to do so, we may request that thefinancial institution to whom the funds were transferred returns the funds to us, on your behalf.However, this is not always possible.[So

In [103]:
#new_text = "'" + ', '.join([item.strip("[]()") for item in cont]) + "'"
#new_text

In [None]:
## -- Testing raw ingested data, vs same data moved to dataframe
os.chdir('/home/jupyter/pdf/')
from langchain_community.document_loaders import PDFPlumberLoader
loader = PDFPlumberLoader("bendigo-payment-facilities-terms-conditions.pdf")
data = loader.load()[3]
print("Content of page 3:", data.page_content[1200:1600])
test = data.page_content
#print(test)
test

In [None]:
test

In [None]:
test4 = 'imposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions'

- When answers are correct 'Eligibility' appears as per below
Debit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or...

- What i am seeing most often (when it fails) is it looks like this... (with a new line)
Mastercard Card Mastercard\nEligibility\n16 years of age or older 12 years

In [None]:
testa = 'Fees and charges that apply to your Payment Facilities are set out in our Schedule of Fees, Charges\nand Transaction Account Rebates. We may debit applicable fees and charges to your Account\nwhenever they become payable.\nCopies of each of the documents described above are available on request and can be accessed on\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions\ntypes of merchants (see\n‘Youth Debit Mastercard’\nbelow).\nDebit Cards are not available on all Accounts. See the terms and conditions for your Account to\ndetermine whether a Debit Card may be issued for your Account and, if so, which types of Debit\nCards are available.\nBendigo Payment Facilities Terms and Conditions 9 November 2023 4 of 50\n'

In [38]:
testb ='\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions'

In [34]:
testc ='\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions'

In [43]:
testd = '3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions\ntypes of merchants (see\n‘Youth Debit Mastercard’\nbelow).\nDebit Cards are not available on all Accounts. See the terms and conditions for your Account to\ndetermine whether a Debit Card may be issued for your Account and, if so, which types of Debit\nCards are available.'

In [81]:
ootb = 'The types of Debit Cards we offer are:\nDebit Card type Bendigo Debit \nMastercard Bendigo Easy Money \nCard Bendigo Youth Debit \nMastercard Eligibility Access 16 years of age or older 12 years of age or older 12-15 years of age'

In [132]:
#Fees and charges that apply to your Payment Facilities are set out in our Schedule of Fees, Charges\nand Transaction Account Rebates. We may debit applicable fees and charges to your Account\nwhenever they become payable.\nCopies of each of the documents described above are available on request and can be accessed on\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.
# ---- Retrival and response
# -- Testing the new doc ingestion approach v the current... 'cont' = current --
# the below is an extract of
#context = 'Fees and charges that apply to your Payment Facilities are set out in our Schedule of Fees, Charges\nand Transaction Account Rebates. We may debit applicable fees and charges to your Account\nwhenever they become payable.\nCopies of each of the documents described above are available on request and can be accessed on\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions\ntypes of merchants (see\n‘Youth Debit Mastercard’\nbelow).\nDebit Cards are not available on all Accounts. See the terms and conditions for your Account to\ndetermine whether a Debit Card may be issued for your Account and, if so, which types of Debit\nCards are available.\nBendigo Payment Facilities Terms and Conditions 9 November 2023 4 of 50\n'
#context = 'Not all merchants or ATMs may accept your type of Debit Card and the functionality available in\nrespect of ATMs may differ depending on the provider of the ATM and the actual ATM used.\nBank@Post(cid:222) services are subject to any requirements, conditions or restrictions imposed by Australia\nPost and are not available at all post offices (see the Australia Post website for details about which\npost offices provide Bank@Post(cid:222) services).\nWhen using your Debit Card or Debit Card details, a PIN or your signature may be required. In some\ncases a PIN or signature may not be required, such as contactless transactions below a certain\namount. These amounts may differ based on the nature of your Transaction or the merchant.\nWhen you use your Debit Card or Debit Card details you authorise us to debit your relevant Account\nwith the amount of the Transaction. If using a Debit Card, selecting SAV/CHQ/CR or using\ncontactless payments will not change the relevant Account from which a payment is debited.\nIf you setup a recurring payment with a merchant using your Debit Card details (such as a direct\ndebit), you may need to give the merchant your updated Debit Card details if we issue you with a\nreplacement Debit Card. In some cases we may be able to automatically provide your new Debit\nCard details to a merchant with whom you have a recurring payment set up through the applicable\nCard Scheme, where arrangements to facilitate this are in place between us, the Card Scheme and\nthe merchant. You agree we may do this where we can but are not obliged to do so and it remains\nyour responsibility to ensure merchants have your correct and current Debit Card details at all times. If\nyou want to cancel a recurring payment you should give the merchant as much notice as possible\nbefore the next recurring payment is due.\n3.3 Debit Card Transaction limits\nThe maximum amount you may withdraw in cash at an ATM or EFTPOS terminal in a day is $1,000.00\n(AUD) per Debit Card.\n3.4 Youth Debit Mastercard\nThe Youth Debit Mastercard restricts certain types of Transactions based on the merchant category\ncode of the merchant.\nWe may decline any Transaction on a Youth Debit Mastercard where the merchant category code\nfor the relevant merchant relates to gambling, cigarettes, alcohol (including bars and nightclubs),\ndating services, direct marketing by telemarketing or internet merchants (such as information services,\npolls, sweepstakes, horoscope readings, adult chat and entertainment, sports scores, stock market\nquotes, horoscope readings and other audio text or videotext services), massage parlours, and online\nor wholesale pharmacy purchases.\nWe may make changes to the types of merchant category codes we do not permit Transactions on\nfor Youth Debit Mastercard at any time.\n3.5 Digital Wallets\nIf you have a compatible device, you may use your Debit Card by adding it to a Digital Wallet that we\nsupport for your type of Debit Card. The Digital Wallets that we support are listed on our website.\nBefore you add a Debit Card to a Digital Wallet you will need to meet our identification and\nverification requirements.\nIn addition to these Terms and Conditions, use of your Debit Card through a Digital Wallet is subject\nto our Digital Wallet Terms of Use, which are provided when a Debit Card is added to a Digital Wallet\nthat we support and are available upon request.\nYour liability for Transactions conducted using a Debit Card through a Digital Wallet is determined in\nthe same way as if the Transactions were conducted using the Debit Card itself.\nBendigo Payment Facilities Terms and Conditions 9 November 2023 6 of 50\n, Fees and charges that apply to your Payment Facilities are set out in our Schedule of Fees, Charges\nand Transaction Account Rebates. We may debit applicable fees and charges to your Account\nwhenever they become payable.\nCopies of each of the documents described above are available on request and can be accessed on\nour website at www.bendigobank.com.au/disclosure-documents or by contacting us using the details\nat the end of this document.\nImportant We may make changes to these Terms and Conditions from time to time without your\nconsent. See ‘Changes we may make without your consent’ below for more information.\n2. Codes of Practice\n2.1 Banking Code of Practice\nThe relevant provisions of the Banking Code of Practice apply to the Payment Facilities to which\nthese Terms and Conditions apply. A copy of the Banking Code of Practice is available on our\nwebsite.\n2.2 ePayments Code\nWe will comply with the terms of the ePayments Code. Nothing in these Terms and Conditions\nimposes any liability or responsibility on you unless permitted by the ePayments Code.\n3. Debit Cards\nThis ‘Debit Cards’ section applies in relation to any Debit Cards we issue to you or at your request or\nallow you to use in connection with your Account.\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions\ntypes of merchants (see\n‘Youth Debit Mastercard’\nbelow).\nDebit Cards are not available on all Accounts. See the terms and conditions for your Account to\ndetermine whether a Debit Card may be issued for your Account and, if so, which types of Debit\nCards are available.\nBendigo Payment Facilities Terms and Conditions 9 November 2023 4 of 50\n'
#context = '3. Debit Cards \nThis ‘ Debit Cards ’ section applies in relation to any Debit Cards we issue to you or at your request or \nallow you to use in connection with your Account. \nThe types of Debit Cards we offer are: \nDebit Card type Bendigo Debit \nMastercard Bendigo Easy Money \nCard Bendigo Youth Debit \nMastercard \nEligibility  16 years of age or older  12 years of age or older 12-15 years of age  \nAccess  Anywhere Mastercard is \naccepted, including ATM \nwithdrawals.  Anywhere EFTPOS is \naccepted, including ATM \nwithdrawals. Anywhere Mastercard is \naccepted, including ATM \nwithdrawals.  \nOverseas access  Overseas ATM access \navailable at any ATM \nbearing the Mastercard, \nCIRRUS, or MAESTRO \nsymbol.  No overseas access \navailable. Overseas ATM access \navailable at any ATM \nbearing the Mastercard, \nCIRRUS, or MAESTRO \nsymbol.  \nGeneral / other \ncriteria, features or \nrestrictions    Restrictions apply to \nTransactions at certain \ntypes of merchants (see \n‘Youth Debit Mastercard’ \nbelow)., 3.3 Debit Card Transaction limits \nThe maximum amount you may withdraw in cash at an ATM or EFTPOS terminal in a day is $1,000.00 \n(AUD) per Debit Card. \n3.4 Youth Debit Mastercard \nThe Youth Debit Mastercard restricts certain types of Transactions based on the merchant category \ncode of the merchant.  \nWe may decline any Transaction on a Youth Debit Mastercard where the merchant category code \nfor the relevant merchant relates to gambling, cigarettes, alcohol (including bars and nightclubs), \ndating services, direct marketing by telemarketing or internet merchants (such as information services, \npolls, sweepstakes, horoscope readings, adult chat and entertainment, sports scores, stock market \nquotes, horoscope readings and other audio text or videotext services), massage parlours, and online \nor wholesale pharmacy purchases.  \nWe may make changes to the types of merchant category codes we do not permit Transactions on \nfor Youth Debit Mastercard at any time. \n3.5 Digital Wallets'
#cont = '\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age\nAccess Anywhere Mastercard is Anywhere EFTPOS is Anywhere Mastercard is\naccepted, including ATM accepted, including ATM accepted, including ATM\nwithdrawals. withdrawals. withdrawals.\nOverseas access Overseas ATM access No overseas access Overseas ATM access\navailable at any ATM available. available at any ATM\nbearing the Mastercard, bearing the Mastercard,\nCIRRUS, or MAESTRO CIRRUS, or MAESTRO\nsymbol. symbol.\nGeneral / other Restrictions apply to\ncriteria, features or\nTransactions at certain\nrestrictions'
#test = '\nThe types of Debit Cards we offer are:\nDebit Card type Bendigo Debit Bendigo Easy Money Bendigo Youth Debit\nMastercard Card Mastercard\nEligibility 16 years of age or older 12 years of age or older 12-15 years of age'


context = cont
#context = page_content
#context = test
#context = test1
#context = test2
#context = test3
#context = test4
#context = testa
#context = testb
#context = testc
#context = testd
#context = df_
#context = new_text
#context = testrows
#context = ootb
# ------------------------

# prompt source: https://www.kaggle.com/code/gpreda/exploring-eu-ai-act-with-gemma/
prompt = f"""
You are an AI Agent specialized to answer to questions about the context provided.
In order to create the answer, please only use the information from the
context provided (Context). Do not include other information.
Answer with simple words.
If needed, include also explanations.
Question: {question}
Context: {context}
Answer:
"""

instances = [
    {
        "prompt": prompt,
        "max_tokens": 100, #100
        "temperature": 0.25, #0.15
        "top_p": 0.025, # 1.0
        "top_k": 40, #10
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)
prediction = response.predictions[0]
print(prediction)

Sure, here is the answer to the question:

To process payments through Pay Anyone, you must provide complete and accurate information, ensure there are sufficient funds in your nominated Account, and authorize us to act on your instructions. Once you have requested a Pay Anyone payment, the timing of when the funds will be received and made available to the recipient depends on the payment system or channel it is processed through. We process Pay Anyone payments as Osko Payments, Fast Payments, or Direct Entry Payments.


In [134]:
# Copy of above, experimenting with prompt
# prompt source: https://www.kaggle.com/code/gpreda/exploring-eu-ai-act-with-gemma/
# Changes made to prommpt;
# added: If you can not locate the answer, then say "i am unsure"
# added: please provide 

prompt = f"""
You are an AI Agent specialized to answer to questions about the context provided.
In order to create the answer, please only use the information from the
context provided (Context). Do not include other information.
Answer with simple words.
If needed, include also explanations.
If you can not locate the answer, then say "i am unsure"
Question: {question}
Context: {context}
Answer:
"""

instances = [
    {
        "prompt": prompt,
        "max_tokens": 100, #100
        "temperature": 0.25, #0.15
        "top_p": 0.025, # 1.0
        "top_k": 40, #10
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)
prediction = response.predictions[0]
print(prediction)

The text does not describe how to process payments to others, therefore I am unable to answer this question.


In [None]:
## -- Below is archiving or parked.

In [None]:
# (looking at TFIDF for improving RAG, but would need stop words removed. Parking for now)
from collections import Counter
context_str = ''.join(context)

def extract_keywords(text, n=5):
    # Preprocess the text (optional): lowercase, remove punctuation, etc.
    # ... (preprocessing steps)

    # Split the text into words
    words = text.split()

    # Count word frequencies
    word_counts = Counter(words)

    # Extract the top n most frequent words
    top_keywords = word_counts.most_common(n)

    # Return the list of keywords (modify if needed)
    return [keyword[0] for keyword in top_keywords]
a = extract_keywords(context_str)
print(a)

### V3: WIP (format code into functions for vertex)
Not focusing on improving code or testing, just trying to put into func for simulating end-to-end in vertex

# Code can be grouped as follows.
--- Only needs to be run once at set up
- Ingest and chunk data
- Embedd and store

--- Needs to run at inference 
- Embed question
- Semantic search
- Model inference and response

In [None]:
# --- Install lib
#!pip3 install --upgrade google-cloud-aiplatform
#!pip3 install ipython pandas[output_formatting] google-cloud-language==2.10.0
#!pip install langchain
#!pip install PyPDF2
#!pip install pypdf
#!pip install sentence_transformers
#!pip install torch
#!pip install pandas
#!pip install pdfplumber

In [None]:
# --- Ingest and chunk data (now using new doc ingestion)

# Uses same loader as above, but now works for multiple docs
#from langchain.document_loaders import PDFPlumberLoader
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
from langchain.document_loaders import PDFPlumberLoader

def load_pdfs_from_directory(directory_path):
    docs = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory_path, filename)
            loader = PDFPlumberLoader(file_path)
            loaded_docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            for doc in loaded_docs:
                texts = text_splitter.split_text(doc.page_content)
                for text in texts:
                    docs.append({
                        'page_content': text,
                        'metadata': doc.metadata
                    })
    return docs

directory_path = "/home/jupyter/pdf"
all_pdf_docs = load_pdfs_from_directory(directory_path)

data = all_pdf_docs
df = pd.DataFrame(data)


df['len'] = df['page_content'].str.len()

df1 = df.copy()

df1['page_content_data'] = df1['page_content']
df1['page_content'] = df1['page_content'].str.replace('\n', '')

df2 = pd.json_normalize(df1.metadata)
df1 = pd.concat([df1, df2], axis=1)
df1['source'] = df1['source'].str.replace('/home/jupyter/pdf/', '')

df1['pagestr'] = df1['page'].astype(str)

df1['page_content_plus'] = df1['page_content'] + '[' + 'Source: ' + df1['source'] + ' page: ' + df1['pagestr'] + ']'

#df1.drop(['metadata'], axis=1, inplace=True)
# df1.drop(['metadata', 'total_pages', 'Creator', 'Producer', 'CreationDate', 'ModDate', 'Author', 'ContentTypeId', '_dlc_DocIdItemGuid',\
#            'PADocID', 'MediaServiceImageTags', 'Company', 'file_path', 'source', 'Subject', 'Keywords'], axis=1, inplace=True)
 
display(df1.head(2))

In [None]:
# --- Embedd and store (once RAG working, replace storage from df to cloudSQL)
pd.set_option('display.max_colwidth', 35)
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# Embed the chunked text
embeddings = model.encode(df1['page_content'])

# Convert embedding vectors into one-dimensional arrays
one_dimensional_embeddings = []
for embedding in embeddings:
    one_dimensional_embedding = np.ravel(embedding)
    one_dimensional_embeddings.append(one_dimensional_embedding)

# Combine the original text and embeddings into a DataFrame
data = {
    "original_text": df1['page_content'],
    "embeddings": one_dimensional_embeddings
}

dfe = pd.DataFrame(data)
df3 = pd.concat([df1, dfe], axis=1)
df3.drop(['original_text'], axis=1, inplace=True)
display(df3.head(3))

In [None]:
# --- Embed question (once all running, convert this and most other code to functions (or a class))
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd

# Initialize the Sentence Transformer model
model = SentenceTransformer("paraphrase-mpnet-base-v2")

#question = "can the Easy Money Card be used overseas?"

# Embed the question
embeddings = model.encode([question])

# Flatten the embedding list into a one-dimensional array
one_dimensional_embeddings = np.ravel(embeddings)

# Convert array to a single-row dataframe
df_question = pd.DataFrame({'embeddings': [one_dimensional_embeddings]})

df_question['question'] = question
df_question = df_question[['question','embeddings']]
display(df_question)

In [None]:
# ---- Semantic search
import pandas as pd
from sentence_transformers import SentenceTransformer

# Embed the question
question_embedding = model.encode(question, convert_to_tensor=True)

# Calculate cosine similarities using dot product
#similarities = df3['embeddings'].apply(lambda x: torch.dot(question_embedding, x))
similarities = df3['embeddings'].apply(lambda x: np.dot(question_embedding, x))

# Identify the 3 most similar rows
most_similar_indices = similarities.nlargest(8).index
most_similar_rows = df3.iloc[most_similar_indices]

display(most_similar_rows)

In [None]:
cont = most_similar_rows['page_content_data'].tolist()
context = cont

# ------------------------

# prompt source: https://www.kaggle.com/code/gpreda/exploring-eu-ai-act-with-gemma/
prompt = f"""
You are an AI Agent specialized to answer to questions about the context provided.
In order to create the answer, please only use the information from the
context provided (Context). Do not include other information.
Answer with simple words.
If needed, include also explanations.
Question: {question}
Context: {context}
Answer:
"""

instances = [
    {
        "prompt": prompt,
        "max_tokens": 100, #100
        "temperature": 0.25, #0.15
        "top_p": 0.025, # 1.0
        "top_k": 40, #10
        #"raw_response": False,
        "raw_response": True,
    },
]
response = endpoint_vllm.predict(instances=instances)
prediction = response.predictions[0]
print(prediction)

In [140]:
# Above cell, converted to function
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

def answer_question(question, df3):
    # -- Embed the question
    model = SentenceTransformer("paraphrase-mpnet-base-v2")
    embeddings = model.encode([question])
    one_dimensional_embeddings = np.ravel(embeddings)
    df_question = pd.DataFrame({'embeddings': [one_dimensional_embeddings]})
    df_question['question'] = question
    df_question = df_question[['question', 'embeddings']]

    # -- Semantic search
    question_embedding = model.encode(question, convert_to_tensor=True)
    similarities = df3['embeddings'].apply(lambda x: np.dot(question_embedding, x))
    most_similar_indices = similarities.nlargest(8).index
    most_similar_rows = df3.iloc[most_similar_indices]

    cont = most_similar_rows['page_content_data'].tolist()
    context = ' '.join(cont)

    # -- Inference
    prompt = f"""
    You are an AI Agent specialized to answer to questions about the context provided.
    In order to create the answer, please only use the information from the
    context provided (Context). Do not include other information.
    Answer with simple words.
    If needed, include also explanations.
    Question: {question}
    Context: {context}
    Answer:
    """

    instances = [
        {
            "prompt": prompt,
            "max_tokens": 100,
            "temperature": 0.25,
            "top_p": 0.025,
            "top_k": 40,
            "raw_response": True,
        },
    ]
    response = endpoint_vllm.predict(instances=instances)
    prediction = response.predictions[0]
    return prediction

question = "what is the eligibility of the easy money card?"
print(answer_question(question, df3))

The eligibility of the Easy Money Card is 16 years of age or older.


In [155]:
#!pip install --upgrade google-cloud-aiplatform

In [156]:
#endpoint_name = "projects/1024619852873/locations/us-central1/endpoints/6631486679628644352"

# run this

from google.cloud import aiplatform_v1beta1 as api

# Replace with your endpoint name
#endpoint_name = "projects/1024619852873/locations/us-central1/endpoints/6631486679628644352"
endpoint_name = 'projects/1024619852873/locations/us-central1/endpoints/6631486679628644352'

# Get the endpoint object (add closing parenthesis)
endpoint = api.Endpoint.create_from_resource_name(endpoint_name)

AttributeError: type object 'Endpoint' has no attribute 'create_from_resource_name'

In [147]:
# Import libraries
from google.cloud import aiplatform_v1beta1
from google.cloud.aiplatform_v1beta1 import Endpoint

# Get the endpoint object (replace with your endpoint name)
endpoint = Endpoint.create_from_resource_name("projects/1024619852873/locations/us-central1/endpoints/6631486679628644352")

# Get the endpoint URI
endpoint_uri = endpoint.uri

# (Optional) Print the URI for reference
print(endpoint_uri)

AttributeError: type object 'Endpoint' has no attribute 'create_from_resource_name'

## Apply text moderation and chat templates

Text moderation and chat templates can be applied to model predictions generated by the vLLM endpoint as well. You may use the same code snippets as for the Hex-LLM endpoint. They are not repeated here for brevity.

## Clean up resources

In [None]:
# # Undeploy models and delete endpoints.
# endpoint_hexllm.delete(force=True)
# endpoint_vllm.delete(force=True)

# # Delete models.
# model_hexllm.delete()
# model_vllm.delete()

# # Delete Cloud Storage objects.
# delete_bucket = False
# if delete_bucket:
#     ! gsutil -m rm -r $STAGING_BUCKET