In [None]:
# Install Vertex AI LLM SDK
! pip install --user --upgrade google-cloud-aiplatform==1.44.0 langchain==0.1.12 langchain-google-vertexai==0.1.1 typing_extensions==4.9.0

# Dependencies required by Unstructured PDF loader
! sudo apt -y -qq install tesseract-ocr libtesseract-dev
! sudo apt-get -y -qq install poppler-utils
! pip install --user --upgrade unstructured==0.12.4 pdf2image==1.17.0 pytesseract==0.3.10 pdfminer.six==20221105
! pip install --user --upgrade pillow-heif==0.15.0 opencv-python==4.9.0.80 unstructured-inference==0.7.24 pikepdf==8.13.0 pypdf==4.0.1

# For Matching Engine integration dependencies (default embeddings)
! pip install --user --upgrade tensorflow_hub==0.16.1 tensorflow_text==2.15.0

Collecting google-cloud-aiplatform==1.44.0
  Downloading google_cloud_aiplatform-1.44.0-py2.py3-none-any.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain==0.1.12
  Downloading langchain-0.1.12-py3-none-any.whl (809 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.1/809.1 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-google-vertexai==0.1.1
  Downloading langchain_google_vertexai-0.1.1-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.9/48.9 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing_extensions==4.9.0
  Downloading typing_extensions-4.9.0-py3-none-any.whl (32 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain==0.1.12)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain==0.1.12)
  Downloading json

The following additional packages will be installed:
  libarchive-dev libleptonica-dev tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  libarchive-dev libleptonica-dev libtesseract-dev tesseract-ocr
  tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 6 newly installed, 0 to remove and 45 not upgraded.
Need to get 8,560 kB of archives.
After this operation, 31.6 MB of additional disk space will be used.
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 6.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package libarchive-dev:amd64.
(Reading database ... 121752 files and directories

Collecting tensorflow_text==2.15.0
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_text
Successfully installed tensorflow_text-2.15.0


#### Phase I: Install, Set Up, and Develop Q&A Search Ecosystem

In [None]:
# Automatically restart kernel so that the system can access newly-installed packages
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [None]:
# This cell should ideally check session state and proceed accordingly
print("This cell might not run correctly if the kernel was just restarted. Manually execute cells sequentially after a restart.")

This cell might not run correctly if the kernel was just restarted. Manually execute cells sequentially after a restart.


In [None]:
# Authentication of Google Account
import sys

if "google.colab" in sys.modules:
  from google.colab import auth

  auth.authenticate_user()

In [None]:
# Custom Python Modules for accessing Vertex AI Matching Engine
import os
import urllib.request

if not os.path.exists("utils"):
  os.makedirs("utils")

urlprefix = "https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/language/use-cases/document-qa/utils"
files = ["__init__.py", "matching_engine.py", "matching_engine_utils.py"]

for fname in files:
  urllib.request.urlretrieve(f"{urlprefix}/{fname}", filename=f"utils/{fname}")

In [None]:
import bigframes.dataframe

In [None]:
# Import Libraries
import json
import textwrap

# Utils
import time
import uuid
from typing import List

import numpy as np
import vertexai

# Vertex AI
from google.cloud import aiplatform
print(f"Vertex AI SDK Version: {aiplatform.__version__}")

# LangChain
import langchain
print(f"LangChain version: {langchain.__version__}")

from langchain.chains import RetrievalQA
from langchain.document_loaders import GCSDirectoryLoader
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Import custom Matching Engine packages
from utils.matching_engine import MatchingEngine
from utils.matching_engine_utils import MatchingEngineUtils

Vertex AI SDK Version: 1.44.0
LangChain version: 0.1.12


In [None]:
PROJECT_ID = "amplified-time-418915" # @param {type:"string"}
REGION = "us-central1" # @param {type:"string"}

# Initialize Vertex AI SDK
vertexai.init(project=PROJECT_ID, location=REGION)

In [None]:
# Define Custom Vertex AI Embeddings

# Funtion to limit the rate for Embeddings API
def rate_limit(max_per_minute):
  period = 60 / max_per_minute
  print("Waiting")
  while True:
    before = time.time()
    yield
    after = time.time()
    elapsed = after - before
    sleep_time = max(0, period - elapsed)
    if sleep_time > 0:
      print(".",end="")
      time.sleep(sleep_time)

# Class to perform vector embeddings using Vertex AI services
# Class CustomVertexAIEmbeddings: child of class VertexAIEmbeddings
# Class VertexAIEmbeddings: LangChain's wrapper around GCP Vertex AI text embedding models API
# This class handles vector embeddings using GCP: Vertex AI services and technologies

class CustomVertexAIEmbeddings(VertexAIEmbeddings):
  requests_per_minute: int
  num_instances_per_batch: int

  # Overriding embed_documents method
  def embed_documents(self, texts: List[str]):
    limiter = rate_limit(self.requests_per_minute)
    results = []
    docs = list(texts)

    while docs:
      # Working in batches because the API accepts maximum 5 documents per request to get embeddings
      head, docs = (
          docs[: self.num_instances_per_batch],
          docs[self.num_instances_per_batch :],
      )
      chunk = self.client.get_embeddings(head)
      results.extend(chunk)
      next(limiter)

    return [r.values for r in results]

In [None]:
# Create Embeddings Instance and LLM Instance

# Text model instance integrated with LangChain
# Create GEMINI LLM using LangChain's VertexAI class API

llm = VertexAI(
    model_name="gemini-1.0-pro",
    max_output_tokens=2048,
    temperature=0.5,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

# Embeddings API integrated with langchain
# Create an instance, named "embeddings", of class CustomVertexAIEmbeddings
# This instance can handle 100 requests/queries per minute (QPM)

EMBEDDING_QPM = 100
EMBEDDING_NUM_BATCH = 5
embeddings = CustomVertexAIEmbeddings(
    requests_per_minute=EMBEDDING_QPM,
    num_instances_per_batch=EMBEDDING_NUM_BATCH,
)

  warn_deprecated(


#### Phase II: Develop and Test Q&A – Search System

In [None]:
# Matching Engine Index and Endpoint: Specify Parameters

ME_REGION = "us-central1"
ME_INDEX_NAME = f"{PROJECT_ID}-me-logimind-index" # @param {type:"string"}
ME_EMBEDDING_DIR = f"{PROJECT_ID}-me-logimind-bucket" # @param {type:"string"}
ME_DIMENSIONS = 768 # For gemini 1.0 pro, same as using Vertex PaLM Embedding

In [None]:
# Matching Engine Index and Endpoint: Create GCP Cloud Storage Buckets

! set -x && gsutil mb -p $PROJECT_ID -l us-central1 gs://$ME_EMBEDDING_DIR

+ gsutil mb -p amplified-time-418915 -l us-central1 gs://amplified-time-418915-me-logimind-bucket
Creating gs://amplified-time-418915-me-logimind-bucket/...


In [None]:
# Initialize Newly-Created Matching Engine Index Folder with Dummy Embeddings File

# dummy embeddings
init_embedding = {"id": str(uuid.uuid4()), "embedding": list(np.zeros(ME_DIMENSIONS))}

# Save dummy embeddings to a local JSON file
with open("embedding_0.json","w") as f:
  json.dump(init_embedding, f)

# Upload the dummy embedding JSON file to cloud storage buckets
! set -x && gsutil cp embedding_0.json gs://{ME_EMBEDDING_DIR}/init_index/init_embedding_0.json

+ gsutil cp embedding_0.json gs://amplified-time-418915-me-logimind-bucket/init_index/init_embedding_0.json
Copying file://embedding_0.json [Content-Type=application/json]...
/ [1 files][  3.8 KiB/  3.8 KiB]                                                
Operation completed over 1 objects/3.8 KiB.                                      


In [None]:
# Create Matching Engine
mengine = MatchingEngineUtils(PROJECT_ID, ME_REGION, ME_INDEX_NAME)

In [None]:
# Create Matching Engine Index

# Invoke the method create_index of the Matching Engine to create the index
index = mengine.create_index(
    embedding_gcs_uri=f"gs://{ME_EMBEDDING_DIR}/init_index",
    dimensions=ME_DIMENSIONS,
    index_update_method="streaming",
    index_algorithm="tree-ah",
)

if index:
  print(index.name)

.projects/429987130664/locations/us-central1/indexes/7885473094037405696


In [None]:
# Deploy ME (or Vector Search Engine - VSE) Index to the endpoint

# Create an ME (or VSE) endpoint
# Then, deploy the ME (or VSE) index to the newly created endpoint
index_endpoint = mengine.deploy_index()

if index_endpoint:
  print(f"Index endpoint resource name: {index_endpoint.name}")
  print(
      f"Index endpoint public domain name: {index_endpoint.public_endpoint_domain_name}"
  )
  print("Deployed indexes on the index endpoint: ")

  for d in index_endpoint.deployed_indexes:
    print(f"    {d.id}")

..................Index endpoint resource name: projects/429987130664/locations/us-central1/indexEndpoints/3638015695473606656
Index endpoint public domain name: 
Deployed indexes on the index endpoint: 


In [15]:
# Ingest and pre-process the PDF files

# adta5760-docs-folder-1 is the name of the GCP cloud storage bucket
# adta5760-docs-folder-1 --> subfolder: documents
# documents --> subfolder: pdfs
# pdfs: The subfolder where all the PDFs are stored
GCS_BUCKET_DOCS = f"adta5760-docs-folder-2"

folder_prefix = "documents/pdfs"

print(f"Processing documents from {GCS_BUCKET_DOCS}")

# Load all the PDFs to be processed into the system
# First, create a loader to upload the entire folder (or directory)
loader = GCSDirectoryLoader(
    project_name=PROJECT_ID, bucket=GCS_BUCKET_DOCS, prefix=folder_prefix
)

# Then, load all PDFs into the knowledge base metadata named "documents"
documents = loader.load()

# Add document name and source to the metadata
for document in documents:
  doc_md = document.metadata
  document_name = doc_md["source"].split("/")[-1]

  # derive doc source from Document loader
  doc_source_prefix = "/".join(GCS_BUCKET_DOCS.split("/")[:3])
  doc_source_suffix = "/".join(doc_md["source"].split("/")[4:-1])
  source = f"{doc_source_prefix}/{doc_source_suffix}"
  document.metadata = {"source": source, "document_name": document_name}

print(f"# of documents loaded (pre-chunking) = {len(documents)}")

Processing documents from adta5760-docs-folder-2


  warn_deprecated(
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


# of documents loaded (pre-chunking) = 30


In [16]:
# Verify the metadata of the first PDF in the knowledge base

documents[0].metadata

{'source': 'adta5760-docs-folder-2/pdfs',
 'document_name': 'An Investigation of Visibility and Flexibility as complements to supply chain analytics.pdf'}

In [17]:
# Split the documents into chunks
# Using LangChain's Document Transformer function RecursiveCharacterTextSplitter()
# RecursiveCharacterTextSplitter: Recursively Split by Characters

# Create a Langchain's document transformer to split text documents into smaller chunks
# Using the function RecursiveCharacterTextSplitter()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 50,
    separators = ["\n\n", "\n", ".", "!", "?", ",", " ", ""],
)

# Split documents using the text splitter
doc_splits = text_splitter.split_documents(documents)

# Add chunk number to a document's metadata
for idx, split in enumerate(doc_splits):
  split.metadata["chunk"] = idx

print(f"# of document splits = {len(doc_splits)}")

# of document splits = 3190


In [18]:
# Verify the split data realted to the first document

doc_splits[0].metadata

{'source': 'adta5760-docs-folder-2/pdfs',
 'document_name': 'An Investigation of Visibility and Flexibility as complements to supply chain analytics.pdf',
 'chunk': 0}

In [19]:
# Get Matching Engine (or Vector Search Engine) Index ID and Endpoint ID

ME_INDEX_ID, ME_INDEX_ENDPOINT_ID = mengine.get_index_and_endpoint()

print(f"ME_INDEX_ID={ME_INDEX_ID}")
print(f"ME_INDEX_ENDPOINT_ID={ME_INDEX_ENDPOINT_ID}")

ME_INDEX_ID=projects/429987130664/locations/us-central1/indexes/7885473094037405696
ME_INDEX_ENDPOINT_ID=projects/429987130664/locations/us-central1/indexEndpoints/3638015695473606656


In [20]:
# Store docs as embeddings in Mactching Engine Index

# First, get contents of each document chunk
texts = [doc.page_content for doc in doc_splits]

# Next, create metadata for each document chunk
metadatas = [
    [
        {"namespace": "source", "allow_list": [doc.metadata["source"]]},
        {"namespace": "document_source", "allow_list": [doc.metadata["document_name"]]},
        {"namespace": "chunk", "allow_list": [str(doc.metadata["chunk"])]},
    ]
    for doc in doc_splits
]

In [21]:
# Configure Matching Engine (or Vector Search Engine) as GCP Vector Store(or Vector Database)

# initialize vector store
me = MatchingEngine.from_components(
    project_id=PROJECT_ID,
    region=ME_REGION,
    gcs_bucket_name=f"gs://{ME_EMBEDDING_DIR}".split("/")[2],
    embedding=embeddings,
    index_id=ME_INDEX_ID,
    endpoint_id=ME_INDEX_ENDPOINT_ID,
)

In [22]:
# Store docs as vector embeddings in Matching Engine (or Vector Search Engine) index
# It may take a while since API is rate limited
# At least 30 minutes or longer

doc_ids = me.add_texts(texts=texts, metadatas=metadatas)

Waiting
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [23]:
# Verify if semantic search with Matching Engine (or Vector Search Engine) is working.
# Test 1: k = 2 --> A parameter for ANN (Approximate Nearest Neighbor) vector search
# k: similar to K in K-Nearest Neighbor Algorithm

me.similarity_search("In E-commerce, what are the two types of business models?", k=2)

Waiting


[Document(page_content='In the E-commerce LSCM, there are two major types of business models: business to consumer (B2C) and business to business (B2B) (Bolumole et al., 2015). In B2C model, business website is a place where all the transactions take place between a business organization and consumer directly (Mangiaracina et al., 2015). In this model, a consumer visits the website and places an order to buy the products. The business organization, after receiving the orders, will dispatch the goods to the customer. Successful examples like Amazon.com and Priceline.com are B2C leaders (Rappa, 2008; Ta et al., 2015). Key features of this model are heavy advertising required to attract large customers, high investment of hardware and software, and good customer care service (Nica, 2015). B2B refers to a situation where one business makes a commercial transaction with another, thus, the transaction volume of B2B is much higher than the volume of B2C', metadata={'source': 'adta5760-docs-fo

In [24]:
# Verify if semantic search with Matching Engine (or Vector Search Engine) is working.
# Test 2: k = 2 --> A parameter for ANN (Approximate Nearest Neighbor) vector search
# k: similar to K in K-Nearest Neighbor Algorithm
# search_distance: the concept is similar to the distance in K-Nearest Neighbor Algorithm

me.similarity_search("Give me B2C leaders?", k=2, search_distance=0.4)

Waiting


[Document(page_content='[47]. Ta, H., T. Esper and A. R. Hofer (2015). "Business(cid:1)to(cid:1)Consumer (B2C) Collaboration: Rethinking the Role of Consumers in Supply Chain Management." Journal of Business Logistics 36(1): 133-134. [48]. Tan, K. H., Y. Zhan, G. Ji, F. Ye and C. Chang (2015). "Harvesting big data to enhance supply chain innovation capabilities: An analytic infrastructure based on deduction graph." International Journal of Production Economics 165: 223-233.\n\n[31]. Li, J. and J. Ding (2014). "Research of self-support logistics network synergy route and structure evolution--take SUNING and ZJS for examples." Journal of Beijing Jiaotong University(Social Sciences Edition)(03): 46-53.\n\n[49]. Timothy Thacher, B. W., Brian Stuorius (2007). Strategic Report For\n\nLowe\'s Companies,Inc.\n\n[50]. Trebilcock, B. (2011). IKEA: Think global, act local for warehouse\n\ndistribution. Modern Materials Handling. August.\n\n[32]. Liu, J. and Y. R. Hou (2011). "Time based strategy 

#### Phase III: Formatting the Retrievel Q&A using LLM

In [25]:
# Create chain to answer questions
NUMBER_OF_RESULTS = 3
SEARCH_DISTANCE_THRESHOLD = 0.6

In [26]:
# Expose index to the retriever
retriever = me.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k": NUMBER_OF_RESULTS,
        "search_distance": SEARCH_DISTANCE_THRESHOLD,
    },
    filters=None,
)

In [27]:
template = """SYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: {question}

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

=============
{context}
=============

Question: {question}
Helpful Answer:"""

In [28]:
# Uses LLM to synthesize results from the search index.
# Use Vertex Gemini Text API for LLM

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        ),
    },
)

In [29]:
# Enable for troubleshooting
qa.combine_documents_chain.verbose = True
qa.combine_documents_chain.llm_chain.verbose = True
qa.combine_documents_chain.llm_chain.llm.verbose = True

In [30]:
def formatter(result):
    print(f"Query: {result['query']}")
    print("." * 80)
    if "source_documents" in result.keys():
        for idx, ref in enumerate(result["source_documents"]):
            print("-" * 80)
            print(f"REFERENCE #{idx}")
            print("-" * 80)
            if "score" in ref.metadata:
                print(f"Matching Score: {ref.metadata['score']}")
            if "source" in ref.metadata:
                print(f"Document Source: {ref.metadata['source']}")
            if "document_name" in ref.metadata:
                print(f"Document Name: {ref.metadata['document_name']}")
            print("." * 80)
            print(f"Content: \n{wrap(ref.page_content)}")
    print("." * 80)
    print(f"Response: {wrap(result['result'])}")
    print("." * 80)


def wrap(s):
    return "\n".join(textwrap.wrap(s, width=120, break_long_words=False))


def ask(
    query,
    qa=qa,
    k=NUMBER_OF_RESULTS,
    search_distance=SEARCH_DISTANCE_THRESHOLD,
    filters={},
):
    qa.retriever.search_kwargs["search_distance"] = search_distance
    qa.retriever.search_kwargs["k"] = k
    qa.retriever.search_kwargs["filters"] = filters
    result = qa({"query": query})
    return formatter(result)

##### Test Case I

In [31]:
ask("In E-commerce, what are the two types of business models?")

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: In E-commerce, what are the two types of business models?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

In the E-commerce LSCM, there are two major types of business models: business to consumer (B2C) and business to business (B2B) (Bolumole et al., 2015). In B2C model, business website is a place where all the transact

##### Test Case II

In [32]:
ask("Give me B2C leaders?")



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: Give me B2C leaders?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

[47]. Ta, H., T. Esper and A. R. Hofer (2015). "Business(cid:1)to(cid:1)Consumer (B2C) Collaboration: Rethinking the Role of Consumers in Supply Chain Management." Journal of Business Logistics 36(1): 133-134. [48]. Tan, K. H., Y. Zhan, G. Ji, F. Ye and 

##### Test Case III

In [35]:
ask("What are primary activities and support activities?")



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: What are primary activities and support activities?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

Porter distinguishes between primary activities and support activities. Primary activities are directly concerned with the creation or delivery of a product or service. They can be grouped into five main areas: inbound logi

##### Test Case IV

In [36]:
ask("Since when has supply chain management received attention?")



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: Since when has supply chain management received attention?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

Supply chain management and other similar terms, such as network sourcing, supply pipeline management, value chain management, and value stream management have become subjects of increasing interest in recent years, 

##### Test Case V

In [37]:
ask("What has caused perishable products to have limited shelf lives with continuous and significant deterioration in the quality value over time in all stages of the supply chain? ")



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: What has caused perishable products to have limited shelf lives with continuous and significant deterioration in the quality value over time in all stages of the supply chain? 

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

Perishable products have limited shelf lives with continuous and significant deterioration in the 

##### Test Case VI

In [38]:
ask("Who scored more runs in IPL 2022?")



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: Who scored more runs in IPL 2022?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

Annals of Operations Research (2024) 333:769–797 https://doi.org/10.1007/s10479-022-04749-6

O R I G I N A L R E S E A R C H

Impact of big data analytics on supply chain performance: an analysis of inﬂuencing factors

P. R. C. Gopal1 · Nrip

##### Test Case VII

In [39]:
filters = {
    "document_name": "E-commerce_logistics_in_supply.pdf",
}
ask("Give me B2C leaders? ", filters=filters)



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: Give me B2C leaders? 

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

[47]. Ta, H., T. Esper and A. R. Hofer (2015). "Business(cid:1)to(cid:1)Consumer (B2C) Collaboration: Rethinking the Role of Consumers in Supply Chain Management." Journal of Business Logistics 36(1): 133-134. [48]. Tan, K. H., Y. Zhan, G. Ji, F. Ye and

##### Test Case VIII

In [42]:
filters = {
    "document_name" : "Optimization of vehicle routing with inventory allocation problems in Cold Supply chain analytics.pdf",
}
ask("What has caused perishable products to have limited shelf lives with continuous and significant deterioration in the quality value over time in all stages of the supply chain?", filters=filters)



[1m> Entering new RetrievalQA chain...[0m
Waiting


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSYSTEM: You are an intelligent assistant and subject matter expert helping the employees with their questions on supply chain industry, especially on the logistics domain.

Question: What has caused perishable products to have limited shelf lives with continuous and significant deterioration in the quality value over time in all stages of the supply chain?

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

Perishable products have limited shelf lives with continuous and significant deterioration in the q

#### Phase IV - Clean Up of the Resources

In [47]:
CLEANUP_RESOURCES = True

In [50]:
# Delete indexes and Delete index endpoints

if CLEANUP_RESOURCES:
    print(f"Undeploying all indexes and deleting the index endpoint {index_endpoint}")
    mengine.delete_index_endpoint()
    print(f"Deleting the index {index}")
    mengine.delete_index()

Undeploying all indexes and deleting the index endpoint name: "projects/429987130664/locations/us-central1/indexEndpoints/3638015695473606656"
display_name: "amplified-time-418915-me-logimind-index-endpoint"
encryption_spec {
}

Deleting the index name: "projects/429987130664/locations/us-central1/indexes/7885473094037405696"
display_name: "amplified-time-418915-me-logimind-index"
description: "Index for LangChain demo"
metadata_schema_uri: "gs://google-cloud-aiplatform/schema/matchingengine/metadata/nearest_neighbor_search_1.0.0.yaml"
metadata {
  struct_value {
    fields {
      key: "config"
      value {
        struct_value {
          fields {
            key: "algorithmConfig"
            value {
              struct_value {
                fields {
                  key: "treeAhConfig"
                  value {
                    struct_value {
                      fields {
                        key: "leafNodeEmbeddingCount"
                        value {
                

In [53]:
# Delete contents from the Cloud Storage bucket

if CLEANUP_RESOURCES and "ME_EMBEDDING_DIR" in globals():
    print(f"Deleting contents from the Cloud Storage bucket {ME_EMBEDDING_DIR}")
    ME_EMBEDDING_BUCKET = "/".join(ME_EMBEDDING_DIR.split("/")[:3])

    shell_output = ! gsutil du -ash gs://$ME_EMBEDDING_BUCKET
    print(shell_output)
    print(
        f"Size of the bucket {ME_EMBEDDING_BUCKET} before deleting = {' '.join(shell_output[0].split()[:2])}"
    )

Deleting contents from the Cloud Storage bucket amplified-time-418915-me-logimind-bucket
['2.3 MiB      gs://amplified-time-418915-me-logimind-bucket']
Size of the bucket amplified-time-418915-me-logimind-bucket before deleting = 2.3 MiB


In [54]:
# comment to delete contents of the bucket

! gsutil -m rm -r gs://$ME_EMBEDDING_BUCKET

Removing gs://amplified-time-418915-me-logimind-bucket/documents/00233569-7a5e-41d0-bbc8-199dc70e2a49#1713843173991366...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/00011527-3f96-4bff-86a1-fb66c13c16d1#1713843528402027...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/0044063e-69d1-4fac-9cc1-e55d2adf44f0#1713842541463707...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/00e9f42e-4e35-44ef-ab0b-5a1a92a73030#1713843039812059...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/00ebdd84-f4a1-40dc-98ff-386e821e2042#1713842949069352...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/00ec5c33-214b-4d91-af1f-6688f8dc32d9#1713842576641565...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/004df39e-1a94-4147-a8d2-52229ed3c54b#1713843073242264...
Removing gs://amplified-time-418915-me-logimind-bucket/documents/0118cb26-8292-4f83-9070-fe85e9f4f0e4#1713843105537346...
Removing gs://amplified-