# **Attribution**

Inspiration from Hamza's Course on Enterprise RAG and Multi-Agent Applications

https://maven.com/boring-bot/advanced-llm

1.   Signup on CouchBase (https://cloud.couchbase.com/) - Use the free tire option
2.   Create a search index named "**hotel_desc_search_index**" on inventory -> hotel

# **JINA Embeddings**

In [77]:
import requests
from google.colab import userdata

def get_embeddings(content):
  url = 'https://api.jina.ai/v1/embeddings'
  headers = {
      'Content-Type': 'application/json',
      'Authorization': 'Bearer '+ userdata.get('jina_apikey')
  }
  data = {
      "model": "jina-embeddings-v3",
      "task": "text-matching",
      "late_chunking": False,
      "dimensions": 1024,
      "embedding_type": "float",
      "input": [
          content
      ]
  }

  response = requests.post(url, headers=headers, json=data)
  print(response.json())
  return response.json()

In [45]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [119]:
demo_embeddings = get_embeddings("40 bed summer hostel about 3 miles from Gillingham, housed in a districtive converted Oast House in a semi-rural setting.")

{'model': 'jina-embeddings-v3', 'object': 'list', 'usage': {'total_tokens': 33, 'prompt_tokens': 33}, 'data': [{'object': 'embedding', 'index': 0, 'embedding': [-0.109475665, 0.0450993, 0.04877086, 0.028413778, -0.044740826, -0.09973108, -0.09658188, -0.0024917861, -0.042275872, -0.106235676, 0.119427994, 0.0036004714, 0.03112956, -0.010226051, -0.054508995, -0.12750836, -0.024459677, 0.04197465, 0.051268466, 0.07324717, -0.044268936, -0.045601655, -0.0527214, -0.113108054, 0.05448354, 0.088933736, -0.1149054, 0.05819249, 0.057165686, -0.007271633, 0.005998137, 0.07181222, -0.007073966, -0.037292037, -0.014403569, 0.06874381, 0.03427087, 0.005492415, -0.005071177, -0.10698354, -0.038128987, 0.056153454, -0.03349593, 0.06647212, -0.035142135, 0.07637406, 0.041832693, 0.044147275, -0.02233918, 0.014244205, -0.02918599, 0.03154696, 0.054066524, 0.026703231, 0.05361546, 0.059967905, 0.034303285, -0.02122418, -0.056088917, 0.050068278, 0.03408225, -0.04388147, -0.07917087, 0.038208958, -0.0

In [120]:
demo_embeddings.get('data')
data = demo_embeddings.get('data')

if(data is not None):
  embeddings = data[0]['embedding']

In [125]:
len(embeddings)

1024

# **FAISS Demo**

In [128]:
import faiss
import numpy as np

# Create a FAISS index (L2 is the default metric for similarity search)
index = faiss.IndexFlatL2(1024)  # Flat index for exact search

# Convert the embeddings list to a NumPy array before adding to the index
embeddings_np = np.array([embeddings], dtype='float32')
index.add(embeddings_np)  # Add the NumPy array to the index

print(f"Number of vectors in the index: {index.ntotal}")

Number of vectors in the index: 1


In [129]:
import numpy as np

search_text = 'hotel'
embeddings = get_embeddings(search_text)
hotel_embeddings = embeddings.get('data')[0]['embedding']

{'model': 'jina-embeddings-v3', 'object': 'list', 'usage': {'total_tokens': 3, 'prompt_tokens': 3}, 'data': [{'object': 'embedding', 'index': 0, 'embedding': [-0.039003458, -0.027811198, 0.0054835207, 0.09556104, 0.020527005, -0.0029010216, 0.00093116885, 0.031633466, -0.03225121, -0.017663252, 0.07745781, 0.14695363, 0.060864612, -0.061036203, -0.097053915, -0.1695698, -0.02839891, 0.024212, 0.042503987, 0.0049269106, 0.022084227, -0.021934083, -0.03666119, 0.00669219, -0.0066739917, 0.047892056, 0.04142294, 0.103368595, 0.07539867, 0.0014456847, 0.12289606, -0.054601405, -0.002122411, 0.0070932927, 0.017838333, 0.016509548, 0.10362599, 0.004726359, -0.015938995, -0.007940541, 0.013279279, 0.080100365, -0.061379395, -0.0048818667, -0.083480775, 0.049324874, -0.051100876, -0.012687277, -0.015908966, 0.03795673, -0.06532607, 0.04228949, -0.04364509, 0.029286912, 0.022088518, -0.017669955, 0.029582912, 0.03530559, -0.0059714927, -0.059886523, 0.11953281, -0.043361958, 0.004651555, 0.0221

In [132]:
# Convert to NumPy array
embeddings_np = np.array([hotel_embeddings], dtype='float32')
k = 1  # Number of nearest neighbors to retrieve
distances, indices = index.search(embeddings_np, k)
distances, indices

(array([[1.3961257]], dtype=float32), array([[0]]))

# **Couchbase**

In [None]:
!pip install couchbase

Collecting couchbase
  Downloading couchbase-4.3.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (23 kB)
Downloading couchbase-4.3.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: couchbase
Successfully installed couchbase-4.3.4


In [35]:
from couchbase.cluster import Cluster, ClusterOptions, ClusterTimeoutOptions
from couchbase.auth import PasswordAuthenticator
from couchbase.exceptions import CouchbaseException
import couchbase.search as search
from couchbase.n1ql import N1QLQuery

In [25]:
# Connect to Couchbase
from google.colab import userdata
from datetime import timedelta

auth = PasswordAuthenticator(userdata.get('couchbase_username'), userdata.get('couchbase_password'))
cluster = Cluster("couchbases://cb.u-aa4ttazyz-txmz.cloud.couchbase.com", ClusterOptions(auth))

# Connect options - global timeout opts
timeout_opts = ClusterTimeoutOptions(kv_timeout=timedelta(seconds=30))
options=ClusterOptions(PasswordAuthenticator('username', 'password'), timeout_options=timeout_opts)

bucket = cluster.bucket("travel-sample")
collection = bucket.default_collection()

  cluster = Cluster("couchbases://cb.u-aa4ttazyz-txmz.cloud.couchbase.com", ClusterOptions(auth))
  timeout_opts = ClusterTimeoutOptions(kv_timeout=timedelta(seconds=30))
  options=ClusterOptions(PasswordAuthenticator('username', 'password'), timeout_options=timeout_opts)


In [143]:
def search_couchbase(query_to_search):
  try:
      result = cluster.search_query(
          "travel-sample.inventory.hotel_desc_search_index", search.QueryStringQuery(query_to_search))

      filtered_documents = []
      for row in result.rows():
          # Retrieve the document by its ID
          try:
              document = collection.get(row.id)
              filtered_documents.append(document.content_as[dict])
          except Exception as e:
              print("Error retrieving document:", e)

      return filtered_documents
  except CouchbaseException as ex:
      import traceback
      traceback.print_exc()

In [145]:
documents = search_couchbase('40 bed')
documents

[{'title': 'Aberdyfi',
  'name': 'Aberdovey Hillside Village',
  'address': 'Church St',
  'directions': 'Turn right in square, past front of chapel and continue straight up Church St for approximately 200 metres, ignoring left dog-leg and continue to metal gates.',
  'phone': '+44 1654767522',
  'tollfree': None,
  'email': 'info@hillsidevillage.co.uk',
  'fax': None,
  'url': 'http://www.hillsidevillage.co.uk/',
  'checkin': None,
  'checkout': None,
  'price': None,
  'geo': {'lat': 52.54493, 'lon': -4.04067, 'accuracy': 'APPROXIMATE'},
  'type': 'hotel',
  'id': 40,
  'country': 'United Kingdom',
  'city': 'Aberdovey',
  'state': None,
  'reviews': [{'content': 'My girlfriend and I decided to surprise our 11-year-old daughter with a trip to Seattle for her birthday this last weekend. She had never been and we knew she would love it. I actually went on a hotel discount site and was given this hotel. I had been in the Portland location once and remember it being beautiful and have st

# **Scemantic Caching**

In [158]:
import faiss
import json
import numpy as np
import time

class CacheFacade:
    def __init__(self, json_file='cache.json'):
        self.euclidean_threshold = 0.3
        self.index =faiss.IndexFlatL2(1024)  # Use IndexFlatL2 with Euclidean distance
        self.json_file = json_file
        self.load_cache()

    def load_cache(self):
        # Load cache from JSON file, creating an empty cache if the file is not found
        try:
            with open(self.json_file, 'r') as file:
                self.cache = json.load(file)
        except FileNotFoundError:
            self.cache = {'questions': [], 'embeddings': [], 'answers': []}

    def save_cache(self):
        # Save the cache to the JSON file
        with open(self.json_file, 'w') as file:
            json.dump(self.cache, file)

    # Reused logic from Module:1a-Advanced-LLMs -semantic_cache_from_scratch.ipynb
    def ask(self, question: str) -> str:
        # Method to retrieve an answer from the cache or generate a new one
        start_time = time.time()
        try:
            embeddings = get_embeddings(question)
            embedding = embeddings.get('data')[0]['embedding']
            embeddings_np = np.array([embedding], dtype='float32')

            # Search for the nearest neighbor in the index
            distances, indices = self.index.search(embeddings_np, 1)

            if distances[0] >= 0:
                if indices[0][0] != -1 and distances[0][0] <= self.euclidean_threshold:
                    row_id = int(indices[0][0])
                    print(f'Found cache in row: {row_id} with score {1 - distances[0][0]}') #score inversed to show similarity
                    end_time = time.time()
                    elapsed_time = end_time - start_time
                    print(f"Time taken: {elapsed_time} seconds")
                    return self.cache['answers'][row_id]

            # Handle the case when there are not enough results or Euclidean distance is not met
            documents = search_couchbase(question)

            self.cache['questions'].append(question)
            self.cache['embeddings'].append(embedding)
            self.cache['answers'].append(documents)

            self.index.add(embeddings_np)
            self.save_cache()
            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"Time taken: {elapsed_time} seconds")

            return documents
        except Exception as e:
            raise RuntimeError(f"Error during 'ask' method: {e}")


In [156]:
cache_facade = CacheFacade()
cache_facade.ask('40 bed')

{'model': 'jina-embeddings-v3', 'object': 'list', 'usage': {'total_tokens': 4, 'prompt_tokens': 4}, 'data': [{'object': 'embedding', 'index': 0, 'embedding': [-0.033579838, -0.028526977, -0.077937484, 0.11767607, -0.08678457, 0.0013728112, -0.03774677, -0.0058738748, -0.05686455, -0.13233976, -0.008450903, 0.105492994, -0.10526082, -0.03407321, -0.11672293, -0.10087393, -0.05077912, -0.04607452, 0.048115216, 0.005268998, 0.087676615, -0.004598058, 0.008206174, 0.009748295, -6.873602e-06, 0.12356598, -0.13187541, 0.123321585, -0.03898096, 0.0067605698, 0.10515084, 0.021922208, 0.053662974, 0.008029895, 0.07168098, 0.02144564, 0.05252043, -0.0010738095, -0.014805739, -0.08144455, -0.008128417, 0.01883978, 0.062467296, 0.07666663, 0.052471552, 0.027164476, -0.07581125, -0.0189299, -0.038815994, 0.07730206, -0.037578747, 0.0017901915, -0.007754951, 0.04600731, 0.009518412, 0.046685506, 0.015235339, -0.007499864, -0.056180242, -0.029752005, -0.02402095, 0.004224974, -0.024564726, -0.0771309

[{'title': 'Aberdyfi',
  'name': 'Aberdovey Hillside Village',
  'address': 'Church St',
  'directions': 'Turn right in square, past front of chapel and continue straight up Church St for approximately 200 metres, ignoring left dog-leg and continue to metal gates.',
  'phone': '+44 1654767522',
  'tollfree': None,
  'email': 'info@hillsidevillage.co.uk',
  'fax': None,
  'url': 'http://www.hillsidevillage.co.uk/',
  'checkin': None,
  'checkout': None,
  'price': None,
  'geo': {'lat': 52.54493, 'lon': -4.04067, 'accuracy': 'APPROXIMATE'},
  'type': 'hotel',
  'id': 40,
  'country': 'United Kingdom',
  'city': 'Aberdovey',
  'state': None,
  'reviews': [{'content': 'My girlfriend and I decided to surprise our 11-year-old daughter with a trip to Seattle for her birthday this last weekend. She had never been and we knew she would love it. I actually went on a hotel discount site and was given this hotel. I had been in the Portland location once and remember it being beautiful and have st

In [157]:
cache_facade.ask('40 bed')

{'model': 'jina-embeddings-v3', 'object': 'list', 'usage': {'total_tokens': 4, 'prompt_tokens': 4}, 'data': [{'object': 'embedding', 'index': 0, 'embedding': [-0.033579838, -0.028526977, -0.077937484, 0.11767607, -0.08678457, 0.0013728112, -0.03774677, -0.0058738748, -0.05686455, -0.13233976, -0.008450903, 0.105492994, -0.10526082, -0.03407321, -0.11672293, -0.10087393, -0.05077912, -0.04607452, 0.048115216, 0.005268998, 0.087676615, -0.004598058, 0.008206174, 0.009748295, -6.873602e-06, 0.12356598, -0.13187541, 0.123321585, -0.03898096, 0.0067605698, 0.10515084, 0.021922208, 0.053662974, 0.008029895, 0.07168098, 0.02144564, 0.05252043, -0.0010738095, -0.014805739, -0.08144455, -0.008128417, 0.01883978, 0.062467296, 0.07666663, 0.052471552, 0.027164476, -0.07581125, -0.0189299, -0.038815994, 0.07730206, -0.037578747, 0.0017901915, -0.007754951, 0.04600731, 0.009518412, 0.046685506, 0.015235339, -0.007499864, -0.056180242, -0.029752005, -0.02402095, 0.004224974, -0.024564726, -0.0771309

[{'title': 'Aberdyfi',
  'name': 'Aberdovey Hillside Village',
  'address': 'Church St',
  'directions': 'Turn right in square, past front of chapel and continue straight up Church St for approximately 200 metres, ignoring left dog-leg and continue to metal gates.',
  'phone': '+44 1654767522',
  'tollfree': None,
  'email': 'info@hillsidevillage.co.uk',
  'fax': None,
  'url': 'http://www.hillsidevillage.co.uk/',
  'checkin': None,
  'checkout': None,
  'price': None,
  'geo': {'lat': 52.54493, 'lon': -4.04067, 'accuracy': 'APPROXIMATE'},
  'type': 'hotel',
  'id': 40,
  'country': 'United Kingdom',
  'city': 'Aberdovey',
  'state': None,
  'reviews': [{'content': 'My girlfriend and I decided to surprise our 11-year-old daughter with a trip to Seattle for her birthday this last weekend. She had never been and we knew she would love it. I actually went on a hotel discount site and was given this hotel. I had been in the Portland location once and remember it being beautiful and have st