In [1]:
import os
import json
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
import joblib
import psutil
import ray
import torch
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec, PodSpec
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer




In [7]:
# Destination directory for this notebook's inputs and outputs.
DATA_DIR = Path("/mnt/cluster_storage/")
# NOTE(review): this bare expression is not the last statement of the cell, so it displays nothing.
DATA_DIR
# Copy the previous step's directory into DATA_DIR; dirs_exist_ok=True allows re-running into an existing directory.
shutil.copytree(Path("../02_embeddings-generation/"), DATA_DIR, dirs_exist_ok=True)

PosixPath('/mnt/cluster_storage')

In [9]:
# Load the JSON records found under DATA_DIR / "data1" as a Ray Dataset.
ds = ray.data.read_json(DATA_DIR / "data1")

#ds.show()

In [10]:
# Number of rows in the input dataset.
ds.count()

Read progress 0:   0%|          | 0/1 [00:00<?, ?it/s]

5

In [12]:
def get_device():
    """Return the name of the best torch device available in this process.

    Returns:
        str: "cuda" when a CUDA device is available, otherwise "mps" when the
        Apple Metal (MPS) backend is available, otherwise "cpu".
    """
    if torch.cuda.is_available():
        return "cuda"
    # `torch.has_mps` is deprecated and only reports whether torch was *built*
    # with MPS support. Ask the backend whether it is actually usable instead.
    # The getattr guard keeps this working on torch builds without
    # `torch.backends.mps`.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"

# Resolve the device once; the embedding cells below read this module-level value.
device = get_device()
device

'cuda'

In [13]:
# Hard-coded GPU count; used as the embedding stage's concurrency when device == "cuda".
# NOTE(review): presumably matches the GPUs available in the cluster — confirm before reuse.
num_gpus = 2
# CPU count reported by psutil; used as the concurrency when not running on CUDA.
num_cpus = psutil.cpu_count()

class EmbedBatch:
    """Callable for `map_batches` that adds an "embeddings" column to a batch.

    The model is loaded once per instance in `__init__` onto the module-level
    `device`, and reused for every batch passed to `__call__`.
    """

    def __init__(self):
        self.model = SentenceTransformer("thenlper/gte-large", device=device)

    def __call__(self, batch):
        """Embed `batch["text"]` and store the vectors under `batch["embeddings"]`.

        The whole batch is encoded in a single model call (the encode batch
        size equals the number of texts). The input mapping is updated in
        place and returned.
        """
        sentences = batch["text"].tolist()
        vectors = self.model.encode(sentences, batch_size=len(sentences))
        batch["embeddings"] = vectors.tolist()
        return batch

# Attach the embedding stage. EmbedBatch is passed as a class (not an instance),
# and this rebinds `ds` to the dataset that includes the embedding step.
ds = ds.map_batches(
    EmbedBatch,
    # Maximum number of actors to launch.
    concurrency=num_gpus if device == "cuda" else num_cpus,
    # Size of batches passed to embeddings actor.
    batch_size=100,
    # 1 GPU for each actor.
    num_gpus=1 if device == "cuda" else 0,
)

In [14]:
%%time

# Remove any previous output directory so this run starts from an empty target.
if (DATA_DIR / "full_scale" / "03_embeddings").exists():
    shutil.rmtree(DATA_DIR / "full_scale" / "03_embeddings")
# Write the dataset (including the "embeddings" column) as JSON under full_scale/03_embeddings.
(
    ds
    .write_json(
        num_rows_per_file=50,
        path=DATA_DIR / "full_scale" / "03_embeddings",
    )
)

2024-03-09 22:14:03,157	INFO set_read_parallelism.py:115 -- Using autodetected parallelism=64 for operator ReadJSON to satisfy parallelism at least twice the available number of CPUs (32).
2024-03-09 22:14:03,158	INFO set_read_parallelism.py:122 -- To satisfy the requested parallelism of 64, each read task output is split into 64 smaller blocks.
2024-03-09 22:14:03,159	INFO streaming_executor.py:110 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[ReadJSON] -> ActorPoolMapOperator[MapBatches(EmbedBatch)] -> TaskPoolMapOperator[Write]
2024-03-09 22:14:03,159	INFO streaming_executor.py:111 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
2024-03-09 22:14:03,160	INFO streaming_executor.py:113 -- Tip: For detailed progress reportin

Running 0:   0%|          | 0/1 [00:00<?, ?it/s]

[36m(MapWorker(MapBatches(EmbedBatch)) pid=8338, ip=10.0.41.156)[0m   return transform_pyarrow.concat(tables)
[36m(MapWorker(MapBatches(EmbedBatch)) pid=8338, ip=10.0.41.156)[0m Could not construct Arrow block from numpy array; encountered values of unsupported numpy type `17` in column named 'metadata', which cannot be casted to an Arrow data type. Falling back to using pandas block type, which is slower and consumes more memory. For maximum performance, consider applying the following suggestions before ingesting into Ray Data in order to use native Arrow block types:
[36m(MapWorker(MapBatches(EmbedBatch)) pid=8338, ip=10.0.41.156)[0m - Expand out each key-value pair in the dict column into its own column
[36m(MapWorker(MapBatches(EmbedBatch)) pid=8338, ip=10.0.41.156)[0m - Replace `None` values with an Arrow supported data type
[36m(MapWorker(MapBatches(EmbedBatch)) pid=8338, ip=10.0.41.156)[0m 
pytorch_model.bin: 100%|██████████| 670M/670M [00:02<00:00, 224MB/s][32m [rep

CPU times: user 269 ms, sys: 105 ms, total: 374 ms
Wall time: 24 s


In [15]:
# List the files produced by the write_json step.
!ls -llh {DATA_DIR / "full_scale" / "03_embeddings"}


total 456K
-rw-r--r-- 1 ray users 453K Mar  9 22:14 3_000000_000000.json


In [16]:
# SECURITY: never commit a real API key to a notebook. The key is read from the
# PINECONE_API_KEY environment variable; the value is None when it is unset.
# Any key that was previously committed here must be treated as leaked and rotated.
YOUR_PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [17]:
# Prefer the PINECONE_API_KEY environment variable; fall back to the notebook-level value.
pinecone_api_key = os.environ.get("PINECONE_API_KEY", YOUR_PINECONE_API_KEY)
# Module-level Pinecone client; later cells use it to open index handles.
pc = Pinecone(api_key=pinecone_api_key)

In [18]:
# Show the indexes visible to this API key.
pc.list_indexes()

{'indexes': [{'dimension': 1024,
              'host': 'canopy--shanker-index-zhxkhfk.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'cosine',
              'name': 'canopy--shanker-index',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}},
             {'dimension': 1024,
              'host': 'canopy--cong-index-zhxkhfk.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'cosine',
              'name': 'canopy--cong-index',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}},
             {'dimension': 1024,
              'host': 'shanker-index-zhxkhfk.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'cosine',
              'name': 'shanker-index',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}},
     

In [44]:
def create_index(
    index_name: str,
    cloud: str,
    region: str,
    metric: str,
    embedding_dimension: int,
    index_type: str,
    **kwargs,
) -> None:
    """Create a Pinecone index, replacing any existing index with the same name.

    WARNING: if an index called `index_name` already exists it is deleted
    (together with its vectors) before the new one is created.

    Args:
        index_name: Name of the index to (re)create.
        cloud: Cloud provider for a serverless index (unused for "pod").
        region: Cloud region for a serverless index (unused for "pod").
        metric: Similarity metric passed to Pinecone.
        embedding_dimension: Dimensionality of the vectors to be stored.
        index_type: Either "serverless" or "pod".
        **kwargs: Extra `PodSpec` options for a "pod" index. They override the
            defaults used here (`environment="gcp-starter"`,
            `metadata_config={"indexed": []}`). Ignored for "serverless".

    Raises:
        ValueError: If `index_type` is not "serverless" or "pod". This is
            checked before any Pinecone call so that an existing index is never
            deleted without a replacement being created.
    """
    if index_type not in ("serverless", "pod"):
        raise ValueError(
            f"Unsupported index_type {index_type!r}; expected 'serverless' or 'pod'."
        )

    # Build the spec up front so that a bad spec fails before anything is deleted.
    if index_type == "serverless":
        spec = ServerlessSpec(cloud=cloud, region=region)
    else:
        # Merge rather than pass both explicitly: a caller-supplied
        # `environment` or `metadata_config` would otherwise raise a
        # duplicate-keyword TypeError.
        pod_options = {
            "environment": "gcp-starter",
            "metadata_config": {"indexed": []},
            **kwargs,
        }
        spec = PodSpec(**pod_options)

    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", YOUR_PINECONE_API_KEY))
    existing_index_names = {index.name for index in pc.list_indexes().indexes}

    if index_name in existing_index_names:
        pc.delete_index(index_name)

    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric=metric,
        spec=spec,
    )

In [45]:
# Settings for the Pinecone index that is (re)created and populated in the following cells.
cloud = "aws"
region = "us-west-2"
metric = "cosine"
index_type = "serverless"  # "serverless" or "pod"
index_name = "shanker-index" # A unique name for the index under your organization
embedding_dimension = 1024  # From the model page of thenlper/gte-large

In [46]:
# (Re)create the index; create_index deletes an existing index with the same name first.
create_index(
    index_name=index_name,
    cloud=cloud,
    region=region,
    metric=metric,
    index_type=index_type,
    embedding_dimension=embedding_dimension,
)

In [47]:
# Read the embeddings written earlier back from disk; this rebinds `ds` to a fresh dataset.
ds = ray.data.read_json(DATA_DIR / "full_scale" / "03_embeddings/")
ds

Dataset(
   num_blocks=64,
   num_rows=5,
   schema={
      id: string,
      source: string,
      text: string,
      meta...: struct<primary_category: string, published: string, title: string, updated: string>,
      embeddings: list<item: double>
   }
)

In [48]:
def convert_to_pinecone_vectors(row):
    """Turn one dataset row into a record with "id", "values" and "metadata".

    The id is built as "<page>#<section>#<hash>", where <page> is the text
    after the last "/" in `row["source"]`, <section> is the text after the
    last "#" in `row["source"]`, and <hash> is `joblib.hash` of the whole row.
    "values" is `row["embeddings"]`; "metadata" is an empty dict.
    """
    digest = joblib.hash(row)
    source = row["source"]
    page_name = source.rsplit("/", 1)[-1]
    # When the source has no "#", this is the entire source string.
    section_name = source.rsplit("#", 1)[-1]
    vector_id = f"{page_name}#{section_name}#{digest}"  # sample ID prefix
    return {"id": vector_id, "values": row["embeddings"], "metadata": {}}


# Convert every row to an id/values/metadata record (rebinds `ds`).
ds = ds.map(convert_to_pinecone_vectors)

In [49]:
# Pull a single-row batch to inspect the converted record format.
sample = ds.take_batch(1)

2024-03-09 22:24:35,091	INFO set_read_parallelism.py:115 -- Using autodetected parallelism=64 for operator ReadJSON to satisfy parallelism at least twice the available number of CPUs (32).
2024-03-09 22:24:35,092	INFO set_read_parallelism.py:122 -- To satisfy the requested parallelism of 64, each read task output is split into 64 smaller blocks.
2024-03-09 22:24:35,092	INFO streaming_executor.py:110 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[ReadJSON] -> TaskPoolMapOperator[Map(convert_to_pinecone_vectors)] -> LimitOperator[limit=1]
2024-03-09 22:24:35,092	INFO streaming_executor.py:111 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
2024-03-09 22:24:35,093	INFO streaming_executor.py:113 -- Tip: For detailed progress rep

Running 0:   0%|          | 0/1 [00:00<?, ?it/s]

In [50]:
# Rows per batch; used for the size check in this cell and for the upsert stage later.
batch_size = 100

def get_size_of_batch(batch):
    """Measure a batch's in-memory footprint when held as a pandas DataFrame.

    Returns a single-row column mapping, `{"size_in_mb": [size]}`, where the
    size is the deep memory usage in bytes divided by 1024**2.
    """
    frame = pd.DataFrame(batch)
    total_bytes = frame.memory_usage(deep=True).sum()
    return {"size_in_mb": [total_bytes / 1024**2]}


# Compute the size of each batch and summarize the distribution.
out = ds.map_batches(get_size_of_batch, batch_size=batch_size).to_pandas()
out["size_in_mb"].describe()

2024-03-09 22:24:41,301	INFO set_read_parallelism.py:115 -- Using autodetected parallelism=64 for operator ReadJSON to satisfy parallelism at least twice the available number of CPUs (32).
2024-03-09 22:24:41,302	INFO set_read_parallelism.py:122 -- To satisfy the requested parallelism of 64, each read task output is split into 64 smaller blocks.
2024-03-09 22:24:41,302	INFO streaming_executor.py:110 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[ReadJSON] -> TaskPoolMapOperator[Map(convert_to_pinecone_vectors)->MapBatches(get_size_of_batch)]
2024-03-09 22:24:41,303	INFO streaming_executor.py:111 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
2024-03-09 22:24:41,303	INFO streaming_executor.py:113 -- Tip: For detailed progres

Running 0:   0%|          | 0/1 [00:00<?, ?it/s]

count    1.000000
mean     0.001591
std           NaN
min      0.001591
25%      0.001591
50%      0.001591
75%      0.001591
max      0.001591
Name: size_in_mb, dtype: float64

In [51]:
# Rough number of full batches (floor division, so a trailing partial batch is not counted).
approx_total_batches = ds.count() // batch_size
# NOTE(review): bare expressions in the middle of a cell display nothing.
approx_total_batches
# Number of concurrent upsert workers, passed as `concurrency` to map_batches below.
concurrency = 9
approx_num_upserts_per_connection = approx_total_batches // concurrency
approx_num_upserts_per_connection

# Pinecone namespace used for the upserts and for the queries that follow.
pinecone_namespace = "example-namespace"


class UpsertVectors:
    """Callable for `map_batches` that upserts each batch into Pinecone.

    Each instance opens its own client and index handle in `__init__`, using
    the module-level `index_name` and `pinecone_namespace`.
    """

    def __init__(self):
        api_key = os.environ.get("PINECONE_API_KEY", YOUR_PINECONE_API_KEY)
        self.pc = Pinecone(api_key=api_key)
        self.index = self.pc.Index(index_name)
        self.namespace = pinecone_namespace

    def __call__(self, batch):
        """Upsert the batch's "id"/"values"/"metadata" columns row by row.

        The batch is returned unchanged so that downstream steps can see which
        rows were sent.
        """
        records = []
        for vector_id, vector_values, vector_metadata in zip(
            batch["id"], batch["values"], batch["metadata"]
        ):
            records.append(
                {
                    "id": vector_id,
                    "values": vector_values,
                    "metadata": vector_metadata,
                }
            )
        self.index.upsert(vectors=records, namespace=self.namespace)
        return batch


# Attach the upsert stage: up to `concurrency` UpsertVectors workers, 1 CPU each,
# fed batches of `batch_size` rows.
ds = ds.map_batches(
    UpsertVectors,
    concurrency=concurrency,
    batch_size=batch_size,
    num_cpus=1,
)


# Materialize the pipeline to pandas and keep one row per vector id; the row
# count is used as the expected vector count when verifying the index.
df_written = ds.to_pandas().drop_duplicates(subset=["id"])


import time


def verify_index(
    index_name: str,
    num_expected_vectors: int,
    timeout_s: float = 600.0,
    poll_interval_s: float = 5.0,
):
    """Block until the index reports exactly `num_expected_vectors` vectors.

    Polls `describe_index_stats()` on the module-level `pc` client until the
    reported total matches, sleeping `poll_interval_s` seconds between polls.

    Args:
        index_name: Name of the index to check.
        num_expected_vectors: Total vector count the index should report.
        timeout_s: Maximum number of seconds to wait before giving up.
        poll_interval_s: Seconds to sleep between polls.

    Raises:
        TimeoutError: If the count still does not match after `timeout_s`
            seconds. Without this bound the loop would spin forever whenever
            the expected count can never be reached (for example a failed
            upsert, or an index that already holds other vectors).
    """
    index = pc.Index(index_name)
    deadline = time.monotonic() + timeout_s
    stats = index.describe_index_stats()

    while stats.total_vector_count != num_expected_vectors:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Index {index_name!r} reports {stats.total_vector_count} vectors; "
                f"expected {num_expected_vectors} after waiting {timeout_s} seconds."
            )
        time.sleep(poll_interval_s)
        stats = index.describe_index_stats()


# Wait until the index reports as many vectors as there are unique ids in df_written.
verify_index(index_name=index_name, num_expected_vectors=df_written.shape[0])

2024-03-09 22:25:00,016	INFO set_read_parallelism.py:115 -- Using autodetected parallelism=64 for operator ReadJSON to satisfy parallelism at least twice the available number of CPUs (32).
2024-03-09 22:25:00,016	INFO set_read_parallelism.py:122 -- To satisfy the requested parallelism of 64, each read task output is split into 64 smaller blocks.
2024-03-09 22:25:00,017	INFO streaming_executor.py:110 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[ReadJSON] -> ActorPoolMapOperator[Map(convert_to_pinecone_vectors)->MapBatches(UpsertVectors)]
2024-03-09 22:25:00,018	INFO streaming_executor.py:111 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), exclude_resources=ExecutionResources(cpu=0, gpu=0, object_store_memory=0), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
2024-03-09 22:25:00,018	INFO streaming_executor.py:113 -- Tip: For detailed progress r

Running 0:   0%|          | 0/1 [00:00<?, ?it/s]

[36m(MapWorker(Map(convert_to_pinecone_vectors)->MapBatches(UpsertVectors)) pid=14649, ip=10.0.41.156)[0m Could not construct Arrow block from numpy array; encountered values of unsupported numpy type `17` in column named 'metadata', which cannot be casted to an Arrow data type. Falling back to using pandas block type, which is slower and consumes more memory. For maximum performance, consider applying the following suggestions before ingesting into Ray Data in order to use native Arrow block types:
[36m(MapWorker(Map(convert_to_pinecone_vectors)->MapBatches(UpsertVectors)) pid=14649, ip=10.0.41.156)[0m - Expand out each key-value pair in the dict column into its own column
[36m(MapWorker(Map(convert_to_pinecone_vectors)->MapBatches(UpsertVectors)) pid=14649, ip=10.0.41.156)[0m - Replace `None` values with an Arrow supported data type
[36m(MapWorker(Map(convert_to_pinecone_vectors)->MapBatches(UpsertVectors)) pid=14649, ip=10.0.41.156)[0m 


In [52]:
# Natural-language question used to try out retrieval.
query = "What is the default number of replicas for a Ray Serve deployment?"

# Embed the query with the same model name that EmbedBatch uses for the documents.
model = SentenceTransformer('thenlper/gte-large', device=get_device())
query_embedding = model.encode(query).tolist()

# Ask for the 5 nearest vectors in the namespace that was populated above.
index = pc.Index(index_name)
result = index.query(
    vector=query_embedding,
    top_k=5,
    namespace=pinecone_namespace,
)

result["matches"]



{'read_units': 6}

In [54]:

# Repeat the query, this time asking Pinecone to return each match's metadata.
result = index.query(
    vector=query_embedding, top_k=5, include_metadata=True, namespace=pinecone_namespace
)

# NOTE(review): bare expressions that are not the last statement of the cell display nothing.
result["matches"]


# Similarity score of every returned match.
scores = [match["score"] for match in result["matches"]]
scores


# Keep only the matches whose score is strictly above the threshold.
score_threshold = 0.93  # determined based on data distribution
matches_above_threshold = [
    match for match in result["matches"] if match["score"] > score_threshold
]
len(matches_above_threshold), len(result["matches"])


# Same query without metadata; the cell's displayed value is the usage reported for it.
response = index.query(
    vector=query_embedding,
    top_k=5,
    namespace=pinecone_namespace,
)

response["usage"]











{'read_units': 5}

In [55]:
# Usage probe: top_k=20 with metadata included.
response = index.query(
    vector=query_embedding,
    top_k=20,
    namespace=pinecone_namespace,
    include_metadata=True,
)
response["usage"]

{'read_units': 6}

In [56]:
# Usage probe: top_k=1 with metadata included.
response = index.query(
    vector=query_embedding,
    top_k=1,
    namespace=pinecone_namespace,
    include_metadata=True,
)
response["usage"]


{'read_units': 6}

In [57]:
# Usage probe: top_k=100 without metadata.
response = index.query(
    vector=query_embedding,
    top_k=100,
    namespace=pinecone_namespace,
)

response["usage"]

{'read_units': 5}

In [58]:
def print_query_results(results):
    """Print each query followed by the documents retrieved for it.

    For every entry in `results` the query text is printed, then for each of
    its documents: the text (with newlines shown as a literal "\\n"), the
    "title" entry of its metadata, its source and its score.
    """
    for entry in results:
        print("".join(("query: ", entry.query, "\n")))
        for doc in entry.documents:
            # Escape newlines so that each document's text stays on one output line.
            one_line_text = doc.text.replace("\n", "\\n")
            print("document: " + one_line_text)
            print("title: " + doc.metadata["title"])
            print("source: " + doc.source)
            print(f"score: {doc.score}\n")

In [61]:
from canopy.knowledge_base.record_encoder import AnyscaleRecordEncoder

# Record encoder configured from the ANYSCALE_* environment variables.
# NOTE(review): both variables must already be set when this cell runs (KeyError
# otherwise); the cell that assigns them appears further down in the notebook.
anyscale_record_encoder = AnyscaleRecordEncoder(
    api_key=os.environ["ANYSCALE_API_KEY"],
    base_url=os.environ["ANYSCALE_BASE_URL"],
    batch_size=30,
)

In [63]:
from canopy.tokenizer import Tokenizer
# Initialize Canopy's tokenizer with its default settings.
Tokenizer.initialize()

In [77]:
import os

# SECURITY: credentials must be supplied through the environment, never written
# into the notebook. Fail fast with a clear message when one is missing. Any key
# that was previously committed here must be treated as leaked and rotated.
for _required_var in ("PINECONE_API_KEY", "ANYSCALE_API_KEY"):
    if not os.environ.get(_required_var):
        raise RuntimeError(
            f"{_required_var} is not set; export it before starting the notebook kernel."
        )
os.environ["ANYSCALE_BASE_URL"] = 'https://api.endpoints.anyscale.com/v1'

from canopy.knowledge_base import KnowledgeBase

INDEX_NAME = "shanker-index" # Set the index name here

# Knowledge base over INDEX_NAME, encoding records with the Anyscale record encoder.
kb = KnowledgeBase(index_name=INDEX_NAME, record_encoder=anyscale_record_encoder)



# Left disabled: sketch for creating the Canopy index when it does not exist yet.
#from canopy.knowledge_base import list_canopy_indexes
#list_canopy_indexes()
#if not any(name.endswith(INDEX_NAME) for name in list_canopy_indexes()):
#    print(name)
#    kb.create_canopy_index()

kb.connect()





In [78]:
from canopy.models.data_models import Query
# Query the knowledge base with a single free-text query and print the retrieved documents.
results = kb.query([Query(text="p1 pod capacity")])

print_query_results(results)

query: p1 pod capacity

document: ### s1 pods\n\n\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\n\n\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\n\n\n### p1 pods\n\n\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\n\n\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.
title: indexes
source: https://docs.pinecone.io/docs/indexes
score: 0.918963

document: ## Pod storage capacity\n\n\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\n\n\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\n\n\n## Metadata\n\n\nMax metadata size per vector is 40 KB.\n\n\nNull metadata values are not suppor

[36m(autoscaler +49m20s)[0m Cluster is terminating (reason: user action).


In [66]:
from canopy.models.data_models import Query
# Same query, restricted by a metadata filter on "source" and limited to the top 2 results.
results = kb.query([Query(text="p1 pod capacity",
                          metadata_filter={"source": "https://docs.pinecone.io/docs/limits"},
                          top_k=2)])

print_query_results(results)

query: p1 pod capacity

document: ## Pod storage capacity\n\n\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\n\n\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\n\n\n## Metadata\n\n\nMax metadata size per vector is 40 KB.\n\n\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\n\n\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.
title: limits
source: https://docs.pinecone.io/docs/limits
score: 0.90533584

document: # Limits\n\n[Suggest Edits](/edit/limits)This is a summary of current Pinecone limitations. For many of these, there is a workaround or we're working on increasing the limits.\n\n\n## Upserts\n\n\nMax vector dimensionality is 20,000.\n\n\nMax size for an upsert request is 2MB. Recommended upsert limit is 100 vectors per 

In [67]:
from canopy.context_engine import ContextEngine
# Context engine built on top of the knowledge base.
context_engine = ContextEngine(kb)

In [68]:
import json

# Retrieve context for the query, with max_context_tokens capped at 512.
result = context_engine.query([Query(text="capacity of p1 pods", top_k=5)], max_context_tokens=512)

# Show the context as text along with the number of tokens it reports.
print(result.to_text(indent=2))
print(f"\n# tokens in context returned: {result.num_tokens}")

[
  {
    "query": "capacity of p1 pods",
    "snippets": [
      {
        "source": "https://docs.pinecone.io/docs/indexes",
        "text": "### s1 pods\n\n\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\n\n\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\n\n\n### p1 pods\n\n\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\n\n\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions."
      },
      {
        "source": "https://docs.pinecone.io/docs/limits",
        "text": "## Pod storage capacity\n\n\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\n\n\nEach **s1** pod has enough capacity for 5M vectors with 76

In [69]:
from canopy.chat_engine import ChatEngine
from canopy.llm.anyscale import AnyscaleLLM
from canopy.chat_engine.query_generator import InstructionQueryGenerator

# LLM client configured from the ANYSCALE_* environment variables.
anyscale_llm = AnyscaleLLM(
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=os.environ["ANYSCALE_API_KEY"],
    base_url=os.environ["ANYSCALE_BASE_URL"],
)

# Chat engine that uses the same LLM both for query generation and for the chat itself.
chat_engine = ChatEngine(
    context_engine,
    query_builder=InstructionQueryGenerator(
        llm=anyscale_llm,
    ),
    llm=anyscale_llm,
)

In [70]:
from typing import Tuple
from canopy.models.data_models import Messages, UserMessage, AssistantMessage

def chat(new_message: str, history: Messages) -> Tuple[str, Messages]:
    """Send one user message to the module-level `chat_engine`.

    Args:
        new_message: Text of the user's new message.
        history: Messages exchanged so far; it is not modified.

    Returns:
        A `(reply, updated_history)` tuple, where `updated_history` is
        `history` followed by the new user message and the assistant's reply.
    """
    conversation = history + [UserMessage(content=new_message)]
    reply = chat_engine.chat(conversation).choices[0].message.content
    updated_history = conversation + [AssistantMessage(content=reply)]
    return reply, updated_history

In [71]:
from IPython.display import display, Markdown

# Start a new conversation and render the first answer as Markdown.
history = []
response, history = chat("What is the capacity of p1 pods?", history)
display(Markdown(response))

 Each p1 pod has enough capacity for around 1M vectors of 768 dimensions.

Source: https://docs.pinecone.io/docs/indexes

In [72]:
# Follow-up question; it is sent together with the accumulated history.
response, history = chat("And for what latency requirements does it fit?", history)
display(Markdown(response))

 P1 pods are suitable for applications with low latency requirements, specifically less than 100ms.

Source: https://docs.pinecone.io/docs/indexes

In [73]:

# Ask a question about specific people, sent with the accumulated history.
response, history = chat("Do you know about Nachum, Yingjie Miao, Mustafa Safdari?", history)
display(Markdown(response))



 I do not have any information about Nachum, Yingjie Miao, and Mustafa Safdari from the provided context.

Source: [{"query": "Are Nachum, Yingjie Miao, and Mustafa Safdari known individuals?", "snippets": [{"source": "https://docs.pinecone.io/docs/langchain", "text": "```\n\n\n```python\n{'question': 'who was Benito Mussolini?',\n 'answer': 'Benito Mussolini was an Italian politician and journalist who was the Prime Minister of Italy from 1922 until 1943. He was the leader of the National Fascist Party and invented the ideology of Fascism. He became dictator of Italy by the end of 1927 and was friends with German dictator Adolf Hitler. Mussolini attacked Greece and failed to conquer it. He was removed by the Great Council of Fascism in 1943 and was executed by a partisan on April 28, 1945. After the war, several Neo-Fascist movements have had success in Italy, the most important being the Movimento Sociale Italiano. His granddaughter Alessandra Mussolini has outspoken views similar to Fascism. \\n',\n 'sources': 'https://simple.wikipedia.org/wiki/Benito%20Mussolini, https://simple.wikipedia.org/wiki/Fascism'}"}, {"source": "https://docs.pinecone.io/docs/using-public-datasets", "text": "\u2514\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2518"}, {"source": "https://docs.pinecone.io/docs/metadata-filtering", "text": "(\"E\", [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], {\"genre\": \"drama\"})\n])"}, {"source": "https://docs.pinecone.io/docs/insert-data", "text": "(\"E\", [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], {\"genre\": \"drama\"})\n])"}, {"source": "https://docs.pinecone.io/docs/langchain", "text": "[Document(page_content='Benito Amilcare Andrea 
Mussolini KSMOM GCTE (29 July 1883 \u2013 28 April 1945) was an Italian politician and journalist. He was also the Prime Minister of Italy from 1922 until 1943. He was the leader of the National Fascist Party.\\n\\nBiography\\n\\nEarly life\\nBenito Mussolini was named after Benito Juarez, a Mexican opponent of the political power of the Roman Catholic Church, by his anticlerical (a person who opposes the political interference of the Roman Catholic Church in secular affairs) father. Mussolini\\'s father was a blacksmith. Before being involved in politics, Mussolini was a newspaper editor (where he learned all his propaganda skills) and elementary school teacher.\\n\\nAt first, Mussolini was a socialist, but when he wanted Italy to join the First World War, he was thrown out of the socialist party. He \\'invented\\' a new ideology, Fascism, much out of Nationalist\\xa0and Conservative views.\\n\\nRise to power and becoming dictator\\nIn 1922, he took power by having a large group of men, \"Black Shirts,\" march on Rome and threaten to take"}]}] + What is the capacity of p1 pods?