In [2]:
import os
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_community.document_loaders import TextLoader

from pinecone import Pinecone

from langchain.text_splitter import RecursiveCharacterTextSplitter


In [None]:
template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate.from_template(template)

In [None]:
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])



In [None]:
n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
n_batch = 2048  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="models/llama-2-7b-chat.gguf.q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=False,  # Verbose is required to pass to the callback manager
)

In [None]:
llm_chain = prompt | llm
question = "what is cricket"
llm_chain.invoke({"question": question})

In [None]:
embeddings = LlamaCppEmbeddings(model_path='models/llama-2-7b-chat.gguf.q4_0.bin',verbose=False)

In [None]:
texts = "This is a test document."

In [None]:
query_result = embeddings.embed_query(texts)

In [None]:
query_result

In [None]:
flattened_result = [float(item) for sublist in query_result for item in sublist]


In [None]:
len(flattened_result)

In [None]:
def embed_query(self, text: str):
    """Embed a query using the Llama model.

    Args:
        text: The text to embed.

    Returns:
        Embeddings for the text.
    """
    embedding = self.client.embed(text)
    return embedding

# Langchain with Pincecone

In [3]:
from langchain_pinecone import PineconeVectorStore

In [4]:
embeddings = LlamaCppEmbeddings(model_path='models/llama-2-7b-chat.gguf.q4_0.bin',verbose=False)

In [5]:
pc_v = PineconeVectorStore(index_name='powerfule',pinecone_api_key=os.environ.get('PINECONE_API_KEY'),embedding=embeddings)

In [None]:
TextLoader()

In [6]:
loader = TextLoader("test.txt")
doc = loader.load()

In [56]:
splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=20,separators='')

In [57]:
help(splitter.create_documents)

Help on method create_documents in module langchain_text_splitters.base:

create_documents(texts: 'List[str]', metadatas: 'Optional[List[dict]]' = None) -> 'List[Document]' method of langchain_text_splitters.character.RecursiveCharacterTextSplitter instance
    Create documents from a list of texts.



In [58]:
chunks = splitter.create_documents(texts=['the powerful yello panther leaps about the lethargic dog species'])


In [59]:
chunks

[Document(page_content='the powerful yello panther leaps about the'),
 Document(page_content='leaps about the lethargic dog species')]

In [24]:
pc_v.add_documents(doc)

embedings LEN 1


['bf5bd270-72a3-454c-a193-717d3c3bfccf']

In [6]:
pc_v.add_texts('apple power')

embedings LEN 11


['7f4fb017-e8f2-4f81-bb2a-e30d0c88e317',
 '34b19f17-6939-4550-8043-5e4992c66443',
 '400c09c4-d1ef-4041-af3c-c30bc3cdee50',
 'a6aaac90-1bbb-40be-a2a4-cc8cac19735a',
 '1a7c972c-c53d-4e6d-9eca-d92248e62396',
 '8f269294-6d84-48a5-af5b-43a260f68f47',
 '4c924870-293d-48b0-a61d-b298209c26ec',
 '2c6475a0-187a-4d6a-8fd6-ad5a1d37a529',
 '8460776d-6195-4e93-a563-0be57475a030',
 '9967035a-386b-4869-9012-8705383dcd92',
 'bcf98835-7c74-4998-ad71-86cb7f5ddf8a']

In [None]:
help(pc)

In [None]:
os.environ.get('PINECONE_API_KEY')

In [None]:
help(PineconeVectorStore)

In [11]:
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY')) # Setting Pine Cone API Key


In [15]:
pc.describe_index('powerfule')

{'dimension': 4096,
 'host': 'powerfule-9g87gt2.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'powerfule',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [16]:
index = pc.Index('powerfule', pool_threads = 32)
doc_ids = sum([ids for ids in index.list()], [])
index_data  = index.fetch(doc_ids)

In [None]:
index_data

Help on method list_indexes in module pinecone.control.pinecone:

list_indexes() -> pinecone.models.index_list.IndexList method of pinecone.control.pinecone.Pinecone instance
    Lists all indexes.
    
    The results include a description of all indexes in your project, including the 
    index name, dimension, metric, status, and spec.
    
    :return: Returns an `IndexList` object, which is iterable and contains a 
        list of `IndexDescription` objects. It also has a convenience method `names()`
        which returns a list of index names.
    
    ```python
    from pinecone import Pinecone
    
    client = Pinecone()
    
    index_name = "my_index"
    if index_name not in client.list_indexes().names():
        print("Index does not exist, creating...")
        client.create_index(
            name=index_name,
            dimension=768,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-west-2")
        )
    ```
    
    You can also use