In [1]:
from langchain_community.llms import Ollama

# Instantiate the Ollama LLM wrapper for the "llama3:8b" model tag.
llm = Ollama(model="llama3:8b")

In [2]:
# Smoke test: send a single prompt to the model and display its reply.
llm.invoke(input="Tell me a joke")

### Basic example: one document in a vector store

In [1]:
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import WebBaseLoader
# from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.embeddings.ollama import OllamaEmbeddings

### Initial Ingest

In [19]:
# Load the source blog post; `data` holds the loaded document(s)
# that the splitter consumes in the next step.
loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
)
data = loader.load()

In [20]:
# Break the loaded page into chunks of at most 500 characters, with no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
all_splits = text_splitter.split_documents(documents=data)

In [22]:
# Embed every chunk with Ollama embeddings and store the result in the
# "rag-private" collection, persisted under ./chroma_db.
# NOTE(review): re-running this cell presumably inserts the same chunks
# again under new ids — confirm before re-ingesting.
vectorstore = Chroma.from_documents(
    all_splits,
    OllamaEmbeddings(),
    collection_name="rag-private",
    persist_directory="./chroma_db",
)

# Retriever view over the freshly built store.
retriever = vectorstore.as_retriever()

### Querying Vector Store

In [23]:
# Similarity lookup against the ingested chunks.
vectorstore.search(query="Types of Memory", search_type="similarity")

[Document(page_content='Memory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}),
 Document(page_content='Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strat

In [27]:
# A handful of document ids copied from the output of `vectorstore.get()`,
# used to inspect individual records.
temp_ids = [
    "12937b1f-80df-4af9-bd94-f870e9fa0c19",
    "13166336-6774-4841-9c09-46133b796787",
    "13a9f73b-fe11-46e1-a950-07bd394c9b14",
    "146bc8e3-ff0c-4ea7-a221-93bd28cdcec3",
    "17606735-4be2-466c-bc3b-8aa7365d3f37",
    "1a4a09b5-1047-4bf1-8801-0969898f0479",
    "1ea25c73-5c20-4f70-85db-b7e949dcbef3",
    "1ec4fb21-7ec4-4917-b645-a034d09b9cec",
]

In [28]:
# Print the stored record for each sampled id, one lookup per id.
# The loop variable is `doc_id` rather than `id`: a notebook loop variable
# stays in the global namespace, so `id` would rebind the builtin id() for
# every later cell in the kernel session.
for doc_id in temp_ids:
    print(vectorstore.get(ids=doc_id))

{'ids': ['12937b1f-80df-4af9-bd94-f870e9fa0c19'], 'embeddings': None, 'metadatas': [{'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}], 'documents': ['Conversatin samples:\n[\n  {\n    "role": "system",'], 'uris': None, 'data': None}
{'ids': ['13166336-6774-4841-9c09-46133b796787'], 'embeddings': None, 'metadatas': [{'description': 'Building agents with LLM (large language model) as its core controll

### Vector store from disk - using the chromadb module

In [1]:
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings

In [2]:
# Open the Chroma database stored under ./chroma_db_test.
persistent_client = chromadb.PersistentClient(path="./chroma_db_test")

In [3]:
# Show which collections exist in the database opened by `persistent_client`.
persistent_client.list_collections()

[Collection(name=system-documentation),
 Collection(name=si-docs),
 Collection(name=llm-papers),
 Collection(name=ssi-docs)]

In [5]:
# Inspect the metadata attached to the "ssi-docs" collection.
persistent_client.get_collection(name="ssi-docs").metadata

{'source': 'db-ssi.pdf', 'topic': 'ssi docs'}

In [7]:
# Inspect the metadata attached to the "system-documentation" collection.
persistent_client.get_collection(name="system-documentation").metadata

{'source': 'pandas-basics.html', 'topic': 'system documentation'}

In [19]:
# Wrap the existing on-disk Chroma client in a LangChain vector store.
# The collection is "system-documentation": the client's list_collections()
# output contains no "rag-private" collection, and the recorded search
# results for this store come from pandas-basics.html, which is the source
# recorded in the "system-documentation" collection metadata.
langchain_chroma = Chroma(
    client=persistent_client,
    collection_name="system-documentation",
    embedding_function=OllamaEmbeddings(),
)

In [20]:
# Report how many records the wrapped collection currently holds.
doc_count = langchain_chroma._collection.count()
print(f"There are {doc_count} in the collection")

There are 586 in the collection


In [19]:
# Similarity lookup against the disk-backed store, asking for five hits.
langchain_chroma.search(
    query="gives the axis",
    search_type="similarity",
    k=5,
)

[Document(page_content='Mastodon', metadata={'source': 'data/system-documentation/pandas-basics.html', 'title': 'Essential basic functionality — pandas 2.2.2 documentation'}),
 Document(page_content='[-1.715 , -1.0393, -0.3706]])', metadata={'source': 'data/system-documentation/pandas-basics.html', 'title': 'Essential basic functionality — pandas 2.2.2 documentation'}),
 Document(page_content='operation. DataFrame.to_numpy(), being a method, makes it clearer that the\nreturned NumPy array may not be a view on the same data in the DataFrame.', metadata={'source': 'data/system-documentation/pandas-basics.html', 'title': 'Essential basic functionality — pandas 2.2.2 documentation'}),
 Document(page_content='Note\nWhen writing performance-sensitive code, there is a good reason to spend\nsome time becoming a reindexing ninja: many operations are faster on\npre-aligned data. Adding two unaligned DataFrames internally triggers a\nreindexing step. For exploratory analysis you will hardly notic

In [22]:
# Expose the disk-backed store as a retriever for the chain built below.
# This rebinds `retriever`, which was previously created from `vectorstore`.
retriever = langchain_chroma.as_retriever()

### Chain

In [23]:
# Chat model served through Ollama.
ollama_llm = "llama2"
model = ChatOllama(model=ollama_llm)

# Prompt that restricts the answer to the retrieved context. The template
# has two slots: {context} and {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

In [24]:
def _question_text(payload):
    """Return the bare question string.

    Accepts either a plain string or a mapping of the form
    {"question": "..."} so that both calling conventions work.
    """
    return payload["question"] if isinstance(payload, dict) else payload


# RAG pipeline: retrieve context for the question, fill the prompt, call the
# model, and return the reply as a plain string.
#
# The chain is invoked with {"question": "..."}. Both the retriever and the
# prompt's {question} slot need the question text, not the wrapping dict, so
# the text is extracted first. Passing the input through unchanged would make
# the dict itself the search query and the rendered question.
chain = (
    RunnableParallel(
        {
            "context": _question_text | retriever,
            "question": _question_text,
        }
    )
    | prompt
    | model
    | StrOutputParser()
)

In [26]:
# Ask the chain about the pandas `shape` method and print the reply.
shape_answer = chain.invoke(
    {"question": "Tell me something about pandas shape method"}
)
print(shape_answer)

Based on the provided context, here is the answer to your question:

The `shape` method in pandas is a way to access and manipulate the number of rows and columns in a DataFrame. It can be used to retrieve the number of rows and columns in a DataFrame, as well as to reshape the data in various ways. For example, you can use `shape` to get the number of rows and columns in a DataFrame like this:
```
df.shape
```
This will return the shape of the DataFrame in the form `(n rows, n columns)`, where `n` is the number of rows and columns in the DataFrame.

You can also use the `shape` method to reshape the data in a DataFrame. For example, you can use it to convert a wide DataFrame into a long DataFrame like this:
```
df = pd.read_csv('data.csv')
df = df.melt(id_vars=['column1', 'column2'], value_vars='value')
df = df.shape(nrows=True, ncolumns=True)
```
In this example, the `melt` method is used to reshape the data in the DataFrame so that each column is represented as a separate variable. 

In [29]:
# Ask the chain which pandas methods cover binary operations and print the reply.
binary_ops_answer = chain.invoke(
    {"question": "Using the document tell me what pandas methods I can use to perform binary operations?"}
)
print(binary_ops_answer)

Based on the provided context, there are several pandas methods that can be used to perform binary operations:

1. `pd.Series(np.random.randn(4), index=["a", "b", "c", "d"])` - This method creates a new Series with random values in four columns.
2. `pd.Series(np.random.randn(3), index=["b", "c", "d"])` - This method creates a new Series with random values in three columns.
3. `pd.to_timedelta(m)` - This method converts a Python object of type `datetime` or `date` to a pandas TimedeltaIndex.
4. `pd.Series(np.random.randn(100, 100000), index=None)` - This method creates a new Series with random values in 100 columns and 100,000 rows.

So the answer to your question is:

You can use the following pandas methods to perform binary operations: `Series`, `to_timedelta`, and `Series` with various arguments such as `np.random.randn()` to create new Series objects.


In [None]:
# TODO: work through the references below
# https://python.langchain.com/docs/integrations/vectorstores/chroma/
# https://github.com/langchain-ai/langchain/tree/master/templates/rag-chroma-private
# https://python.langchain.com/docs/modules/data_connection/vectorstores/