In [12]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Llama 2 model accessed through LangChain's Ollama wrapper.
llm = Ollama(
    model="llama2",
    verbose=True,
    # `callbacks` supersedes the deprecated `callback_manager` argument; the
    # stdout handler prints tokens as they are generated.
    callbacks=[StreamingStdOutCallbackHandler()],  # allow for streaming the response
)


In [13]:
# Load the housing CSV and split it into chunks for embedding.
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the CSV into LangChain Documents.
# NOTE(review): presumably one Document per CSV row — confirm against the loader.
data = CSVLoader("housing_prices.csv").load()

# chunk_size / chunk_overlap are tuning knobs — you should experiment with them!
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
all_splits = text_splitter.split_documents(data)

# Report the chunk count and preview a small sample instead of dumping every
# Document into the notebook output.
print(f"{len(all_splits)} chunks")
all_splits[:3]

[Document(page_content='Housing Prices Dataset', metadata={'source': 'housing_prices.txt'}),
 Document(page_content='Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood\n1,114300,1790,2,2,2,No,East\n2,114200,2030,4,2,3,No,East\n3,114800,1740,3,2,1,No,East\n4,94700,1980,3,2,3,No,East\n5,119800,2130,3,3,3,No,East\n6,114600,1780,3,2,2,No,North\n7,151600,1830,3,3,3,Yes,West\n8,150700,2160,4,2,2,No,West\n9,119200,2110,4,2,3,No,East\n10,104000,1730,3,3,3,No,East\n11,132500,2030,3,2,3,Yes,East\n12,123000,1870,2,2,2,Yes,East\n13,102600,1910,3,2,4,No,North\n14,126300,2150,3,3,5,Yes,North\n15,176800,2590,4,3,4,No,West\n16,145800,1780,4,2,1,No,West\n17,147100,2190,3,3,4,Yes,East\n18,83600,1990,3,3,4,No,North\n19,111400,1700,2,2,1,Yes,East\n20,167200,1920,3,3,2,Yes,West\n21,116200,1790,3,2,3,No,East\n22,113800,2000,3,2,4,No,North\n23,91700,1690,3,2,3,No,North\n24,106100,1820,3,2,3,Yes,North\n25,156400,2210,4,3,2,Yes,East\n26,149300,2290,4,3,3,No,North\n27,137000,2000,4,2,3,No,West\n28,993

In [None]:
# Remove any previously persisted vector store so the next build starts clean.
# shutil works on every platform (unlike shelling out to `rm`), and
# ignore_errors=True keeps this a no-op when the directory does not exist.
import shutil

shutil.rmtree("local_embeddings", ignore_errors=True)

In [14]:
# Embed and store
from langchain.vectorstores import Chroma
from langchain.embeddings import GPT4AllEmbeddings
from langchain.embeddings import OllamaEmbeddings  # We can also try Ollama embeddings

# Build the Chroma index from the split documents using GPT4All embeddings,
# with "local_embeddings" as the persistence directory.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=GPT4AllEmbeddings(),
    persist_directory="local_embeddings",
)

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [15]:
# Query the vector store directly to inspect retrieval on its own, before any
# LLM is involved.
question = "types of Neighborhood?"
docs = vectorstore.similarity_search(query=question)
len(docs)  # how many documents the search returned

4

In [16]:
# Look at the first document the similarity search returned.
top_hit = docs[0]
print(top_hit)

page_content='Home: 119\nPrice: 150200\nSqFt: 1950\nBedrooms: 3\nBathrooms: 2\nOffers: 3\nBrick: Yes\nNeighborhood: North' metadata={'row': 118, 'source': 'housing_prices.csv'}


In [17]:
# QA chain
from langchain.chains import RetrievalQA

# RAG prompt
from langchain import hub

# Pull the Llama-oriented RAG prompt template from the LangChain hub.
QA_CHAIN_PROMPT = hub.pull("rlm/rag-prompt-llama")

# Combine the LLM with a retriever over the vector store; the pulled prompt is
# handed to the underlying chain through chain_type_kwargs.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
)


# Use `invoke` rather than calling the chain object directly (the direct-call
# form is deprecated). The returned dict is kept in `result`.
result = qa_chain.invoke({"query": "types of Neighborhoods?"})

Thank you for providing the context. Based on the information provided, there are three types of neighborhoods:

1. North Neighborhood: All the homes in this area have a brick exterior and are located in the North part of the city.
2. East Neighborhood: Homes in this area do not have a brick exterior and are located in the East part of the city.
3. Unspecified Neighborhood: The remaining homes do not have information about their neighborhood type.

In [18]:
# NOTE(review): the retriever presumably hands the model only a few matching
# rows (the direct similarity search returned 4), so an aggregate question
# like this one cannot be answered reliably over the whole dataset — treat
# the answer as illustrative only.
# `invoke` replaces the deprecated direct call on the chain object.
result = qa_chain.invoke({"query": "What is the most expensive neighborhood?"})

The most expensive neighborhood based on the provided information is North. The home with the highest price in this neighborhood is Home: 89, with a price of $127,700.

In [19]:
# NOTE(review): only the retrieved rows reach the model as context, so this
# prompt presumably cannot return the full dataset — inspect the loaded
# documents directly when every row is needed.
# `invoke` replaces the deprecated direct call on the chain object.
result = qa_chain.invoke({"query": "display all the rows of the housing dataset?"})

Here are the rows of the retrieved Housing Prices Dataset:

Home: 108, Price: $134,000, SqFt: 1890, Bedrooms: 3, Bathrooms: 2, Offers: 1, Brick: Yes, Neighborhood: East

Home: 123, Price: $144,800, SqFt: 2060, Bedrooms: 2, Bathrooms: 2, Offers: 1, Brick: Yes, Neighborhood: East