In [None]:
# Step 1: Install dependencies
# pip install pandas langchain langchain_community langchain_ollama faiss-cpu

import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
#from langchain.embedding import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.chains import RetrievalQA

# --- Step 2: Load your Excel data ---
df = pd.read_excel("sales_data.xlsx")

# Optional: Combine useful columns into a text representation
df["text"] = df.apply(lambda row: f"Product: {row['Product']}, Region: {row['Region']}, "
                                  f"Sales: {row['Sales']}, Date: {row['Date']}", axis=1)

# Convert each row into a LangChain Document
documents = [Document(page_content=text) for text in df["text"].tolist()]

# --- Step 3: Split text if needed (for large datasets) ---
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(documents)

# --- Step 4: Create embeddings ---
embeddings = OllamaEmbeddings(model="gemma:2b")

# --- Step 5: Store embeddings in FAISS vector store ---
vectorstore = FAISS.from_documents(texts, embeddings)

# --- Step 6: Create RetrievalQA chain ---
retriever = vectorstore.as_retriever()
llm = Ollama(model="gemma2:2b")  # You can replace with other local models

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

# --- Step 7: Ask questions ---
query = "Which product had the highest sales in the East region?"
result = qa_chain.invoke({"query": query})

print("Answer:", result["result"])
print("\nSources:", [doc.page_content for doc in result["source_documents"]])


  llm = Ollama(model="gemma2:2b")  # You can replace with other local models


In [None]:
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain_ollama import OllamaEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.chains import RetrievalQA

# Load Excel
df = pd.read_excel("sales_data.xlsx")
df["text"] = df.apply(lambda row: f"Product: {row['Product']}, Region: {row['Region']}, Sales: {row['Sales']}, Date: {row['Date']}", axis=1)
documents = [Document(page_content=text) for text in df["text"].tolist()]

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(documents)

embeddings = OllamaEmbeddings(model="gemma:2b")  # light model
vectorstore = FAISS.from_documents(texts, embeddings)

retriever = vectorstore.as_retriever()
llm = Ollama(model="gemma2:2b")  # smaller, stable model

qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

query = "Which product had the highest sales in the East region?"
result = qa_chain.invoke({"query": query})

print("Answer:", result["result"])


  llm = Ollama(model="gemma2:2b")  # smaller, stable model


In [1]:
from langchain.llms import Ollama
llm_obj = Ollama(model='gemma:2b')
response = llm_obj.invoke('What is embedding?')
print(response)

  llm_obj = Ollama(model='gemma:2b')


Embedding is a process of representing a piece of information, such as a text or a sound, in a way that can be stored and accessed more easily. This can be done using a variety of methods, such as:

* **Vectorization:** This involves converting the information into a set of numerical values, which can then be stored in a database.
* **Text encoding:** This involves converting the text into a sequence of bytes, which can then be stored in a database.
* **Audio encoding:** This involves converting the audio into a sequence of samples, which can then be stored in a database.
* **Multimodal embedding:** This involves representing the information from multiple modalities (such as text, audio, and video) in a unified way.

Embedding can be used to improve the performance of a machine learning model by reducing the amount of data that needs to be processed. Additionally, embedding can help to make the model more robust to noise and outliers in the data.


In [3]:
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain_ollama import OllamaEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.chains import RetrievalQA

# Load Excel
df = pd.read_excel("sales_data.xlsx")
df["text"] = df.apply(lambda row: f"Product: {row['Product']}, Region: {row['Region']}, Sales: {row['Sales']}, Date: {row['Date']}", axis=1)
documents = [Document(page_content=text) for text in df["text"].tolist()]


In [3]:
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(documents)

embeddings = OllamaEmbeddings(model="gemma:2b")  # light model
vectorstore = FAISS.from_documents(texts, embeddings)


In [4]:
retriever = vectorstore.as_retriever()
llm = Ollama(model="gemma2:2b")  # smaller, stable model



In [None]:
print("Answer:", result["result"])

In [4]:
from langchain.llms import Ollama
llm_obj = Ollama(model='gemma:2b')
response = llm_obj.invoke('What is embedding?')
print(response)
# OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model 
# is not found and you should pull the model with `ollama pull gemma:2b`.

  llm_obj = Ollama(model='gemma:2b')


OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model is not found and you should pull the model with `ollama pull gemma:2b`.