In [1]:
import faiss
import pandas as pd
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.readers.file import PagedCSVReader
from llama_index.vector_stores.faiss import FaissVectorStore

Reference for installing and running Ollama locally: https://www.kdnuggets.com/ollama-tutorial-running-llms-locally-made-super-simple

In [2]:
# Set up Ollama embedding and LLM
# Both clients read the same model name and server URL from these constants,
# so changing the Ollama endpoint or model cannot leave one of them behind.
OLLAMA_MODEL = "llama3"
OLLAMA_BASE_URL = "http://localhost:11434"
# Seconds to wait for a completion. Set explicitly because a locally served
# model can take longer than the client's built-in default allows.
OLLAMA_REQUEST_TIMEOUT = 120.0

ollama_embedding = OllamaEmbedding(
    model_name=OLLAMA_MODEL,
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs={"mirostat": 0},
)

# Register both models globally so later cells pick them up via Settings.
Settings.embed_model = ollama_embedding
Settings.llm = Ollama(
    model=OLLAMA_MODEL,
    base_url=OLLAMA_BASE_URL,
    request_timeout=OLLAMA_REQUEST_TIMEOUT,
)

In [3]:
# Read the customer CSV into documents, routing ".csv" files through
# PagedCSVReader instead of the directory reader's default handling.
file_path = 'data/customers-100.csv'
csv_reader = PagedCSVReader()
extractors_by_suffix = {".csv": csv_reader}
reader = SimpleDirectoryReader(input_files=[file_path], file_extractor=extractors_by_suffix)
docs = reader.load_data()

In [4]:
# Discover the embedding width empirically: embed one throwaway string and
# measure the length of the vector that comes back.
probe_vector = ollama_embedding.get_text_embedding("test")
EMBED_DIMENSION = len(probe_vector)

In [5]:
# Wrap a flat L2 FAISS index, sized to the measured embedding width,
# in the LlamaIndex vector-store adapter.
faiss_index = faiss.IndexFlatL2(
    EMBED_DIMENSION,
)
vector_store = FaissVectorStore(
    faiss_index=faiss_index,
)

In [6]:
# Create and run the ingestion pipeline
pipeline = IngestionPipeline(
    vector_store=vector_store,
    documents=docs
)

nodes = pipeline.run()

In [7]:
# Create query engine
vector_store_index = VectorStoreIndex(nodes)
query_engine = vector_store_index.as_query_engine(similarity_top_k=2)

In [16]:
# Ask for the employer of one named customer
question = "which company does roy berry work at?"
response = query_engine.query(question)
print(response.response)

The company that Roy Berry works at is not specified in the given context. The provided data only includes information about customers Clifford Jacobson (Simon LLC) and Vernon Kane (Carter-Strickland), but there is no mention of a customer named Roy Berry or their corresponding company. Therefore, it's impossible to determine which company Roy Berry works at based on the given context.


In [9]:
# Ask for the customer id of one named customer
question = "can you give me the customer id of roy berry?"
response = query_engine.query(question)
print(response.response)

6F94879bDAfE5a6


In [17]:
# Ask an aggregate question about the whole file
question = "how many different customers are there in customers-100 data file?"
response = query_engine.query(question)
print(response.response)

There are 2 different customers.


In [14]:
# Ask what the data file itself is
question = "what is customers-100?"
response = query_engine.query(question)
print(response.response)

The data file containing customer information.
