In [45]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, ServiceContext,set_global_service_context
from llama_index.core.node_parser import SentenceSplitter
import os
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

#### Creating documents by reading data

In [46]:
documents = SimpleDirectoryReader("docs",recursive = True, exclude_hidden = True).load_data()

In [47]:
documents, len(documents)

([Document(id_='1f72bf74-cac3-41d7-baa2-4a6064d2578c', embedding=None, metadata={'file_path': 'c:\\Users\\saras\\steffysara\\VS_Code_Projects\\Coversational_chatbot\\docs\\coffee_bean_menu.csv', 'file_name': 'coffee_bean_menu.csv', 'file_type': 'application/vnd.ms-excel', 'file_size': 1128, 'creation_date': '2024-08-09', 'last_modified_date': '2024-08-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Origin, Hacienda La Esmeralda, Panama, Yirgacheffe region, Ethiopia, Blue Mountains, Jamaica, Mandheling, Sumatra, Indonesia\nAltitude (meters), 1,500-2,000, 1,700-2,200, 910-1,700, 1,000-1,500\nFlavor Profile, Jasmine, bergamot, honey, peaches, berries, Floral, lemon, bergamot, jasmine, honey, Sweet herbs, nuts, chocolate, Dark chocolate, cedar, tropica

In [48]:
documents[0]

Document(id_='1f72bf74-cac3-41d7-baa2-4a6064d2578c', embedding=None, metadata={'file_path': 'c:\\Users\\saras\\steffysara\\VS_Code_Projects\\Coversational_chatbot\\docs\\coffee_bean_menu.csv', 'file_name': 'coffee_bean_menu.csv', 'file_type': 'application/vnd.ms-excel', 'file_size': 1128, 'creation_date': '2024-08-09', 'last_modified_date': '2024-08-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Origin, Hacienda La Esmeralda, Panama, Yirgacheffe region, Ethiopia, Blue Mountains, Jamaica, Mandheling, Sumatra, Indonesia\nAltitude (meters), 1,500-2,000, 1,700-2,200, 910-1,700, 1,000-1,500\nFlavor Profile, Jasmine, bergamot, honey, peaches, berries, Floral, lemon, bergamot, jasmine, honey, Sweet herbs, nuts, chocolate, Dark chocolate, cedar, tropical 

In [49]:
print(documents[0])

Doc ID: 1f72bf74-cac3-41d7-baa2-4a6064d2578c
Text: Origin, Hacienda La Esmeralda, Panama, Yirgacheffe region,
Ethiopia, Blue Mountains, Jamaica, Mandheling, Sumatra, Indonesia
Altitude (meters), 1,500-2,000, 1,700-2,200, 910-1,700, 1,000-1,500
Flavor Profile, Jasmine, bergamot, honey, peaches, berries, Floral,
lemon, bergamot, jasmine, honey, Sweet herbs, nuts, chocolate, Dark
chocolate, cedar, ...


In [50]:
print(documents[0].get_metadata_str())

file_path: c:\Users\saras\steffysara\VS_Code_Projects\Coversational_chatbot\docs\coffee_bean_menu.csv
file_name: coffee_bean_menu.csv
file_type: application/vnd.ms-excel
file_size: 1128
creation_date: 2024-08-09
last_modified_date: 2024-08-09


In [51]:
documents[0].__dict__

{'id_': '1f72bf74-cac3-41d7-baa2-4a6064d2578c',
 'embedding': None,
 'metadata': {'file_path': 'c:\\Users\\saras\\steffysara\\VS_Code_Projects\\Coversational_chatbot\\docs\\coffee_bean_menu.csv',
  'file_name': 'coffee_bean_menu.csv',
  'file_type': 'application/vnd.ms-excel',
  'file_size': 1128,
  'creation_date': '2024-08-09',
  'last_modified_date': '2024-08-09'},
 'excluded_embed_metadata_keys': ['file_name',
  'file_type',
  'file_size',
  'creation_date',
  'last_modified_date',
  'last_accessed_date'],
 'excluded_llm_metadata_keys': ['file_name',
  'file_type',
  'file_size',
  'creation_date',
  'last_modified_date',
  'last_accessed_date'],
 'relationships': {},
 'text': 'Origin, Hacienda La Esmeralda, Panama, Yirgacheffe region, Ethiopia, Blue Mountains, Jamaica, Mandheling, Sumatra, Indonesia\nAltitude (meters), 1,500-2,000, 1,700-2,200, 910-1,700, 1,000-1,500\nFlavor Profile, Jasmine, bergamot, honey, peaches, berries, Floral, lemon, bergamot, jasmine, honey, Sweet herbs, 

#### Creating nodes

In [69]:
node_parser = SentenceSplitter(chunk_size=500,chunk_overlap=0)
nodes = node_parser.get_nodes_from_documents(documents)

In [70]:
len(nodes)

7

In [71]:
print(nodes[0])

Node ID: 3a1bb1cb-e1b2-437c-9238-172d2d7c3038
Text: Origin, Hacienda La Esmeralda, Panama, Yirgacheffe region,
Ethiopia, Blue Mountains, Jamaica, Mandheling, Sumatra, Indonesia
Altitude (meters), 1,500-2,000, 1,700-2,200, 910-1,700, 1,000-1,500
Flavor Profile, Jasmine, bergamot, honey, peaches, berries, Floral,
lemon, bergamot, jasmine, honey, Sweet herbs, nuts, chocolate, Dark
chocolate, cedar, ...


##### Importing LLM

In [72]:
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3:8b")

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

##### Indexing

In [75]:
#service_context = ServiceContext.from_defaults(llm=llm, embed_model=Settings.embed_model)

# check if storage already exists
PERSIST_DIR = "./storage"

if not os.path.exists(PERSIST_DIR):
    # Pass the Mistral model (llm) to the VectorStoreIndex
    index = VectorStoreIndex(nodes, llm = llm)
    # Store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

#### Use Index as query engine

In [76]:
query_engine = index.as_query_engine()
response = query_engine.query("how is boo mountain coffee")
print(response)

ReadTimeout: timed out