In [10]:
from llama_index import SimpleDirectoryReader, ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.node_parser.text import SentenceSplitter
from llama_index.schema import TextNode
import chromadb
import os

In [11]:
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Text chunking configuration


In [12]:
chunk_size = 500  # Fixed chunk size in characters
text_splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=0)

In [13]:
chroma_db_path = "./chroma_db"
chroma_db = chromadb.PersistentClient(path=chroma_db_path)
chroma_collection = chroma_db.get_or_create_collection("aws_documentation")

In [14]:
#for ingestion pipeline we can set up a s3 bucket. user inputs their files -> sent to s3 bucket. We retrieve file from s3 and put it through the pipeline.
documents = (
    SimpleDirectoryReader(input_dir="C:/Users/Adel/Desktop/aws-documentation-main/aws-documentation-main/documents/alexa-for-business-administration-guide/doc_source", recursive=False, required_exts=[".md" , ".pdf"]).load_data())


In [16]:
text_chunks = []
for doc in documents:
    cur_text_chunks = text_splitter.split_text(doc.text)
    text_chunks.extend(cur_text_chunks)

In [22]:
embeddings = []
metadatas = []
documents = []
ids = []
for i, chunk in enumerate(text_chunks):
    node = TextNode(text=chunk)
    node_embedding = embed_model.get_text_embedding(node.get_content())
    embeddings.append(node_embedding)
    if not node.metadata:
        node.metadata = {"AWS Documentation" : chunk}  # Set a default metadata
        metadatas.append(node.metadata)
    documents.append(chunk)
    ids.append(str(i)) 

In [23]:
chroma_collection.add(
    embeddings=embeddings,
    metadatas=metadatas,
    documents=documents,
    ids=ids
)

Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

In [34]:
query_str = "Can you tell me about alexa for business"


In [35]:
from llama_index.llms import LlamaCPP

model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf" #using 13b for the time being until i can confirm that everything is working 100%
##you can change to 70b by just replacing the linkg

llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 
Model metadata: {'general.name': 'LLaMA v2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'llama.rope.dimension_count': '128', 'llama.embedding_length': '5120', 'llama.block_count': '40', 'llama.feed_forward_length': '13824', 'llama.attention.head_count': '40', 'tokenizer.ggml.eos_token_id': '2', 'general.file_type': '2', 'llama.attention.head_count_kv': '40', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'tokenizer.ggml.model': 'llama', 'general.quantization_version': '2', 'tokenizer.ggml.bos_token_id': '1', 'tokenizer.ggml.unknown_token_id': '0'}


In [36]:
query_embedding = embed_model.get_query_embedding(query_str)

In [37]:
from llama_index.vector_stores import VectorStoreQuery

query_mode = "default"
# query_mode = "sparse"
# query_mode = "hybrid"

vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding, similarity_top_k=2, mode=query_mode
)

In [41]:
query_result = chroma_collection.query(vector_store_query)
print(query_result.nodes[0].get_content())

ValueError: Expected embeddings to be a list, got VectorStoreQuery(query_embedding=[0.022433791309595108, -0.09785578399896622, -0.011647995561361313, 0.006709470879286528, 0.011408106423914433, -0.013800844550132751, 0.04528016597032547, 0.06373386830091476, -0.02525698021054268, 0.0414503738284111, 0.009026887826621532, 0.07963839918375015, -0.014284857548773289, -0.02764747478067875, 0.02016776241362095, 0.03039521723985672, 0.06011497229337692, -0.10150458663702011, -0.0508599616587162, 0.004058341030031443, 0.07232792675495148, 0.023175304755568504, -0.09448248893022537, 0.016420023515820503, -0.05738189443945885, -0.030371155589818954, 0.009154521860182285, -0.02135523036122322, -0.057115212082862854, -0.08295891433954239, 0.019572829827666283, -0.037262093275785446, 0.06178784370422363, 0.08227599412202835, 0.05767480656504631, 0.06485921144485474, 0.020707594230771065, 0.02573552541434765, 0.03777938708662987, 0.026480207219719887, 0.006713313050568104, -0.03499256446957588, -0.0013600416714325547, -0.012239889241755009, 0.007091591600328684, -0.046415187418460846, 0.0643308237195015, 0.018390053883194923, -0.09910266846418381, -0.029807452112436295, -0.022079257294535637, -0.06048470363020897, -0.03796931728720665, -0.030020812526345253, -0.025493141263723373, 0.04054267331957817, 0.04166460037231445, 0.09124202281236649, 0.0531955249607563, 0.005770345218479633, 0.047410257160663605, -0.020604897290468216, -0.16659682989120483, 0.11953552067279816, -0.08995447307825089, 0.020028438419103622, -0.06383820623159409, -0.0829775482416153, -0.010319167748093605, 0.012220848351716995, -0.011628995649516582, -0.05131375044584274, -0.02564379759132862, 0.024464773014187813, 0.03586006537079811, 0.021184979006648064, 0.04146875441074371, 0.01966315694153309, -0.018113011494278908, 0.025765396654605865, -0.01849163882434368, -0.0013054091250523925, -0.05043789744377136, -0.0024472586810588837, -0.0392380952835083, -0.022747186943888664, -0.008403707295656204, 0.026766866445541382, 0.025297511368989944, -0.030369754880666733, -0.03243283927440643, 0.04089253023266792, -0.024797698482871056, -0.015898369252681732, -0.035098738968372345, -0.05960705876350403, -0.02203255146741867, -0.011342865414917469, -0.07040940225124359, 0.36592596769332886, 0.02786353975534439, -0.024355709552764893, 0.016358081251382828, -0.10553897172212601, -0.006396922282874584, -0.03889603540301323, -0.015945924445986748, -0.004454701207578182, 0.011249895207583904, 0.06747816503047943, 0.02506866678595543, 0.017980867996811867, 0.017718123272061348, -0.03134008124470711, -0.0042113340459764, 0.038551539182662964, -0.023517761379480362, 0.02712131477892399, 0.04675894230604172, -0.00026887949206866324, -0.006274671293795109, 0.01863894984126091, 0.04541603475809097, 0.044797156006097794, 0.0006671423325315118, -0.04074341803789139, 0.09935265779495239, 0.04151897877454758, -0.024970855563879013, 0.034027501940727234, 0.023354629054665565, -0.036111317574977875, -0.03588811680674553, 0.019871700555086136, 0.07213141769170761, 0.023658467456698418, -0.03750062733888626, 0.0120955565944314, 0.06923491507768631, 0.10156485438346863, -0.009526669979095459, -0.004403043072670698, 0.02904336154460907, -0.013108516111969948, -0.011407683603465557, 0.049877021461725235, -0.005831009708344936, -4.448149411473423e-05, -0.009419643320143223, 0.04169754683971405, -0.027434803545475006, 0.05051903799176216, 0.0248631052672863, -0.07168268412351608, 0.054642047733068466, 0.025281468406319618, 0.05840597301721573, -9.544625208945945e-05, -0.020953364670276642, -0.043521955609321594, -0.04350104182958603, -0.05123862624168396, 0.010753199458122253, 0.051363881677389145, -0.044097770005464554, -0.1720910370349884, -0.0393562987446785, -0.015030371956527233, -0.0609569288790226, -0.0018626641249284148, -0.013878093101084232, -0.030657364055514336, -0.024010075256228447, 0.04619278386235237, 0.021528365090489388, -0.03936125710606575, -0.04140930995345116, -0.0029200157150626183, 0.007711682002991438, -0.008138624019920826, -0.02349465899169445, -0.03150226175785065, 0.03156750649213791, -0.018602965399622917, -0.011663773097097874, -0.05088019371032715, 0.027936536818742752, -0.005960078444331884, 0.005576122552156448, -0.01645006239414215, -0.048868972808122635, 0.013832860626280308, -0.03239203616976738, 0.06104668602347374, 0.01849967986345291, -0.006951592862606049, 0.009662740863859653, 0.012235927395522594, -0.027836613357067108, -0.013054865412414074, -0.06644672155380249, 0.06380406022071838, -0.04535575956106186, 0.03981930390000343, 0.024619847536087036, 0.014093460515141487, 0.026986781507730484, 0.02603948675096035, 0.07245494425296783, -0.06107880920171738, -0.039692532271146774, 0.014600586146116257, -0.0030056890100240707, -0.032304421067237854, -0.053139325231313705, 0.03756565973162651, -0.003833919297903776, 0.07285767793655396, -0.02912561595439911, 0.008191032335162163, -0.01720835641026497, 0.02436177246272564, -0.06177137792110443, -0.26731184124946594, 0.0047192699275910854, -0.02585742436349392, 0.06435469537973404, -0.027667243033647537, -0.062989741563797, 0.03377450630068779, 0.005012255162000656, 0.02529059164226055, 0.07953517884016037, 0.06254047900438309, -0.024674929678440094, 0.010416599921882153, -0.015939252451062202, -0.002732065273448825, 0.06301581114530563, -0.006761473603546619, -0.003098508110269904, -0.014440787956118584, 0.01372620090842247, 0.05497254803776741, -0.014277317561209202, -0.012842793948948383, -0.06862307339906693, 0.03597058355808258, -0.016708500683307648, 0.10460367798805237, -0.09790302067995071, 0.033603742718696594, -0.01490076445043087, 0.0617438368499279, -0.025649193674325943, -0.07157918065786362, -0.12049221247434616, 0.04861922189593315, 0.002441797638311982, 0.04283028841018677, -0.03766424208879471, -0.031070547178387642, -0.001311064581386745, -0.11738420277833939, 0.012454807758331299, -0.07088644802570343, -0.006829085294157267, -5.105545824335422e-06, 0.003293014131486416, 0.030182750895619392, 0.010856378823518753, -0.03883705660700798, 0.07239911705255508, -0.01956578530371189, 0.04193070903420448, 0.0010250763734802604, 0.035938795655965805, -0.014671911485493183, -0.03888959810137749, -0.015049786306917667, 0.01584896259009838, -0.04661964252591133, 0.00010596050560707226, -0.0024831390473991632, -0.011250073090195656, 0.004468638449907303, 0.019607581198215485, 0.03911062330007553, -0.008382024243474007, -0.0296130683273077, 0.03159675374627113, 0.011533263139426708, -0.040991418063640594, 0.013589788228273392, 0.09252112358808517, -0.00382716185413301, 0.06819342076778412, 0.0590517483651638, -0.008249781094491482, 0.05701811984181404, -0.0435420423746109, -0.0066399588249623775, -0.0010389417875558138, 0.0291204284876585, 0.0032985536381602287, 0.04556667059659958, 0.023221788927912712, 0.0009170342236757278, 0.02976597286760807, 0.09762716293334961, -0.0797174796462059, -0.008480783551931381, -0.014664300717413425, -0.048933010548353195, -0.0157981775701046, -0.01508296187967062, -0.05685306340456009, 0.0714716911315918, -0.05060490965843201, -0.2301204800605774, 0.014342772774398327, -0.03348624333739281, 0.021723903715610504, -0.02527402713894844, 0.028880111873149872, -0.039282117038965225, -0.06487762182950974, -0.02150079421699047, 0.018861128017306328, 0.028108689934015274, -0.02225518226623535, 0.0034982834476977587, -0.01205652765929699, 0.11412912607192993, 0.030816923826932907, 0.07538995146751404, 0.027415230870246887, -0.014033844694495201, -0.003078683279454708, 0.02247648313641548, -0.002690807217732072, 0.12705068290233612, 0.06385978311300278, -0.05376167967915535, -0.01374832820147276, -0.0097331702709198, 0.00827879086136818, 0.05749678239226341, -0.041085947304964066, -0.00041676388354972005, -0.017810413613915443, 0.10181025415658951, -0.03653325140476227, 0.006486999336630106, -0.10865379124879837, -0.03461664542555809, 0.046486157923936844, 0.04531239718198776, -0.02683793008327484, 0.030239183455705643, -0.025950780138373375, -0.006100566126406193, -0.007215396501123905, 0.04836620017886162, 0.0348120741546154, -0.03611009567975998, -0.035096425563097, -0.0020851169247180223, 0.030994225293397903, 0.00494561530649662, -0.05890528857707977, 0.03354892134666443, 0.030263541266322136, -0.0057157124392688274, 0.055941250175237656, -0.009317860007286072, -0.056796155869960785, -0.00624654209241271, 0.009344944730401039, 0.01679193042218685, 0.028785984963178635, 0.025639839470386505, 0.061243053525686264, 0.03044206276535988], similarity_top_k=2, doc_ids=None, node_ids=None, query_str=None, output_fields=None, embedding_field=None, mode='default', alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)

In [40]:
from llama_index.schema import NodeWithScore
from typing import Optional

nodes_with_scores = []
for index, node in enumerate(query_result.nodes):
    score: Optional[float] = None
    if query_result.similarities is not None:
        score = query_result.similarities[index]
    nodes_with_scores.append(NodeWithScore(node=node, score=score))

NameError: name 'query_result' is not defined

ValueError: 
******
Could not load OpenAI model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

To disable the LLM entirely, set llm=None.
******