In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
import torch
from transformers import BitsAndBytesConfig
from llama_index.prompts import PromptTemplate
from llama_index.llms import HuggingFaceLLM
from llama_index import ServiceContext
from llama_index.response.notebook_utils import display_response

In [3]:
# Quantization settings handed to the model loader via `model_kwargs`.
quantization_config = BitsAndBytesConfig(
    # Weight storage: 4-bit, "nf4" quantization type, with double quantization on.
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # Compute dtype used alongside the 4-bit weights.
    bnb_4bit_compute_dtype=torch.float16,
)

In [4]:
def messages_to_prompt(messages):
    """Flatten chat messages into one tag-delimited prompt string.

    Each message whose ``role`` equals ``'system'``, ``'user'`` or
    ``'assistant'`` is rendered as ``<|role|>\\n{content}\\n``; messages with
    any other role are skipped.

    Args:
        messages: Iterable of objects exposing ``role`` and ``content``.

    Returns:
        The concatenated prompt. It always begins with a ``<|system|>``
        section (an empty one is prepended when the first rendered section
        is not a system one) and always ends with an open ``<|assistant|>``
        header for the model to continue from.
    """
    recognised_roles = ('system', 'user', 'assistant')

    sections = []
    for message in messages:
        for role_tag in recognised_roles:
            if message.role == role_tag:
                sections.append(f"<|{role_tag}|>\n{message.content}\n")
                break
    prompt = "".join(sections)

    # Guarantee a leading system section, inserting a blank one if needed.
    system_header = "<|system|>\n"
    if not prompt.startswith(system_header):
        prompt = system_header + "\n" + prompt

    # Close with the assistant header so generation starts on the reply.
    return prompt + "<|assistant|>\n"

In [5]:
# Local Hugging Face LLM used for answer synthesis.
# NOTE(review): the `<|system|>` / `<|user|>` / `<|assistant|>` tags used by
# `query_wrapper_prompt` and `messages_to_prompt` do not look like the
# `[INST] ... [/INST]` chat format of Llama-2 chat models — confirm the template is intended.
llm = HuggingFaceLLM(
    # Model and tokenizer are pulled from the same repository.
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    # Loading options.
    model_kwargs={"quantization_config": quantization_config},
    device_map="auto",
    # tokenizer_kwargs={},
    # Prompt construction.
    query_wrapper_prompt=PromptTemplate("<|system|>\n\n<|user|>\n{query_str}\n<|assistant|>\n"),
    messages_to_prompt=messages_to_prompt,
    # Context and generation limits.
    context_window=3900,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.3, "top_k": 50, "top_p": 0.95},
)

config.json: 100%|██████████| 614/614 [00:00<00:00, 3.16MB/s]
model.safetensors.index.json: 100%|██████████| 26.8k/26.8k [00:00<00:00, 54.8MB/s]
model-00001-of-00002.safetensors: 100%|██████████| 9.98G/9.98G [02:10<00:00, 76.5MB/s]
model-00002-of-00002.safetensors: 100%|██████████| 3.50G/3.50G [01:52<00:00, 31.1MB/s]
Downloading shards: 100%|██████████| 2/2 [04:03<00:00, 121.54s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]
generation_config.json: 100%|██████████| 188/188 [00:00<00:00, 884kB/s]
tokenizer_config.json: 100%|██████████| 1.62k/1.62k [00:00<00:00, 8.39MB/s]
tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 14.9MB/s]
tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 23.7MB/s]
special_tokens_map.json: 100%|██████████| 414/414 [00:00<00:00, 2.70MB/s]


In [6]:
# Pair the LLM with a locally loaded BAAI/bge-small-en-v1.5 embedding model.
service_context = ServiceContext.from_defaults(
    embed_model="local:BAAI/bge-small-en-v1.5",
    llm=llm,
)

config.json: 100%|██████████| 743/743 [00:00<00:00, 3.49MB/s]
model.safetensors: 100%|██████████| 133M/133M [00:04<00:00, 29.8MB/s] 
tokenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 1.83MB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 34.2MB/s]
tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 33.7MB/s]
special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 765kB/s]


In [53]:
# Load model directly
from transformers import AutoTokenizer, AutoModel

# Fetch the UAE-Large-V1 tokenizer and model from the same repository.
# NOTE(review): neither `tokenizer` nor `model` is referenced again in the
# visible cells — confirm this cell is still needed.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="WhereIsAI/UAE-Large-V1"
)
model = AutoModel.from_pretrained(
    pretrained_model_name_or_path="WhereIsAI/UAE-Large-V1"
)

In [57]:
import pinecone
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores import PineconeVectorStore
from pinecone import Pinecone, PodSpec

import os

# --- Pinecone client -------------------------------------------------------
# The API key is read from the PINECONE_API_KEY environment variable so it
# never appears in the notebook source or its saved outputs. A missing
# variable raises KeyError here instead of failing later with an auth error.
# Any key previously pasted into this cell should be treated as leaked and rotated.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# --- Recreate the index from scratch --------------------------------------
# Deleting first is best-effort: on a fresh project there is nothing to
# delete. Only `Exception` is caught (a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit), and the underlying error is printed so an
# authentication or network failure is not mistaken for "index does not exist".
try:
    pc.delete_index('my-index')
except Exception as exc:
    print(f"index_does not exist (or could not be deleted): {exc}")

pc.create_index(
    name='my-index',
    # presumably the embedding width of the model used to build the index — TODO confirm
    dimension=1024,
    metric='euclidean',
    spec=PodSpec(
        replicas=1,
        shards=1,
        pod_type="p1",
        environment='gcp-starter',
    ),
)

# Handle to the freshly created index, consumed by the vector store below.
pinecone_index = pc.Index("my-index")

# construct vector store and customize storage context
# storage_context = StorageContext.from_defaults(
#     vector_store=PineconeVectorStore(pinecone.Index("my_index"))
# )


In [58]:

# Expose the Pinecone index to llama_index as a vector store and make it the
# storage backend for the index built below.
vector_store = PineconeVectorStore(
    add_sparse_vector=True,
    pinecone_index=pinecone_index,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Rebinds `service_context`: same LLM, but embeddings now come from a local
# WhereIsAI/UAE-Large-V1 model and the chunk size is 2048.
service_context = ServiceContext.from_defaults(
    chunk_size=2048,
    embed_model="local:WhereIsAI/UAE-Large-V1",
    llm=llm,
)

# `documents` is not created in this cell; it must already exist in the
# kernel before this runs.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
)


# index = VectorStoreIndex.from_documents(
#     documents, storage_context=storage_context
# )



Upserted vectors: 100%|██████████| 2048/2048 [00:15<00:00, 134.51it/s]
Upserted vectors: 100%|██████████| 2048/2048 [00:13<00:00, 148.49it/s]
Upserted vectors: 100%|██████████| 2048/2048 [00:14<00:00, 139.62it/s]
Upserted vectors: 100%|██████████| 2048/2048 [00:13<00:00, 146.94it/s]
Upserted vectors: 100%|██████████| 1406/1406 [00:09<00:00, 146.90it/s]


In [62]:
# Query the index with a default-configured query engine.
query_engine = index.as_query_engine()
response = query_engine.query(
    "What are some of the features of Fabiana Filippi's shirts and blouses?"
)

In [65]:
# Echo the query result object so the notebook renders its repr.
response

Response(response='Fabiana Filippi\'s shirts and blouses have several features that make them versatile and stylish. Some of these features include:\n\n* Being the perfect base to create solid modern and refined looks\n* Having innate ability to be easily matched to different shapes, fabrics, and colors\n* Being "must-have" items in the wardrobe that confer impeccable elegance\n* Being made of high-quality fabrics, such as silk and cotton\n* Having classic, slim, and over cuts\n* Being available in different lengths up to maxi lines\n* Having a modern style that satisfies the needs of multiple daily commitments\n* Being the expression of relaxed, aware, and timeless refinement\n\nIn addition, the article also mentions that the new iteration of tankinis have a fresh feel to them, with bright colors and prints, and shorter lengths that make them more flattering.', source_nodes=[NodeWithScore(node=TextNode(id_='00a664d9-24cd-4a6f-9e0d-110b61b42eff', embedding=[-0.0143461982, -0.0385359786

In [None]:
## Then create prompt for LLM

In [59]:
# Spot-check a single entry of `questions` (`questions` is not defined in this cell).
questions[5]

"What are some of the features of Fabiana Filippi's shirts and blouses?"

In [64]:
# Spot-check the entry of `contexts` at the same position as the question inspected above.
contexts[5]

'The prices indicated in this catalogue do not include shipping costs.\nFabiana Filippi\'s shirts and blouses are the perfect base to create solid modern and refined looks thanks to their innate ability to be easily matched to different shapes, fabrics and colours. "Must-have" items in the wardrobe, always able to confer impeccable elegance. Silk and cotton shirts, carefully-selected precious fabrics are the protagonists of a timeless style and ultra-versatile use that satisfies the needs of multiple daily commitments. Elegant women’s shirts, interpreted with classic, slim and over cuts, in ultra-modern style. Short, long and ¾ sleeve blouses and shirts, available in different lengths up to maxi lines. Men’s cut, Korean, V, round necks are examples that underscore the versatility of the various models to wear during the day and at night, to feel the best yarns on the skin and embellish the look with surprising style details. The union between feminine essence and everyday versatility r

In [7]:
from datasets import load_dataset

# Load the neural-bridge/rag-dataset-12000 dataset.
dataset = load_dataset(path="neural-bridge/rag-dataset-12000")

Downloading readme: 100%|██████████| 5.18k/5.18k [00:00<00:00, 21.6MB/s]
Downloading data: 100%|██████████| 23.1M/23.1M [00:01<00:00, 12.3MB/s]
Downloading data: 100%|██████████| 5.79M/5.79M [00:00<00:00, 12.3MB/s]
Generating train split: 100%|██████████| 9600/9600 [00:00<00:00, 51358.43 examples/s]
Generating test split: 100%|██████████| 2400/2400 [00:00<00:00, 56947.53 examples/s]


In [10]:
# Scratch check: unpacking each pair of a list of 2-tuples in a for-loop.
hi = [('1', '2'), ('3', '4')]
for a, b in hi:
    # Emits `a` and `b` on separate lines, exactly like two print calls.
    print(a, b, sep="\n")

1
2
3
4


In [21]:
from llama_index import Document

# Toy example: pair two equal-length lists into Documents, the first list
# supplying the text and the second the value of the 'hello' metadata key.
text_list1 = ["a", "text1"]
text_list2 = ["b", "text2"]
documents = [
    Document(text=body, metadata={'hello': greeting})
    for body, greeting in zip(text_list1, text_list2)
]

In [24]:
# Check whether any entry of `contexts` is None.
None in contexts

False

In [27]:
# Positions in `questions` whose entry is None.
indices_to_pop = [
    position
    for position, question in enumerate(questions)
    if question is None
]

In [28]:
# Display the positions collected for removal.
indices_to_pop

[340, 2818]

In [29]:
# Drop the rows flagged in `indices_to_pop` from the parallel lists
# `questions` and `contexts`, in place.
#
# Highest position first, so a removal never shifts a position that is still
# waiting to be removed.
indices_to_pop.sort(reverse=True)

# The loop variable is deliberately not called `index`: that name is used
# elsewhere in this notebook for the VectorStoreIndex and would be rebound
# to an int by this loop.
for stale_position in indices_to_pop:
    # Only remove the row if it is still the None entry that was flagged.
    # This makes the cell safe to re-run: a second execution would otherwise
    # pop the same positions again and silently delete valid rows.
    if stale_position < len(questions) and questions[stale_position] is None:
        questions.pop(stale_position)
        contexts.pop(stale_position)

In [30]:
from llama_index import Document

# questions = dataset['train']['question']
# contexts = dataset['train']['context']
# One Document per (question, context) pair: the question becomes the
# document text and the context is stored under the "context" metadata key.
documents = [
    Document(text=question, metadata={"context": context})
    for question, context in zip(questions, contexts)
]

In [None]:
# Placeholder example of a single Document carrying filename/category metadata.
document = Document(
    metadata={"filename": "<doc_file_name>", "category": "<category>"},
    text="text",
)

In [None]:
from llama_index.schema import TextNode

# Placeholder example: build an index directly from hand-made nodes.
# Both nodes carry identical placeholder text and id values.
node1 = TextNode(id_="<node_id>", text="<text_chunk>")
node2 = TextNode(id_="<node_id>", text="<text_chunk>")
nodes = [node1, node2]

# Rebinds the notebook-level name `index`.
index = VectorStoreIndex(nodes)

In [None]:
# documents = SimpleDirectoryReader(
#     "../../examples/data/paul_graham"
# ).load_data()

# Build an index over `documents` using the current `service_context`; no
# storage context is passed here.
vector_index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)