In [None]:
!pip install beyondllm

In [None]:
!pip install llama-index-finetuning==0.1.5 llama-index-embeddings-huggingface==0.2.0

In [None]:
from beyondllm import source, retrieve, llms, generator
from beyondllm.embeddings import FineTuneEmbeddings
import os

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /usr/local/lib/python3.10/dist-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


> Load the data. Here, let's use a PDF:

[The Last Question](https://docs.google.com/file/d/0ByoueGSWXluVVUtHYnRJVEg4YnM/edit?resourcekey=0-piVl8D4gdQhTyiaImqjduQ). Download it and upload it to Colab, or save it within your project directory.

In [None]:
# Path to the PDF that was downloaded and uploaded/saved beforehand.
path = "/content/The_Last_Question.pdf"

# Load the PDF through BeyondLLM with chunk_size=1024 and no chunk overlap.
data = source.fit(
    path,
    dtype="pdf",
    chunk_size=1024,
    chunk_overlap=0,
)

# The embedding fine-tuning step later receives a list of files,
# so wrap the single PDF path in a list.
list_of_files = [path]

> Define an open-source LLM

In [None]:
# Standard library: read a secret from the user without echoing it.
from getpass import getpass

# BeyondLLM wrapper class used below to construct the LLM.
from beyondllm.llms import HuggingFaceHubModel

# Ask for the Hugging Face access token interactively instead of hard-coding it.
HF_TOKEN = getpass("Access token:")

Access token:··········


In [None]:
# Generation settings handed to the model through `model_kwargs`.
generation_kwargs = {
    "max_new_tokens": 1024,
    "temperature": 0.1,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
    "return_full_text": False,
}

# Mistral-7B-Instruct, authenticated with the access token entered earlier.
llm = HuggingFaceHubModel(
    token=HF_TOKEN,
    model="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs=generation_kwargs,
)

> Fine-tune the embedding model

In [None]:
# Object that drives the embedding fine-tuning run.
embed_model = FineTuneEmbeddings()

# Fine-tune the "BAAI/bge-small-en-v1.5" base embedding model on the PDF.
# `llm` is passed to the trainer (presumably to generate training data — confirm);
# "fintune" is presumably the output location of the tuned model — confirm.
fine_tuned_model = embed_model.train(
    list_of_files,
    "BAAI/bge-small-en-v1.5",
    llm,
    "fintune",
)

Parsing nodes:   0%|          | 0/7 [00:00<?, ?it/s]

Parsed 7 nodes


Parsing nodes:   0%|          | 0/4 [00:00<?, ?it/s]

Parsed 4 nodes


100%|██████████| 7/7 [00:09<00:00,  1.31s/it]
100%|██████████| 4/4 [00:03<00:00,  1.06it/s]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

> Build the retriever

In [None]:
# Option to load an already fine-tuned model instead of re-training.
# The retriever below consumes `fine_tuned_model`, so the loaded model must be
# bound to that name. `load_model` is assumed to be a method of the
# FineTuneEmbeddings object created as `embed_model` — confirm against the
# BeyondLLM documentation before uncommenting:
# fine_tuned_model = embed_model.load_model("fintune")

# Creating a retriever using the fine-tuned embeddings
retriever = retrieve.auto_retriever(data, fine_tuned_model, type="normal", top_k=4)

> Define the generation pipeline

In [None]:
# Question to put to the retrieval-augmented pipeline.
query = (
    "what did Lupov reply to "
    "When the sun is done, the other stars will be gone, too."
)

In [None]:
# Wrap the question in the [INST] ... [/INST] instruction tags.
prompt = "<s>[INST] {query} [/INST]".format(query=query)

In [None]:
# Combine the prompt, the retriever and the LLM into one generation pipeline.
pipeline = generator.Generate(
    question=prompt,
    retriever=retriever,
    llm=llm,
)

In [None]:
# Run the pipeline and print the generated answer.
answer = pipeline.call()
print(answer)

 Lupov replied: "Darn right they will."

Context and chat history indicate that Lupov and Adell were discussing the fact that all stars, including the sun, will eventually run out of fuel and die. They were considering the implications of this fact, including the eventual end of all life in the universe due to the increase of entropy (the amount of disorder in the universe). Lupov acknowledged that other stars will also eventually die, agreeing with Adell's statement.
