In [None]:
!pip install openai llama_index llama-index-postprocessor-longllmlingua llmlingua

Collecting openai
  Downloading openai-1.41.0-py3-none-any.whl.metadata (22 kB)
Collecting llama_index
  Downloading llama_index-0.10.65-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-postprocessor-longllmlingua
  Downloading llama_index_postprocessor_longllmlingua-0.1.2-py3-none-any.whl.metadata (684 bytes)
Collecting llmlingua
  Downloading llmlingua-0.2.2-py3-none-any.whl.metadata (17 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama_index)
  Downloading llama_index_agent_openai-0.2.9-py3-none-any.whl.metadata (729 bytes)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama_index)
  Downloading llama_index_cli-0.1.13-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.11.0,>=0.10.65 (from llama_index)

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.postprocessor import LLMRerank, LongContextReorder
from llama_index.postprocessor.longllmlingua import LongLLMLinguaPostprocessor
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core import Settings, Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline

import getpass
import os
from IPython.display import Markdown, display

# Ask for the OpenAI API key without echoing it and expose it through the
# environment variable that the OpenAI client reads.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Введите OpenAI API Key:")

# Global llama_index settings shared by every index / query engine created below.
# NOTE: the llama_index OpenAI wrapper names these parameters `model` and
# `timeout` (seconds). The previous `model_name=` / `request_timeout=` spellings
# are not parameters of this class, so the intended model and the long timeout
# were not reliably applied.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1, timeout=1000, max_retries=3)
Settings.chunk_size = 512


Введите OpenAI API Key:··········


In [None]:
# Load every file found under ./data as llama_index documents.
documents = SimpleDirectoryReader(input_dir="./data").load_data()

# Build the vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents=documents)


In [None]:
# --- Engine 1: LLM-based reranking -----------------------------------------
# Retrieve 10 candidates by similarity, then pass them through LLMRerank
# (batches of 5, top_n=2).
llm_reranker = LLMRerank(top_n=2, choice_batch_size=5)
query_engine = index.as_query_engine(
    node_postprocessors=[llm_reranker],
    similarity_top_k=10,
)

# --- Engine 2: LongLLMLingua post-processing --------------------------------
# Same retrieval depth, but the retrieved context goes through the
# LongLLMLingua postprocessor (target_token=300) before reaching the LLM.
lingua = LongLLMLinguaPostprocessor(
    rank_method="longllmlingua",
    target_token=300,
    instruction_str="Given the context, please answer the final question",
)
query_engine_lingua = index.as_query_engine(
    node_postprocessors=[lingua],
    similarity_top_k=10,
)

# --- Engine 3: LongContextReorder post-processing ---------------------------
reorder = LongContextReorder()
reorder_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[reorder],
)

# --- Engine 4: HyDE query transformation ------------------------------------
# Note: this wraps `query_engine` (the LLMRerank engine above), so the HyDE
# engine also applies LLM reranking to whatever it retrieves.
hyde = HyDEQueryTransform(include_original=True)
hyde_query_engine = TransformQueryEngine(query_engine, hyde)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [None]:
# The same question is sent to every engine so the answers can be compared.
query_str = "what is flask?"

# 1) Engine with LLM reranking.
response_rerank = query_engine.query(query_str)
rerank_markdown = Markdown(f"<b>LLMRerank Response:</b> {response_rerank}")
display(rerank_markdown)

# 2) Engine with LongLLMLingua post-processing.
response_lingua = query_engine_lingua.query(query_str)
lingua_markdown = Markdown(f"<b>LongLLMLingua Response:</b> {response_lingua}")
display(lingua_markdown)

# 3) Engine with LongContextReorder post-processing.
response_reorder = reorder_engine.query(query_str)
reorder_markdown = Markdown(f"<b>LongContextReorder Response:</b> {response_reorder}")
display(reorder_markdown)

# 4) Engine with the HyDE query transformation.
response_hyde = hyde_query_engine.query(query_str)
hyde_markdown = Markdown(f"<b>HyDE Response:</b> {response_hyde}")
display(hyde_markdown)


<b>LLMRerank Response:</b> Flask is a web framework written in Python that can be classified as a micro framework. It does not require any specific tools or libraries and has an inbuilt database. Flask does not have a database abstraction layer but supports extensions that can add various application features. These extensions include object-relational mappers, form validation, upload handling, open authentication technologies, and other common framework-related tools.

<b>LongLLMLingua Response:</b> Flask is a Python web framework that is used in the college management system described in the context.

<b>LongContextReorder Response:</b> Flask is a web framework written in Python that is classified as a micro framework. It does not require any specific tools or libraries and comes with an inbuilt database. Flask supports extensions that can add various application features such as object-relational mappers, form validation, upload handling, and authentication technologies. It is known for its flexibility and ease of deployment in production environments.

<b>HyDE Response:</b> Flask is a web framework written in Python that can be classified as a micro framework. It does not require any specific tools or libraries and has an inbuilt database. Flask does not have a database abstraction layer but supports extensions that can add various application features. These extensions include object-relational mappers, form validation, upload handling, open authentication technologies, and other common framework-related tools.

In [None]:
import logging
import sys

# Name used to recognise the handler installed by this cell, so that re-running
# the cell re-uses it instead of stacking a second copy (which would print
# every record once per re-run).
TRACE_HANDLER_NAME = "llama_index_stdout_trace"

# Logger configuration: emit INFO and above from the "llama_index" logger.
logger = logging.getLogger("llama_index")
logger.setLevel(logging.INFO)
# This logger has its own stdout handler; without this, each record is also
# forwarded to the root logger's handlers and shows up twice in the cell output.
logger.propagate = False

# Log line layout: timestamp - logger name - level - message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Stream handler writing to stdout; created only if this cell has not already
# attached one to the logger.
handler = next(
    (h for h in logger.handlers if h.get_name() == TRACE_HANDLER_NAME),
    None,
)
if handler is None:
    handler = logging.StreamHandler(sys.stdout)
    handler.set_name(TRACE_HANDLER_NAME)
    logger.addHandler(handler)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)

def trace_query_engine(engine, query):
    """Run ``query`` through ``engine``, logging the query and the response.

    Both messages are emitted at INFO level on the "llama_index" logger.

    Args:
        engine: Any object exposing a ``query(query)`` method.
        query: The query, passed unchanged to ``engine.query``.

    Returns:
        Whatever ``engine.query(query)`` returns.
    """
    # Look the logger up by name so the function does not depend on a notebook
    # global; logging.getLogger returns the same "llama_index" logger object.
    trace_log = logging.getLogger("llama_index")
    # Lazy %-style arguments: the message is only formatted if the record is
    # actually emitted.
    trace_log.info("Query: %s", query)
    response = engine.query(query)
    trace_log.info("Response: %s", response)
    return response

# Tracing example: run the shared question through the LongLLMLingua engine.
response_lingua_traced = trace_query_engine(
    engine=query_engine_lingua,
    query=query_str,
)


2024-08-19 16:20:42,073 - llama_index - INFO - Query: what is flask?


INFO:llama_index:Query: what is flask?


2024-08-19 16:20:51,662 - llama_index - INFO - Response: Flask is a Python framework used for developing web applications.


INFO:llama_index:Response: Flask is a Python framework used for developing web applications.


Эффективность постобработок:

LLMRerank улучшает релевантность ответов, отсекая менее подходящие варианты.

LongLLMLingua сжимает контекст, делая ответы более компактными и точными.

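LongContextReorder переупорядочивает найденные фрагменты контекста так, чтобы наиболее релевантные оказались в начале и в конце: это помогает LLM не «терять» информацию из середины длинного контекста.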

Анализ данных с трассировкой:
Трассировка записывает в лог исходный запрос и итоговый ответ движка, что помогает выявлять проблемы и оптимизировать процесс генерации ответов.

Преимущества RAG-системы:
RAG-система эффективно сочетает поиск по тексту с генеративными возможностями LLM, повышая точность и релевантность ответов. Постобработки позволяют адаптировать систему под конкретные задачи.