# Initial Setup and Environment Variables

In [2]:
# initial setup

import os
import sys
from getpass import getpass
import nest_asyncio
from IPython.display import Markdown, display

# Patch asyncio so event loops can be nested (nest_asyncio's documented purpose);
# the notebook kernel already runs a loop and later cells use async retrieval.
nest_asyncio.apply()


In [3]:
# Define environment variables
# Notion credentials: os.getenv returns None when the variable is unset.
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
DATABASE_ID = os.getenv("NOTION_DATABASE_ID")

# Use os.getenv (not os.environ[...]) so a missing key falls through to the
# interactive prompt instead of raising KeyError before getpass can run.
CO_API_KEY = os.getenv("CO_API_KEY") or getpass("Enter your Cohere API key: ")

# Required settings: a missing variable raises KeyError here.
QDRANT_URL = os.environ['QDRANT_URL']
QDRANT_API_KEY = os.environ['QDRANT_API_KEY']

OPENAI_API_KEY = os.environ['OPENAI_API_KEY']


## Set up observability with Llamatrace

In [None]:
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
    OTLPSpanExporter as HTTPSpanExporter,
)
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor


# Add Phoenix API Key for tracing.
# Uncomment and fill in to send the key via the OTLP headers environment variable.
# PHOENIX_API_KEY = ""
# os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"

# Span processor that exports each span over HTTP to the hosted Phoenix endpoint.
span_phoenix_processor = SimpleSpanProcessor(
    HTTPSpanExporter(endpoint="https://app.phoenix.arize.com/v1/traces")
)

# Create the tracer provider and attach the Phoenix span processor to it.
tracer_provider = trace_sdk.TracerProvider()
tracer_provider.add_span_processor(span_processor=span_phoenix_processor)

# Instrument LlamaIndex so its calls are traced through this provider.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

# Set Up the Notion Processor

In [4]:
import os
import json
from typing import Dict, List, Optional, Tuple
from notion_client import Client
import pandas as pd
from datetime import datetime

class NotionProcessor:
    """Turn a Notion database into a flat list of section records.

    Each record is a dict with ``properties`` (the page's extracted property
    values plus a ``header`` key) and ``content`` (the normalized text found
    under one level-1 heading). Child databases embedded in a page are
    processed recursively, with the parent page's properties passed down.
    """

    def __init__(self, auth_token: str):
        """Create the Notion API client used by every other method.

        Args:
            auth_token: Notion integration token passed to ``Client``.
        """
        self.notion = Client(auth=auth_token)

    def get_database_pages(self, database_id: str, parent_properties: Optional[Dict] = None) -> List[Dict]:
        """Retrieve all pages from a Notion database, following pagination.

        Args:
            database_id: Id of the database to query.
            parent_properties: Optional already-extracted properties of a
                parent page. When given, they are merged into every returned
                page in place via ``_merge_parent_properties``.

        Returns:
            The raw page dicts from every result batch, in query order.
        """
        pages: List[Dict] = []
        cursor = None

        while True:
            # Only send start_cursor once we actually have one, so the first
            # request never carries an explicit null cursor.
            query_kwargs = {'database_id': database_id}
            if cursor is not None:
                query_kwargs['start_cursor'] = cursor
            response = self.notion.databases.query(**query_kwargs)

            # If parent properties exist, merge them with each page
            if parent_properties:
                for page in response['results']:
                    self._merge_parent_properties(page, parent_properties)

            pages.extend(response['results'])

            if not response.get('has_more'):
                break

            cursor = response['next_cursor']

        return pages

    def _merge_parent_properties(self, page: Dict, parent_properties: Dict):
        """Merge a parent's extracted properties into a raw child page, in place.

        Handling per key:
        - ``Name``: if the child has a non-empty title, it becomes
          ``"<parent name> - <child name>"``; otherwise nothing changes.
        - ``Description``: never inherited; the child's own value is kept.
        - ``Tags``: the parent's value (a ``', '``-separated string, the form
          ``extract_properties`` produces) is unioned with the child's
          multi-select tags. Only applied when the child has a ``Tags``
          property.
        - anything else: copied onto the child as a rich_text property, but
          only when the child does not already have that key.

        Args:
            page: Raw Notion page dict; its ``properties`` entry is mutated.
            parent_properties: Flat ``{name: value}`` dict from the parent.
        """
        for key, value in parent_properties.items():
            if key == 'Name':
                # Handle name merging only if child page has a name
                if 'Name' in page['properties']:
                    child_name = self._get_rich_text_content(page['properties']['Name'].get('title', []))
                    if child_name:
                        merged_name = f"{value} - {child_name}"
                        page['properties']['Name'] = {
                            'type': 'title',
                            'title': [{
                                'type': 'text',
                                'text': {'content': merged_name},
                                'plain_text': merged_name
                            }]
                        }
            elif key == 'Description':
                # Skip Description property - keep child's description if it exists
                continue
            elif key == 'Tags':
                # Merge tags, removing duplicates
                parent_tags = set(value.split(', ')) if value else set()
                if 'Tags' in page['properties']:
                    child_tags = set(tag['name'] for tag in page['properties']['Tags'].get('multi_select', []))
                    merged_tags = parent_tags.union(child_tags)
                    page['properties']['Tags'] = {
                        'type': 'multi_select',
                        'multi_select': [{'name': tag} for tag in sorted(merged_tags)]
                    }
            else:
                # For all other properties, inherit from parent if not present in child
                if key not in page['properties']:
                    page['properties'][key] = {
                        'type': 'rich_text',
                        'rich_text': [{
                            'type': 'text',
                            'text': {'content': str(value)},
                            'plain_text': str(value)
                        }]
                    }

    def extract_properties(self, page: Dict) -> Dict:
        """Flatten a raw page's properties into plain Python values.

        Supported property types: title, rich_text, select, multi_select
        (joined as a sorted ``', '``-separated string), date (start value),
        number and checkbox. Empty select/date values and any other property
        type are omitted from the result.

        Args:
            page: Raw Notion page dict with a ``properties`` entry.

        Returns:
            ``{property name: value}`` for every supported, non-empty property.
        """
        properties = {}

        for prop_name, prop_data in page['properties'].items():
            prop_type = prop_data['type']

            if prop_type == 'title':
                properties[prop_name] = self._get_rich_text_content(prop_data['title'])
            elif prop_type == 'rich_text':
                properties[prop_name] = self._get_rich_text_content(prop_data['rich_text'])
            elif prop_type == 'select':
                if prop_data['select']:
                    properties[prop_name] = prop_data['select']['name']
            elif prop_type == 'multi_select':
                # Convert multi-select to comma-separated string
                properties[prop_name] = ', '.join(sorted(item['name'] for item in prop_data['multi_select']))
            elif prop_type == 'date':
                if prop_data['date']:
                    properties[prop_name] = prop_data['date']['start']
            elif prop_type in ['number', 'checkbox']:
                properties[prop_name] = prop_data[prop_type]

        return properties

    def _normalize_text(self, text: str) -> str:
        """Normalize whitespace while keeping the text's line structure.

        For every line of ``text``:
        1. runs of whitespace are collapsed to a single space,
        2. leading and trailing whitespace is removed,
        3. a space directly before a colon is removed,
        and lines that end up empty are dropped.

        Whitespace is collapsed per line rather than over the whole string:
        collapsing the whole string would also swallow the newlines, which
        would make the line handling a no-op and flatten every section into
        a single line.

        Args:
            text: Raw text, possibly multi-line.

        Returns:
            The cleaned lines joined with ``'\\n'``.
        """
        cleaned_lines = []

        for line in text.split('\n'):
            # split()/join collapses inner whitespace and strips both ends.
            cleaned_line = ' '.join(line.split()).replace(' :', ':')
            if cleaned_line:  # Only keep non-empty lines
                cleaned_lines.append(cleaned_line)

        return '\n'.join(cleaned_lines)

    def _get_rich_text_content(self, rich_text: List) -> str:
        """Join the ``plain_text`` of each rich-text item and normalize it.

        Items without a truthy ``plain_text`` are skipped; the remaining
        pieces are joined with a single space before normalization.
        """
        text = ' '.join([text['plain_text'] for text in rich_text if text.get('plain_text')])
        return self._normalize_text(text)

    def get_block_children(self, block_id: str, level: int = 0) -> List[Tuple[Dict, int]]:
        """Retrieve all descendant blocks of a block with their nesting level.

        Blocks are returned depth-first: each block is immediately followed by
        its own descendants. ``child_database`` blocks are listed but never
        descended into (their pages are handled by ``process_page``).

        Args:
            block_id: Id of the block (or page) whose children are listed.
            level: Nesting level assigned to the direct children.

        Returns:
            ``(block, level)`` tuples in document order.
        """
        blocks = []
        cursor = None

        while True:
            # Same pagination rule as get_database_pages: omit the cursor on
            # the first request instead of sending an explicit None.
            list_kwargs = {'block_id': block_id}
            if cursor is not None:
                list_kwargs['start_cursor'] = cursor
            response = self.notion.blocks.children.list(**list_kwargs)

            for block in response['results']:
                blocks.append((block, level))

                if block.get('has_children'):
                    if block['type'] != 'child_database':
                        child_blocks = self.get_block_children(block['id'], level + 1)
                        blocks.extend(child_blocks)

            if not response.get('has_more'):
                break

            cursor = response['next_cursor']

        return blocks

    def process_blocks(self, blocks: List[Tuple[Dict, int]]) -> Tuple[Dict, List[str]]:
        """Split a page's blocks into sections keyed by level-1 heading.

        - A ``heading_1`` closes the current section and starts a new one.
        - Other headings are kept as a line of text inside the section.
        - Paragraphs contribute their text; bulleted/numbered items are
          grouped so that nested items are merged into their top-level item.
        - ``child_database`` blocks become a ``[Database: <id>]`` marker.
        - All other block types are ignored.

        Content that appears before the first ``heading_1`` is not part of
        any section and is discarded. If two sections share the same heading
        text, the later one wins the entry in the returned mapping.

        Args:
            blocks: ``(block, level)`` tuples as returned by
                ``get_block_children``.

        Returns:
            ``(headers, content_sections)`` where ``headers`` maps heading
            text to an index into ``content_sections``.
        """
        current_header = None
        current_content: List[str] = []
        headers: Dict[str, int] = {}
        content_sections: List[str] = []
        current_bullet_group: List[Tuple[str, int]] = []

        def flush_bullets():
            """Move the pending bullet group, if any, into the section content."""
            nonlocal current_bullet_group
            if current_bullet_group:
                current_content.append(self._merge_bullet_group(current_bullet_group))
                current_bullet_group = []

        def save_current_section():
            """Close the current section and record it under its heading."""
            flush_bullets()

            if current_header is not None and current_content:
                # Join content and normalize the entire section
                section_content = self._normalize_text('\n'.join(filter(None, current_content)))
                content_sections.append(section_content)
                headers[current_header] = len(content_sections) - 1

        for block, level in blocks:
            block_type = block['type']

            # Handle headers
            if block_type.startswith('heading_'):
                header_text = self._get_rich_text_content(block[block_type]['rich_text'])
                header_level = int(block_type[-1])

                if header_level == 1:
                    # Save current section before starting new one
                    save_current_section()
                    current_content = []
                    current_header = header_text
                else:
                    # Treat sub-headers as text content with line break
                    flush_bullets()
                    current_content.append(f"{header_text}\n")

            # Handle child database
            elif block_type == 'child_database':
                flush_bullets()
                current_content.append(f"[Database: {block['id']}]")

            # Handle bullet points and numbered lists
            elif block_type in ['bulleted_list_item', 'numbered_list_item']:
                text_content = self._get_rich_text_content(block[block_type]['rich_text'])

                if level == 0:
                    # A top-level item starts a new group.
                    flush_bullets()
                    current_bullet_group = [(text_content, level)]
                else:
                    current_bullet_group.append((text_content, level))

            # Handle regular paragraphs
            elif block_type == 'paragraph':
                flush_bullets()

                text_content = self._get_rich_text_content(block[block_type]['rich_text'])
                if text_content:
                    current_content.append(text_content)

        # Save final section
        save_current_section()

        return headers, content_sections

    def _merge_bullet_group(self, bullet_group: List[Tuple[str, int]]) -> str:
        """Merge a group of bullets into one line per top-level bullet.

        Nested items (level > 0) are appended inline, space-separated, to the
        top-level item that precedes them.

        Args:
            bullet_group: ``(text, level)`` tuples in document order.

        Returns:
            One line per top-level bullet, joined with ``'\\n'``; an empty
            string for an empty group.
        """
        if not bullet_group:
            return ""

        def flatten(parts: List[str]) -> str:
            # Newlines inside an item are turned into spaces first so every
            # merged bullet stays on a single line.
            return self._normalize_text(' '.join(parts).replace('\n', ' '))

        result = []
        current_main_bullet: List[str] = []

        for text, level in bullet_group:
            if level == 0:
                if current_main_bullet:
                    result.append(flatten(current_main_bullet))
                current_main_bullet = [text]
            else:
                current_main_bullet.append(text)

        if current_main_bullet:
            result.append(flatten(current_main_bullet))

        return '\n'.join(result)

    def process_page(self, page: Dict, parent_properties: Optional[Dict] = None) -> List[Dict]:
        """Process a single page and, recursively, its nested databases.

        Args:
            page: Raw Notion page dict.
            parent_properties: Optional extracted properties of the parent
                page. Keys other than ``Name``, ``Description`` and ``Tags``
                are copied in when the page does not define them itself.

        Returns:
            One ``{'properties': ..., 'content': ...}`` record per section of
            this page, followed by the records of every page found in its
            child databases.
        """
        results = []

        # Extract properties before any merging
        properties = self.extract_properties(page)

        # Merge with parent properties if they exist
        if parent_properties:
            # Skip special properties handling here as it's done in _merge_parent_properties
            # Only handle properties that weren't merged during database query
            for key, value in parent_properties.items():
                if key not in ['Name', 'Description', 'Tags'] and key not in properties:
                    properties[key] = value

        # Process page blocks
        blocks = self.get_block_children(page['id'])
        headers, content_sections = self.process_blocks(blocks)

        # Create entries for each section
        for header, section_index in headers.items():
            section_properties = properties.copy()
            section_properties['header'] = header

            if 0 <= section_index < len(content_sections):
                results.append({
                    'properties': section_properties,
                    'content': content_sections[section_index]
                })

        # Process child databases
        for block, _ in blocks:
            if block['type'] == 'child_database':
                # The child_database block id is used as the database id.
                child_pages = self.get_database_pages(block['id'], properties)
                for child_page in child_pages:
                    results.extend(self.process_page(child_page, properties))

        return results

    def process_database(self, database_id: str) -> List[Dict]:
        """Process every page of a database and return all section records.

        Args:
            database_id: Id of the top-level Notion database.

        Returns:
            The concatenated ``process_page`` results for every page.
        """
        processed_data = []
        pages = self.get_database_pages(database_id)

        for page in pages:
            processed_data.extend(self.process_page(page))

        return processed_data

# Set up Custom Retrieval


In [None]:
# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)

from typing import List

class CustomRetriever(BaseRetriever):
    """Custom retriever that combines semantic (vector) search with keyword search.

    Both underlying retrievers are queried and their hits are combined by
    node id, either as an intersection (``mode="AND"``) or as a union
    (``mode="OR"``).
    """

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        keyword_retriever: KeywordTableSimpleRetriever,
        mode: str = "AND",
    ) -> None:
        """Store the two retrievers and the combination mode.

        Args:
            vector_retriever: Retriever used for the semantic search.
            keyword_retriever: Retriever used for the keyword search.
            mode: ``"AND"`` to keep only nodes returned by both retrievers,
                ``"OR"`` to keep nodes returned by either.

        Raises:
            ValueError: If ``mode`` is neither ``"AND"`` nor ``"OR"``.
        """

        self._vector_retriever = vector_retriever
        self._keyword_retriever = keyword_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes for the query from both retrievers and combine them.

        When a node id is returned by both retrievers, the keyword
        retriever's ``NodeWithScore`` is the one kept.

        Returns:
            The selected nodes in a deterministic order: vector hits in their
            retrieval order, followed by keyword-only hits in theirs.
        """

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        keyword_nodes = self._keyword_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        keyword_ids = {n.node.node_id for n in keyword_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in keyword_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(keyword_ids)
        else:
            retrieve_ids = vector_ids.union(keyword_ids)

        # Walk the dict (insertion-ordered) rather than the id set: iterating a
        # set of strings has no defined order and can differ between sessions.
        retrieve_nodes = [
            node for node_id, node in combined_dict.items() if node_id in retrieve_ids
        ]
        return retrieve_nodes

# Data Pipeline Execution

## Set up the LLM, Embeddings and Vector Store

In [5]:
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.cohere import CohereEmbedding
from qdrant_client import QdrantClient, AsyncQdrantClient
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

from llama_index.core.settings import Settings


# Name of the Qdrant collection that holds the ingested Notion chunks.
COLLECTION_NAME = "Notion_vector_store"

# Set up the LLM
# (the original cell had a doubled comma after api_key, which is a SyntaxError)
llm_openai = OpenAI(
    model="gpt-4o-mini",
    temperature=0.5,
    api_key=OPENAI_API_KEY,
    )

# Make it the default LLM for LlamaIndex components.
Settings.llm = llm_openai

# Set up the Embeddings
embed_model_openai = OpenAIEmbedding(
    model="text-embedding-3-large",
    api_key=OPENAI_API_KEY
    )

# Make it the default embedding model for LlamaIndex components.
Settings.embed_model = embed_model_openai

# set up the vector store client
client = QdrantClient(
    location=QDRANT_URL,
    api_key=QDRANT_API_KEY
    )
# set up the async client
aclient = AsyncQdrantClient(
    location=QDRANT_URL,
    api_key=QDRANT_API_KEY
    )
# set up the vector store, backed by both the sync and the async client
vector_store = QdrantVectorStore(
    client=client,
    aclient=aclient,
    collection_name=COLLECTION_NAME,
    )

  from .autonotebook import tqdm as notebook_tqdm
Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


## Extract the data from the Notion database

In [5]:
from llama_index.core import Document

# Pull every section record out of the Notion database.
processor = NotionProcessor(NOTION_TOKEN)
processed_data = processor.process_database(DATABASE_ID)

# Wrap each record in a LlamaIndex Document: section text as the body,
# page properties (plus the section header) as metadata.
documents = []
for record in processed_data:
    documents.append(
        Document(text=record['content'], metadata=record['properties'])
    )

## Ingest to Database

This is where we need to:
- Transform the data (Split large documents into chunks and produce Keywords that are also embedded)
- Store in the database

In [6]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import KeywordExtractor
from llama_index.core.ingestion import IngestionPipeline


# Define the sentence splitter: chunks of 512 with an overlap of 16, treating a
# triple newline as the paragraph separator.
sentence_splitter = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=16,
    paragraph_separator="\n\n\n",
)

# Keyword extraction is currently disabled; uncomment here and in `transforms` to enable it.
#keyword_extractor = KeywordExtractor(keywords=5, llm=llm_openai)

# define the transform: split first, then embed each chunk
transforms = [sentence_splitter, embed_model_openai]#, keyword_extractor]

# Ingest into Database: run the transformations over `documents` and write the
# result to the vector store. `nodes` is reused by later cells.
nodes = IngestionPipeline(
    transformations=transforms,
    vector_store=vector_store
    ).run(nodes=documents)


NameError: name 'documents' is not defined

## Build Index over Vector Database

In [7]:
from llama_index.core import StorageContext
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

# Storage context wrapping the Qdrant vector store; also reused by later cells.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
    )

# Create index from vector store with storage context
# NOTE(review): from_vector_store may build its own storage context from
# `vector_store`, in which case the storage_context argument is redundant — confirm
# against the installed llama-index version.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,  # Add storage context
    embed_model=embed_model_openai,
    show_progress=True  # Optional: helps track progress
)

# Set up the Retriever

In [26]:
from llama_index.core.node_parser import MetadataAwareTextSplitter
from llama_index.core.schema import MetadataMode
from typing import List, Dict
from collections import defaultdict

def organize_chunks_by_project(nodes):
    """Group retrieved chunks by project and heading and merge each group.

    Grouping keys are read from each node's metadata. The original key names
    (``project``, ``heading``, ``project_description``) are tried first; when
    absent, the keys written by ``NotionProcessor`` are used instead:
    ``header`` for the section heading and — assuming the Notion database
    exposes them as properties — ``Name`` and ``Description`` for the project.
    Without this fallback every Notion chunk lands in the single
    "Unknown Project" / "General" group.

    Args:
        nodes: Iterable of node-like objects exposing ``metadata`` (dict),
            ``text`` (str) and ``copy()``.

    Returns:
        One merged node per (project, heading) group, in first-seen order.
        Each is a copy of the group's first node whose ``text`` is a metadata
        header followed by the group's texts joined with newlines. The input
        nodes are not modified.
    """
    def first_present(metadata, keys, default):
        # Return the value of the first key that exists in the metadata.
        for key in keys:
            if key in metadata:
                return metadata[key]
        return default

    # Group nodes by project, then by heading (dicts keep insertion order).
    project_groups = defaultdict(lambda: defaultdict(list))

    for node in nodes:
        metadata = node.metadata
        project = first_present(metadata, ('project', 'Name'), 'Unknown Project')
        heading = first_present(metadata, ('heading', 'header'), 'General')

        # Add to appropriate group
        project_groups[project][heading].append(node)

    # Merge nodes within each project+heading group
    merged_nodes = []
    for project, heading_groups in project_groups.items():
        for heading, nodes_group in heading_groups.items():
            # Combine text from all nodes in group
            combined_text = "\n".join(n.text for n in nodes_group)

            # Get project metadata from first node
            project_desc = first_present(
                nodes_group[0].metadata, ('project_description', 'Description'), ''
            )

            # Create enriched text with metadata header
            enriched_text = (
                f"Project: {project}\n"
                f"Project Description: {project_desc}\n"
                f"Section: {heading}\n"
                "---\n"
                f"{combined_text}"
            )

            # Create new node with merged content
            merged_node = nodes_group[0].copy()
            merged_node.text = enriched_text
            merged_nodes.append(merged_node)

    return merged_nodes

# Create post-retrieval transformation function
def post_retrieval_transform(retrieval_output):
    """Regroup retrieved chunks by project/heading and re-wrap them with scores.

    The nodes of ``retrieval_output`` are merged with
    ``organize_chunks_by_project``. Each merged node is a copy of the first
    node of its group, so it is matched back by ``node_id`` to the retrieval
    result that supplied that node. The merged node is then wrapped in a copy
    of that result and inherits its score, instead of taking the score of
    whichever result happens to sit at the same list position.

    Args:
        retrieval_output: Sequence of retrieval results exposing ``node``,
            ``score`` and ``copy()``.

    Returns:
        One result per merged node. A merged node whose id matches no input
        result is wrapped in a copy of the first result with score ``0.0``.
    """
    nodes = [r.node for r in retrieval_output]
    organized_nodes = organize_chunks_by_project(nodes)

    # First retrieval result seen for each node id.
    result_by_id = {}
    for result in retrieval_output:
        result_by_id.setdefault(result.node.node_id, result)

    # Reconstruct retrieval output with organized nodes
    transformed_output = []
    for node in organized_nodes:
        source = result_by_id.get(node.node_id)
        if source is not None:
            # Copy keeps the source result's score.
            new_ret = source.copy()
        else:
            # organized_nodes is non-empty here, so retrieval_output[0] exists.
            new_ret = retrieval_output[0].copy()
            new_ret.score = 0.0
        new_ret.node = node
        transformed_output.append(new_ret)

    return transformed_output


## Set up Agent for Query Decomposition

In [None]:
from llama_index.core.vector_stores.types import VectorStoreInfo, MetadataInfo

# Schema description of the vector store; its JSON form is embedded in the
# retrieval tool's description in a later cell.
vector_store_info = VectorStoreInfo(
    content_info="The transformation of a query into sub-queries",
    metadata_info=[
        # Metadata field describing query rewriting.
        MetadataInfo(
            name="query_rewriting",
            type="str",
            description="Useful for finding the right thinker's words to answer a question."
        ),
        # Metadata field describing query decomposition.
        MetadataInfo(
            name="query_decomposition",
            type="str",
            description="Use this when the user asks a question."
        ),
    ],
)

In [None]:
from typing import List, Tuple, Any
from pydantic import BaseModel, Field

# Argument schema for the auto-retrieve tool: a query plus two parallel lists
# describing metadata filters (names and their corresponding values).
# Documented with comments rather than a class docstring so the generated
# schema text is left unchanged.
class AutoRetrieveModel(BaseModel):
    # The user's question.
    query: str = Field(..., description="A question the user wants to advice about")
    # Names of the metadata fields to filter on.
    filter_key_list: List[str] = Field(
        ..., description="List of metadata filter field names"
    )
    # Values for those fields, matching filter_key_list.
    filter_value_list: List[str] = Field(
        ...,
        description=(
            "List of metadata filter field values (corresponding to names specified in filter_key_list)"
        )
    )


In [None]:
from llama_index.core.tools import FunctionTool

# Tool description shown to the LLM; embeds the vector store schema as JSON.
description = f"""\
Use this tool to answer the usery query by retrieving relevant information from the vector database.
The vector database schema is given below, which you should use to find the right information to answer the user's query.:
{vector_store_info.json()}
"""

# NOTE(review): auto_retrieve_fn is not defined in the visible part of this
# notebook — confirm it exists before this cell runs.
# The tool is defined once; the original cell repeated this exact definition twice.
auto_retrieve_tool = FunctionTool.from_defaults(
    fn=auto_retrieve_fn,
    name="words-of-senpai-info",
    description=description,
    fn_schema=AutoRetrieveModel
)

In [28]:
# Accept a query from the user (placeholder value for now)
query = "Test query"
# TODO: Feed the query into the LLM with a prompt to analyze the query and assess the decomposition method and number of queries to use

# TODO: Have it output in a structured format so this can be used in the selection process

In [27]:
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever

from llama_index.core import PromptTemplate
from prompts import QUERY_GEN_PROMPT

# Prompt used by the fusion retriever to generate additional queries.
QUERY_GEN_PROMPT_TEMPLATE = PromptTemplate(QUERY_GEN_PROMPT)

# Dense retriever over the vector index.
vector_retriever = index.as_retriever(similarity_top_k=10)

# BM25 retriever built from the nodes returned by the vector store.
bm25_retriever = BM25Retriever.from_defaults(nodes=index.vector_store.get_nodes(), similarity_top_k=10)

# Define your main query and the specific sub-queries manually
# NOTE(review): main_query and manual_sub_queries are not referenced anywhere
# else in the visible notebook.
main_query = "Explain the impact of AI on healthcare"
manual_sub_queries = [
    "What are the benefits of AI in healthcare?",
    "What are the potential risks of AI in healthcare?",
    "How has AI improved patient outcomes in healthcare?",
    "What ethical concerns arise with AI in healthcare?",
]

# Fuse the dense and BM25 results, using LLM-generated query variations.
retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=10,
    
    num_queries=3,  # Set to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    llm=llm_openai,
    query_gen_prompt=QUERY_GEN_PROMPT_TEMPLATE
)

TypeError: QueryFusionRetriever.__init__() got an unexpected keyword argument 'post_retrieval_transform'

In [None]:
# Run one retrieval and show each hit: score, text, then a 50-dash rule.
ret = retriever.retrieve("What challenges were encountered and how were they overcome")
for r in ret:
    print(f"Score: {r.score}", f"Text: {r.node.text}", "-" * 50, sep="\n")

In [21]:
# Attach the post-retrieval transform to the retriever as an attribute.
# NOTE(review): nothing in the visible notebook calls `transform_nodes`, and the
# retriever itself may never read it — confirm this has any effect on results.
retriever.transform_nodes = post_retrieval_transform


In [24]:
# Re-run the same retrieval query as the earlier cell (after setting
# `transform_nodes`) and print each hit's score and text.
ret = retriever.retrieve("What challenges were encountered and how were they overcome")
for r in ret:
    print(f"Score: {r.score}")
    print(f"Text: {r.node.text}")
    print("-" * 50)

Generated queries:
1. What difficulties arose during the process, and what strategies were used to resolve them?
2. What hurdles were faced, and what approaches were taken to address these issues?
Score: 0.09475006400409626
Text: Data Workflow and Text Formatting: Challenge: Determining the workflow for getting data from the application's text editor to a Word document while maintaining the text format. This involved selecting a Node module for exporting the data from the Quill editor to the Python script and selecting the Python library to handle saving the data to a Word document. Solution: Tested several different modules and Python packages, logging their features and shortcomings, eventually settling on ones that best fit the needs of the project. Non-Intrusive User Reminders Challenge: Satisfying the requirement of reminding the user to log their work while avoiding disrupting them mid-thought Solution: Implemented a fade-in animation so the reminder window gently appears without

# Build the query pipeline

In [15]:
from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.query_pipeline import InputComponent
from llama_index.core.response_synthesizers import ResponseMode

# Entry point of the pipeline: receives the raw query string.
input_component = InputComponent()

# Query engine that retrieves with the fusion retriever and synthesizes an
# answer using the COMPACT_ACCUMULATE response mode.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    response_mode=ResponseMode.COMPACT_ACCUMULATE,
    use_async = True,
    verbose=True
    )

# Linear chain: input -> query engine.
chain = [input_component, query_engine]

pipeline = QueryPipeline(
    
    chain=chain,
    verbose=True
)

In [16]:
# Run the full pipeline on a sample question; `response` is printed in the next cell.
response = pipeline.run(input="What challenges were encountered and how were they overcome")

[1;3;38;2;155;135;227m> Running module d5314752-a2f8-4f5d-9ae7-e3fe787369a6 with input: 
input: What challenges were encountered and how were they overcome

[0m[1;3;38;2;155;135;227m> Running module 8b2cebc1-326f-4e94-9c47-de42e853c7f0 with input: 
input: What challenges were encountered and how were they overcome

[0mGenerated queries:
1. Challenges faced in project management and strategies to overcome them
2. Common obstacles in team collaboration and effective solutions


In [17]:
# Print the response framed by two 50-dash rules.
print(f"""
Response:
{'-' * 50}
{response}
{'-' * 50}
""")


Response:
--------------------------------------------------
Response 1: Several challenges were encountered across different projects, each addressed with specific solutions:

1. **Data Workflow and Text Formatting**: The challenge of maintaining text formatting when exporting data from a text editor to a Word document was overcome by testing various Node modules and Python packages, ultimately selecting the most suitable ones for the project.

2. **Non-Intrusive User Reminders**: To remind users to log their work without disrupting their focus, a fade-in animation for reminder windows was implemented, ensuring they appeared subtly in the bottom right corner while remaining visible.

3. **Conditional Application Runtime**: Ensuring the application only runs during user-defined working hours was addressed by implementing a settings feature that allows for flexible scheduling.

4. **Image Resizing and Indented Lists Formatting**: Issues with images losing their dimensions and nested bu

In [None]:
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

# Vector index over the ingested nodes, sharing the Qdrant-backed storage context.
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
# Keyword index over the same nodes. The index constructor's first argument is
# the node list, not a docstore, so pass `nodes` rather than `vector_index.docstore`.
keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

## Example executions for the Vector Index and Keyword Index

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# define custom retriever
# Note: this rebinds `vector_retriever`, which an earlier cell defined from `index`.
vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=2)
keyword_retriever = KeywordTableSimpleRetriever(index=keyword_index)
# Default mode is "AND": only nodes returned by both retrievers are kept.
custom_retriever = CustomRetriever(vector_retriever, keyword_retriever)

# define response synthesizer (shared by the three engines below)
response_synthesizer = get_response_synthesizer()

# assemble query engine
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

# vector query engine
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
)
# keyword query engine
keyword_query_engine = RetrieverQueryEngine(
    retriever=keyword_retriever,
    response_synthesizer=response_synthesizer,
)