In [15]:
import asyncio
import aiohttp
import os
from langchain import hub
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_fireworks import FireworksEmbeddings, ChatFireworks
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain.prompts import PromptTemplate
from langchain.schema import Document
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.tokenize import sent_tokenize
import re
import io
import time
import sys
import gradio as gr
import asyncio
from typing import List, Tuple, Any, Dict
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.schema import Document
import numpy as np
from functools import lru_cache
import faiss
import httpx
from urllib.parse import urlparse
from sentence_transformers import SentenceTransformer, CrossEncoder
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from flashrank import Ranker, RerankRequest
from pathlib import Path
import traceback
import random
from rank_bm25 import BM25Okapi
import json

In [16]:
# print(f"CUDA is available: {torch.cuda.is_available()}")

# Set up API clients
# os.environ['FIREWORKS_API_KEY'] = 'API'
# os.environ["SERPER_API_KEY"] = 'API' # my api
os.environ["SERPER_API_KEY"] = 'API'
os.environ["FIREWORKS_API_KEY"] = 'API'
os.environ["OPENAI_API_KEY"] = 'API'

# Initialize components
search = GoogleSerperAPIWrapper(k=3)
embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5")
llm = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)
# llm = OpenAI()
llm_8b = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)
llm_70b = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-70b-instruct", temperature=0)

# Create a directory for caching in the user's home folder
cache_dir = Path.home() / ".flashrank_cache"
cache_dir.mkdir(parents=True, exist_ok=True)

# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available. GPU will be used automatically by FlashRank.")
else:
    print("CUDA is not available. CPU will be used.")

# Initialize FlashRank rerankers
ranker_nano = Ranker(cache_dir=str(cache_dir))
ranker_small = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir=str(cache_dir))
ranker_medium_t5 = Ranker(model_name="rank-T5-flan", cache_dir=str(cache_dir))
ranker_medium_multilang = Ranker(model_name="ms-marco-MultiBERT-L-12", cache_dir=str(cache_dir))
ranker_large = Ranker(model_name="rank_zephyr_7b_v1_full", max_length=1024, cache_dir=str(cache_dir))

# Ensure models are on GPU if available
for ranker in [ranker_nano, ranker_small, ranker_medium_t5, ranker_medium_multilang, ranker_large]:
    if hasattr(ranker, 'model') and hasattr(ranker.model, 'to'):
        ranker.model.to('cuda' if torch.cuda.is_available() else 'cpu')

# Download NLTK data
# nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

CUDA is available. GPU will be used automatically by FlashRank.


llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from /home/ubuntu/.flashrank_cache/rank_zephyr_7b_v1_full/rank_zephyr_7b_v1_full.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = hub
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   

True

In [17]:
class BM25Retriever:
    def __init__(self, documents: List[Document]):
        self.documents = documents
        self.tokenized_corpus = [doc.page_content.split() for doc in documents]
        self.bm25 = BM25Okapi(self.tokenized_corpus)
        
    def retrieve(self, query: str, extended_queries: List[str] = None, k: int = 100) -> List[Tuple[Document, float]]:
        if extended_queries:
            full_query = f"{query} {' '.join(extended_queries)}"
        else:
            full_query = query
        
        tokenized_query = full_query.split()
        doc_scores = self.bm25.get_scores(tokenized_query)
        
        # Normalize BM25 scores to 0-1 range
        max_score = max(doc_scores)
        min_score = min(doc_scores)
        score_range = max_score - min_score
        normalized_scores = [(score - min_score) / score_range for score in doc_scores] if score_range != 0 else [1.0 for _ in doc_scores]
        
        top_k_indices = sorted(range(len(normalized_scores)), key=lambda i: normalized_scores[i], reverse=True)[:k]
        return [(self.documents[i], normalized_scores[i]) for i in top_k_indices]

def combine_retrieval_methods(query: str, vectorstores: List[FAISS], bm25_retriever: BM25Retriever, 
                              hyde_embedding: List[float], num_docs: int, extended_queries: List[str] = None, 
                              alpha: float = 0.7) -> List[Document]:
    # Retrieve documents using vector search
    vector_docs = []
    for vectorstore in vectorstores:
        docs = vectorstore.similarity_search_by_vector(hyde_embedding, k=num_docs)
        vector_docs.extend(docs)
    
    # Retrieve documents using BM25
    bm25_docs = bm25_retriever.retrieve(query, extended_queries, k=num_docs)
    
    # Combine the results
    combined_docs = {}
    for doc in vector_docs:
        combined_docs[doc.page_content] = alpha * (1 - vector_docs.index(doc) / len(vector_docs))
    
    for doc, score in bm25_docs:
        if doc.page_content in combined_docs:
            combined_docs[doc.page_content] += (1 - alpha) * score
        else:
            combined_docs[doc.page_content] = (1 - alpha) * score
    
    # Sort the combined results
    sorted_docs = sorted(combined_docs.items(), key=lambda x: x[1], reverse=True)
    
    return [Document(page_content=content) for content, _ in sorted_docs[:num_docs]]

In [18]:
def count_tokens(text):
    return len(text.split())

def generate_iteratively(context, query, target_tokens, chosen_llm, max_attempts=5):
    current_response = ""
    attempts = 0
    conversation_history = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": f"""
        Use the following context to answer the question. Follow these steps:
        
        1. Generate a brief reasoning step.
        2. Provide a structured answer using bullet points or numbered lists and separate the contents.
        3. Aim for a comprehensive answer of approximately {target_tokens} words.
        4. If you cannot answer based on the context, say "I don't have enough information to answer that question."
        
        Context:
        {context}
        
        Question: {query}
        
        Reasoning:
        
        Answer:
        """}
    ]
    
    while count_tokens(current_response) < target_tokens * 0.9 and attempts < max_attempts:
        remaining_tokens = target_tokens - count_tokens(current_response)
        
        response = chosen_llm.invoke(conversation_history)
        
        next_chunk = response.content if hasattr(response, 'content') else str(response)
        
        current_response += next_chunk
        attempts += 1
        
        # Add the model's response to the conversation history
        conversation_history.append({"role": "assistant", "content": next_chunk})
        
        # Add a new user message asking to continue if needed
        if count_tokens(current_response) < target_tokens * 0.9:
            conversation_history.append({
                "role": "user", 
                "content": f"Please continue your answer. Aim for approximately {remaining_tokens} additional words."
            })
        
        if count_tokens(current_response) > target_tokens * 1.1:
            # If we've exceeded the upper bound, truncate
            words = current_response.split()
            truncated_response = ' '.join(words[:target_tokens])
            return truncated_response
    
    return current_response

In [None]:
async def scrape_webpage(client, url):
    try:
        response = await client.get(url, timeout=3.0)
        response.raise_for_status()
        text = response.text
        soup = BeautifulSoup(text, 'lxml')
        content = ' '.join(soup.stripped_strings)
        return content[:5000], len(content[:5000])
    except (httpx.RequestError, httpx.TimeoutException) as exc:
        print(f"An error occurred while requesting {url}: {exc}")
    except httpx.HTTPStatusError as exc:
        print(f"Error response {exc.response.status_code} while requesting {url}")
    except Exception as e:
        print(f"Error scraping {url}: {e}")
    return "", 0

async def search_and_scrape(query, num_urls):
    search_results = search.results(query)
    scraped_urls = set()
    full_texts = []

    async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=3.0)) as client:
        tasks = []
        if 'organic' in search_results:
            for result in search_results['organic']:
                url = result.get('link')
                domain = urlparse(url).netloc if url else None
                if url and domain not in scraped_urls and len(tasks) < num_urls:
                    tasks.append(scrape_webpage(client, url))
                    scraped_urls.add(domain)

        results = await asyncio.gather(*tasks, return_exceptions=True)
        for result in results:
            if isinstance(result, tuple) and result[1] > 0:
                full_texts.append(result[0])

    return " ".join(full_texts)

def query_expansion(query, num_expansions):
    expansion_prompt = f"""
    Given the following search query, generate {num_expansions} additional related queries that could help find more comprehensive information on the topic. The queries should be different from each other and explore various aspects of the main query. Provide only the additional queries, numbered 1-{num_expansions}.

    Main query: {query}

    Additional queries:
    """

    response = llm.invoke(expansion_prompt)
    response_text = response.content if hasattr(response, 'content') else str(response)

    expanded_queries = [query]
    for line in response_text.split('\n'):
        if line.strip() and line[0].isdigit():
            expanded_queries.append(line.split('. ', 1)[1].strip())

    return expanded_queries[:num_expansions + 1]

def create_sentence_windows(text, window_size=3):
    sentences = sent_tokenize(text)
    windows = []
    for i in range(len(sentences)):
        window = " ".join(sentences[max(0, i-window_size):min(len(sentences), i+window_size+1)])
        windows.append(window)
    return windows

def generate_hypothetical_document(query):
    hyde_prompt = f"""
    Given the search query below, generate a hypothetical document that would be a perfect match for this query. The document should be concise, containing only 3 sentences of relevant information that directly addresses the query.

    Query: {query}

    Hypothetical Document (3 sentences):
    """

    response = llm.invoke(hyde_prompt)
    return response.content if hasattr(response, 'content') else str(response)

def llm_rerank(query, documents):
    rerank_prompt = """
    Given the following query and a list of document excerpts, rank the documents based on their relevance to the query. Provide the rankings as a list of numbers from 1 to {}, where 1 is the most relevant. Ensure you provide a ranking for every document.

    Query: {}

    Documents:
    {}

    Rankings (1 to {}):
    """.format(len(documents), query, "\n".join([f"{i+1}. {doc.page_content[:200]}..." for i, doc in enumerate(documents)]), len(documents))

    response = llm.invoke(rerank_prompt)
    rankings = [int(x) for x in response.content.split() if x.isdigit()]

    if len(rankings) < len(documents):
        remaining = set(range(1, len(documents) + 1)) - set(rankings)
        rankings.extend(remaining)

    sorted_docs = sorted(zip(documents, rankings), key=lambda x: x[1])
    return sorted_docs

def flashrank_rerank(query, documents, ranker):
    rerank_request = RerankRequest(
        query=query,
        passages=[{"text": doc.page_content} for doc in documents]
    )
    reranked = ranker.rerank(rerank_request)
    
    if isinstance(reranked, list) and isinstance(reranked[0], dict):
        sorted_results = sorted(reranked, key=lambda x: x.get('score', 0), reverse=True)
        return [(documents[i], result.get('score', 0)) for i, result in enumerate(sorted_results)]
    
    elif isinstance(reranked, list) and hasattr(reranked[0], 'score'):
        sorted_results = sorted(reranked, key=lambda x: x.score, reverse=True)
        return [(documents[i], result.score) for i, result in enumerate(sorted_results)]
    
    else:
        print(f"Unexpected reranked result type. Using original document order.")
        return [(doc, 1.0) for doc in documents]


def batch_embed_documents(documents, embeddings, batch_size=512):
    batched_embeddings = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        texts = [doc.page_content for doc in batch]
        embeddings_batch = embeddings.embed_documents(texts)
        batched_embeddings.extend(embeddings_batch)
    return batched_embeddings

def create_single_vectorstore(index_documents, embeddings):
    vectorstore_start = time.time()
    
    all_documents = []
    for doc in index_documents:
        all_documents.append(Document(page_content=doc.page_content))
    
    # Batch process all embeddings
    batch_embeddings = batch_embed_documents(all_documents, embeddings)
    
    # Create a single FAISS vectorstore
    texts = [doc.page_content for doc in all_documents]
    vectorstore = FAISS.from_embeddings(
        embedding=embeddings,
        text_embeddings=list(zip(texts, batch_embeddings))
    )
    
    vectorstore_time = time.time() - vectorstore_start
    print(f"-----Vectorstore creation time: {vectorstore_time:.2f} seconds")
    
    return vectorstore, all_documents


# Update the retriever function to use the single vectorstore:
def get_hyde_retriever(vectorstore, hyde_embedding, num_docs, num_rerank, rerank_method):
    def retriever(query):
        docs = vectorstore.similarity_search_by_vector(hyde_embedding, k=num_docs)
        
        unique_docs = []
        seen_content = set()
        for doc in docs:
            content = doc.page_content
            if content not in seen_content:
                unique_docs.append(Document(page_content=content))
                seen_content.add(content)

        try:
            if rerank_method == "none":
                return unique_docs[:num_rerank]
            elif rerank_method == "llm":
                reranked_docs = llm_rerank(query, unique_docs)
            elif rerank_method in ["nano", "small", "medium_t5", "medium_multilang", "large"]:
                ranker = globals()[f"ranker_{rerank_method}"]
                reranked_docs = flashrank_rerank(query, unique_docs, ranker)
            else:
                raise ValueError(f"Unknown rerank method: {rerank_method}")

            return [doc for doc, _ in reranked_docs[:num_rerank]]
        except Exception as e:
            print(f"Error during reranking with method {rerank_method}: {str(e)}")
            print("Traceback:", traceback.format_exc())
            print("Falling back to no reranking.")
            return unique_docs[:num_rerank]

    return retriever

async def process_query(query, num_expansions, num_urls, num_docs, num_rerank, rerank_method, use_70b_model, use_combined_retrieval, use_extended_queries, target_tokens):
    try:
        start_time = time.time()

        hyde_start = time.time()
        hypothetical_doc = generate_hypothetical_document(query)
        hyde_time = time.time() - hyde_start
        print(f"hypothetical_doc length: {len(hypothetical_doc)}")
        print(f"-----HyDE generation time: {hyde_time:.2f} seconds")

        embed_start = time.time()
        hyde_embedding = embeddings.embed_query(hypothetical_doc)
        embed_time = time.time() - embed_start
        print(f"-----Embedding time: {embed_time:.2f} seconds")

        ext_start = time.time()
        extended_queries = query_expansion(query, num_expansions) if use_extended_queries else [query]
        ext_time = time.time() - ext_start
        print(f"-----Query expansion time: {ext_time:.2f} seconds")

        scrape_start = time.time()
        all_texts = await asyncio.gather(*[search_and_scrape(eq, num_urls) for eq in extended_queries])
        scrape_time = time.time() - scrape_start
        print(f"-----Web scraping time: {scrape_time:.2f} seconds")

        combined_text = " ".join(all_texts)
        print(f"Combined text length: {len(combined_text)} characters")

        sentence_windows = create_sentence_windows(combined_text)
        print(f"Number of sentence windows: {len(sentence_windows)}")

        index_documents = [Document(page_content=window) for window in sentence_windows]

        vectorstore_start = time.time()
        vectorstore, all_documents = create_single_vectorstore(index_documents, embeddings)
        
        bm25_retriever = None
        if use_combined_retrieval:
            bm25_retriever = BM25Retriever(all_documents)
        
        vectorstore_time = time.time() - vectorstore_start
        print(f"-----Vectorstore {'and BM25 ' if use_combined_retrieval else ''}creation time: {vectorstore_time:.2f} seconds")
    
        retrieval_start = time.time()
        if use_combined_retrieval:
            retrieved_docs = combine_retrieval_methods(query, [vectorstore], bm25_retriever, hyde_embedding, num_docs, extended_queries if use_extended_queries else None)
        else:
            retriever = get_hyde_retriever(vectorstore, hyde_embedding, num_docs, num_rerank, rerank_method)
            retrieved_docs = retriever(query)
        retrieval_time = time.time() - retrieval_start
        print(f"-----Retrieval and reranking time: {retrieval_time:.2f} seconds")

        print(f"Number of retrieved and reranked documents: {len(retrieved_docs)}")

        context_docs = [doc.page_content for doc in retrieved_docs]
        context = "\n\n".join(context_docs)

        total_processing_time = hyde_time + embed_time + scrape_time + vectorstore_time + retrieval_time
        print(f"-----Total processing time before answer generation: {total_processing_time:.2f} seconds")

        answer_start = time.time()
        
        chosen_llm = llm_70b if use_70b_model else llm_8b
        
        # Use the generate_iteratively function
        answer = generate_iteratively(context, query, target_tokens, chosen_llm)
        
        answer_time = time.time() - answer_start
        print(f"-----Answer generation time: {answer_time:.2f} seconds")

        print("\n")
        print("-"*120)
        print(f"Final Answer (approximately {count_tokens(answer)} words):\n", answer)
        print("-"*120)

        return answer, context_docs, [hyde_time, hyde_embedding, ext_time, scrape_time, vectorstore_time, retrieval_time, total_processing_time, answer_time]

    except Exception as e:
        print(f"An error occurred: {e}")
        import traceback
        traceback.print_exc()
        return "I'm sorry, but I encountered an error while processing your query. Please try again.", [], []


def gradio_interface(query, num_expansions, num_urls, num_docs, num_rerank, rerank_method, use_70b_model, use_combined_retrieval, use_extended_queries, target_tokens):
    old_stdout = sys.stdout
    sys.stdout = buffer = io.StringIO()

    answer, context_docs, _ = asyncio.run(process_query(query, num_expansions, num_urls, num_docs, num_rerank, rerank_method, use_70b_model, use_combined_retrieval, use_extended_queries, target_tokens))

    sys.stdout = old_stdout
    captured_output = buffer.getvalue()

    truncated_docs = [f"Document {i+1}: {doc[:150]}..." for i, doc in enumerate(context_docs)]
    truncated_context = "\n\n".join(truncated_docs)

    captured_output += f"\n\nContext used for answer generation (first 150 characters of each document, {len(context_docs)} documents in total):\n" + truncated_context

    return captured_output

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Slider(minimum=0, maximum=3, value=1, step=1, label="Number of query expansions"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of URLs to scrape per extended query"),
        gr.Slider(minimum=20, maximum=150, value=80, step=1, label="Number of documents to retrieve with HyDE"),
        gr.Slider(minimum=10, maximum=120, value=50, step=1, label="Number of documents to keep after retrieval/reranking"),
        gr.Radio(["none", "llm", "nano", "small", "medium_t5", "medium_multilang"], label="Reranking method", value="none"),
        gr.Checkbox(label="Use 70B model for QA (unchecked uses 8B)", value=True),
        gr.Checkbox(label="Use combined BM25 and embedding retrieval", value=False),
        gr.Checkbox(label="Use extended queries for BM25", value=False),
        gr.Slider(minimum=10, maximum=2000, value=1000, step=10, label="Target answer tokens")
    ],
    outputs="text",
    title="Structured - Advanced RAG Query Processing",
    description="Enter a query and adjust parameters to get a detailed answer based on web search and document analysis.",
    examples=[
        ["How can I take care of my eyes?", 3, 10, 150, 120, "none", True, False, False, 1000],
        ["How can I take care of my eyes?", 1, 3, 80, 50, "nano", False, True, True, 500]
    ]
)

if __name__ == "__main__":
    iface.launch(share=True, debug=True)

INFO:httpx:HTTP Request: GET http://127.0.0.1:7864/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7864/ "HTTP/1.1 200 OK"


Running on local URL:  http://127.0.0.1:7864


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"


Running on public URL: https://502650039c7567a1cf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


INFO:httpx:HTTP Request: HEAD https://502650039c7567a1cf.gradio.live "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://machinelearningmastery.com/how-to-code-the-generative-adversarial-network-training-algorithm-and-loss-functions/ "HTTP/1.1 403 Forbidden"
INFO:httpx:HTTP Request: GET https://www.mathworks.com/help/deeplearning/ug/train-conditional-generative-adversarial-network.html "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://neptune.ai/blog/gan-loss-functions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


In [6]:
#### evaluation 

# LLM for generating questions
llm_generator = ChatFireworks(model_name="accounts/fireworks/models/llama-v3p1-70b-instruct", temperature=0.6)

# Question generation prompt
question_gen_template = """Generate exactly {num_questions} diverse and challenging questions that would require complex web searches to answer. The questions should:

1. Cover a wide range of topics (e.g., science, history, current events, technology, arts)
2. Avoid long questions
3. Ensure there is only one question per query. Query should NOT be multiple questions

Please provide the questions as a numbered list, starting from 1 and ending at {num_questions}.

Generated Questions:"""

question_gen_prompt = PromptTemplate.from_template(question_gen_template)

def generate_questions(num_questions, max_attempts=3):
    for attempt in range(max_attempts):
        question_gen_chain = question_gen_prompt | llm_generator | StrOutputParser()
        questions_text = question_gen_chain.invoke({"num_questions": num_questions})

        questions = []
        for line in questions_text.split('\n'):
            match = re.match(r'^\s*\d+\.\s*(.+)$', line)
            if match:
                question = match.group(1).strip()
                questions.append(question)

        if len(questions) == num_questions:
            return questions

        print(f"Attempt {attempt + 1}: Generated {len(questions)} questions instead of {num_questions}. Retrying...")

    raise ValueError(f"Failed to generate exactly {num_questions} questions after {max_attempts} attempts.")

# Generate questions
num_questions = 100

evaluation_questions = generate_questions(num_questions)
print(f"Successfully generated {len(evaluation_questions)} questions:")
for i, question in enumerate(evaluation_questions, 1):
    print(f"{i}. {question}")

INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Successfully generated 100 questions:
1. What is the average lifespan of a quokka?
2. Which ancient civilization built the first known suspension bridge?
3. What is the name of the largest living organism in the world?
4. Who is the author of the first computer bug?
5. What is the chemical composition of the pigment Tyrian purple?
6. In what year did the first successful heart transplant take place?
7. What is the name of the largest waterfall in the world by volume?
8. Who is the founder of the philosophical school of Stoicism?
9. What is the name of the smallest country in the world by land area?
10. What is the average airspeed velocity of an unladen swallow?
11. Which planet in our solar system has the longest day?
12. Who is the inventor of the first practical light bulb?
13. What is the name of the largest desert in Asia?
14. What is the chemical symbol for gold?
15. In what year did the first human walk on the moon?
16. What is the name of the largest living species of lizard?
1

In [28]:
len(evaluation_questions)
evaluation_questions[:10]

['What is the average lifespan of a quokka in the wild?',
 'Which ancient civilization built the first known suspension bridge?',
 'What is the chemical composition of the pigment used in the Mona Lisa?',
 'Who is the founder of the first successful cryptocurrency?',
 'What is the name of the largest living organism in the world?',
 'In what year did the first computer bug occur?',
 'What is the name of the ancient city buried under the sands of the Taklamakan Desert?',
 'Who is the author of the first science fiction novel?',
 'What is the process by which the human brain creates new neurons?',
 "What is the name of the world's largest waterfall, by volume of water?"]

In [30]:
# Initialize the judge model (405B LLaMA)
judge_model = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-405b-instruct", temperature=0)

def evaluate_answer_quality(question: str, answer: str, judge_model: Any) -> int:
    """
    Evaluate if the answer completely addresses the question.
    Returns 1 if yes, 0 if no.
    """
    prompt = f"""
    You are an expert evaluator. Your task is to determine if the given answer completely addresses the question.
    
    Question: {question}
    Answer: {answer}
    
    Does the answer completely address the question?
    Respond with only 'Yes' or 'No'.
    
    Response:
    """
    
    response = judge_model.invoke(prompt)
    return 1 if response.content.strip().lower() == 'yes' else 0

def evaluate_document_selection(question: str, all_docs: List[str], selected_docs: List[str], judge_model: Any) -> int:
    """
    Evaluate if the selected documents are the best 10 out of the 80 to answer the question.
    Returns 1 if yes, 0 if no.
    """
    all_docs_text = "\n".join([f"{i+1}. {doc}..." for i, doc in enumerate(all_docs)])
    selected_indices = [all_docs.index(doc) + 1 for doc in selected_docs]
    
    prompt = f"""
    You are an expert information retrieval system. Your task is to determine if the selected documents are the best 10 out of the given 80 for answering the question completely.
    
    Question: {question}
    
    Here are all 80 retrieved documents:
    {all_docs_text}
    
    The system selected the following documents (by index): {', '.join(map(str, selected_indices))}
    
    Are these selected documents the best 10 out of the 80 for answering the question completely?
    Respond with only 'Yes' or 'No'.
    
    Response:
    """
    
    response = judge_model.invoke(prompt)
    return 1 if response.content.strip().lower() == 'yes' else 0

In [None]:
def safe_add(a, b):
    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
        return a + b
    elif isinstance(a, list) and isinstance(b, list):
        return [safe_add(x, y) for x, y in zip(a, b)]
    else:
        print(f"Warning: Unexpected types in safe_add: {type(a)} and {type(b)}")
        return 0  # or handle this case as appropriate

async def run_evaluation(num_questions: int = 100):
    questions = evaluation_questions
    configurations = [
        {"name": "vectorstore_only", "use_bm25": False, "use_extended_queries": False},
        {"name": "vectorstore_with_bm25", "use_bm25": True, "use_extended_queries": False},
        {"name": "vectorstore_with_bm25_extended", "use_bm25": True, "use_extended_queries": True}
    ]
    
    for config in configurations:
        results = []
        total_answer_correct = 0
        total_docs_correct = 0
        total_times = [0] * 8  # For the 8 time measurements
        
        print(f"Evaluating configuration: {config['name']}")
        
        for i, question in enumerate(questions[:num_questions], 1):
            print(f"Processing question {i}/{num_questions}")
            
            try:
                answer, context_docs, times = await process_query(
                    query=question,
                    num_expansions=1,
                    num_urls=3,
                    num_docs=80,
                    num_rerank=10,
                    rerank_method="none",
                    use_70b_model=False,
                    use_combined_retrieval=config['use_bm25'],
                    use_extended_queries=config['use_extended_queries']
                )
                
                all_docs = context_docs[:80]
                selected_docs = context_docs[:10]
                
                answer_correct = evaluate_answer_quality(question, answer, judge_model)
                docs_correct = evaluate_document_selection(question, all_docs, selected_docs, judge_model)
                
                total_answer_correct += answer_correct
                total_docs_correct += docs_correct
                total_times = [safe_add(total, t) for total, t in zip(total_times, times)]
                
                result = {
                    "question": question,
                    "answer": answer,
                    "answer_correctness": answer_correct,
                    "top_10_docs_correctness": docs_correct,
                    "all_docs": all_docs,
                    "selected_docs": selected_docs,
                    "times": times
                }
                results.append(result)
            except Exception as e:
                print(f"Error processing question {i}: {str(e)}")
                continue
        
        avg_answer_correct = total_answer_correct / num_questions
        avg_docs_correct = total_docs_correct / num_questions
        avg_times = [t / num_questions if isinstance(t, (int, float)) else [x / num_questions for x in t] for t in total_times]
        
        print(f"\nAverage Results for {config['name']} over {num_questions} questions:")
        print(f"Average Answer Correctness: {avg_answer_correct:.2f}")
        print(f"Average Top 10 Documents Correctness: {avg_docs_correct:.2f}")
        print(f"Average HyDE Time: {avg_times[0]:.2f} seconds")
        print(f"Average Embedding Time: {avg_times[1]:.2f} seconds")
        print(f"Average Query Expansion Time: {avg_times[2]:.2f} seconds")
        print(f"Average Web Scraping Time: {avg_times[3]:.2f} seconds")
        print(f"Average Vectorstore {'and BM25 ' if config['use_bm25'] else ''}Creation Time: {avg_times[4]:.2f} seconds")
        print(f"Average Retrieval Time: {avg_times[5]:.2f} seconds")
        print(f"Average Total Processing Time: {avg_times[6]:.2f} seconds")
        print(f"Average Answer Generation Time: {avg_times[7]:.2f} seconds")
        
        output = {
            "results": results,
            "average_answer_correctness": avg_answer_correct,
            "average_top_10_docs_correctness": avg_docs_correct,
            "average_times": {
                "hyde_time": avg_times[0],
                "embedding_time": avg_times[1],
                "query_expansion_time": avg_times[2],
                "web_scraping_time": avg_times[3],
                "vectorstore_creation_time": avg_times[4],
                "retrieval_time": avg_times[5],
                "total_processing_time": avg_times[6],
                "answer_generation_time": avg_times[7]
            }
        }
        
        filename = f'/home/ubuntu/maziar/12_efficient_ranking/evaluation/{config["name"]}.json'
        with open(filename, 'w') as f:
            json.dump(output, f, indent=2)
        
        print(f"\nResults have been saved to '{filename}'")

# To run the evaluation, use:
await run_evaluation()

Evaluating configuration: vectorstore_only
Processing question 1/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 374
-----HyDE generation time: 0.59 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.37 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://genomics.senescence.info/species/entry.php?species=Setonix_brachyurus "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://australian.museum/learn/animals/mammals/quokka/ "HTTP/1.1 200 OK"


-----Web scraping time: 2.42 seconds
Combined text length: 10001 characters
Number of sentence windows: 38


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 0.83 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 4.21 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.46 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the average lifespan of a quokka in the wild, but the provided context only mentions that quokkas live over 10 years in the wild, without providing a specific average lifespan.

Answer: I don't have enough information to answer that question.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 2/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 433
-----HyDE generation time: 0.49 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.23 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Suspension_bridge "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.quora.com/What-country-invented-the-first-suspension-bridge "HTTP/1.1 429 Too Many Requests"


Error response 429 while requesting https://www.quora.com/What-country-invented-the-first-suspension-bridge
-----Web scraping time: 1.56 seconds
Combined text length: 5000 characters
Number of sentence windows: 19


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 0.60 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 2.87 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.64 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks about the ancient civilization that built the first known suspension bridge. To answer this question, I need to look for information in the context about the earliest known suspension bridges and the civilization that built them.

Answer: The Tibetan siddha and bridge-builder Thangtong Gyalpo originated the use of iron chains in his version of simple suspension bridges. In 1433, Gyalpo built eight bridges in eastern Bhutan. Therefore, the answer is the Tibetan civilization, specifically Thangtong Gyalpo.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 3/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 650
-----HyDE generation time: 0.70 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.24 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://www.artnews.com/art-news/news/scientists-detect-rare-chemical-compound-mona-lisa-leonardo-1234681965/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.acs.org/pressroom/presspacs/2023/october/mona-lisa-hides-a-surprising-mix-of-toxic-pigments.html "HTTP/1.1 200 OK"


-----Web scraping time: 1.60 seconds
Combined text length: 10001 characters
Number of sentence windows: 38


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.03 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 3.57 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.77 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks about the chemical composition of the pigment used in the Mona Lisa. To answer this question, we need to look for information in the text that describes the chemical composition of the pigment used in the Mona Lisa.

Answer: According to the text, the oil paint used by Leonardo da Vinci in the base layer of the Mona Lisa has a chemical composition distinct from his other works—and even those made by his famous contemporaries. The presence of the rare chemical compound, named plumbonacrite, has confirmed a long-held theory among art historians that Leonardo utilized lead oxide powder to thicken and dry the paint layers of the Mona Lisa.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 4/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 621
-----HyDE generation time: 0.66 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.31 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Satoshi_Nakamoto "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.investopedia.com/terms/s/satoshi-nakamoto.asp "HTTP/1.1 200 OK"


An error occurred while requesting https://money.usnews.com/investing/articles/the-history-of-bitcoin: 
-----Web scraping time: 4.05 seconds
Combined text length: 10001 characters
Number of sentence windows: 43


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 0.95 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 5.96 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.51 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the founder of the first successful cryptocurrency, which is Bitcoin. The context provided mentions Satoshi Nakamoto as the pseudonym for the person or people who developed Bitcoin, authored the Bitcoin whitepaper, and created the first blockchain database.

Answer: Satoshi Nakamoto is the founder of the first successful cryptocurrency, Bitcoin.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 5/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 336
-----HyDE generation time: 0.45 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.23 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://www.scientificamerican.com/article/strange-but-true-largest-organism-is-fungus/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Largest_organisms "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.montrealsciencecentre.com/blog/the-two-largest-living-organisms-on-earth "HTTP/1.1 200 OK"


-----Web scraping time: 1.80 seconds
Combined text length: 15002 characters
Number of sentence windows: 106


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.59 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 4.06 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.59 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The text mentions that the Armillaria solidipes is the second largest living organism on Earth, but it does not explicitly state that it is the largest. However, it also mentions that the discovery of this giant Armillaria ostoyae in 1998 heralded a new record holder for the title of the world's largest known organism, which is believed to be the 110-foot- (33.5-meter-) long, 200-ton blue whale. This suggests that the largest living organism is not a fungus, but rather a blue whale.

Answer: The blue whale.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 6/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 568
-----HyDE generation time: 0.82 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.24 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://www.globalapptesting.com/blog/the-worlds-first-computer-bug-global-app-testing "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://education.nationalgeographic.org/resource/worlds-first-computer-bug/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://lunduke.substack.com/p/the-story-of-the-first-computer-bug "HTTP/1.1 200 OK"


-----Web scraping time: 1.40 seconds
Combined text length: 13563 characters
Number of sentence windows: 120


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.71 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 4.17 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.44 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the year in which the first computer bug occurred. To answer this question, I need to find the relevant information in the context provided.

Answer: According to the context, the first computer bug occurred on September 9, 1947.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 7/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 395
-----HyDE generation time: 0.50 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.24 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Taklamakan_Desert "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.ancientpages.com/2020/06/01/secret-ancient-world-buried-under-the-vast-takla-makan-desert/ "HTTP/1.1 200 OK"


-----Web scraping time: 2.60 seconds
Combined text length: 10001 characters
Number of sentence windows: 62


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.16 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 4.49 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.98 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the name of the ancient city buried under the sands of the Taklamakan Desert. To answer this question, I need to identify the city mentioned in the context as being buried under the sand in the Taklamakan Desert.

Answer: The ancient city buried under the sands of the Taklamakan Desert is Loulan.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 8/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 555
-----HyDE generation time: 0.67 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.23 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://www.reddit.com/r/sciencefiction/comments/1728ki8/who_invented_science_fiction/ "HTTP/1.1 302 Found"
INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Science_fiction "HTTP/1.1 200 OK"


Error response 302 while requesting https://www.reddit.com/r/sciencefiction/comments/1728ki8/who_invented_science_fiction/


INFO:httpx:HTTP Request: GET https://www.theguardian.com/books/2016/may/23/work-from-1616-is-the-first-ever-science-fiction-novel "HTTP/1.1 200 OK"


-----Web scraping time: 1.68 seconds
Combined text length: 10001 characters
Number of sentence windows: 30


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.01 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 3.59 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.57 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the author of the first science fiction novel, which is mentioned in the context as Johann Valentin Andreae's work "The Chemical Wedding". To answer the question, we need to identify the author of this work.

Answer: Johann Valentin Andreae.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 9/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


hypothetical_doc length: 774
-----HyDE generation time: 0.77 seconds
-----Embedding time: 0.22 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://www.scientificamerican.com/article/the-adult-brain-does-grow-new-neurons-after-all-study-says/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://qbi.uq.edu.au/brain-basics/brain-physiology/what-neurogenesis "HTTP/1.1 200 OK"


An error occurred while requesting https://www.ninds.nih.gov/health-information/public-education/brain-basics/brain-basics-life-and-death-neuron: 
-----Web scraping time: 3.86 seconds
Combined text length: 10001 characters
Number of sentence windows: 58


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.03 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 5.87 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.82 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question is asking about the process by which the human brain creates new neurons, which is a key concept in the provided context. To answer this question, I will look for information in the text that describes the process of creating new neurons in the human brain.

Answer: The process by which the human brain creates new neurons is called neurogenesis. According to the text, neurogenesis is the process by which new neurons are formed in the brain, and it is crucial when an embryo is developing, but also continues in certain brain regions after birth and throughout our lifespan.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 10/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 447
-----HyDE generation time: 0.79 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.23 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/List_of_waterfalls_by_flow_rate "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.worldwaterfalldatabase.com/largest-waterfalls/volume "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://smartwatermagazine.com/q-a/which-largest-waterfall-world "HTTP/1.1 200 OK"


-----Web scraping time: 2.95 seconds
Combined text length: 15002 characters
Number of sentence windows: 79


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.40 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 5.37 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.71 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: To determine the name of the world's largest waterfall by volume of water, I will look for the waterfall with the highest flow rate or volume of water in the provided text.

Answer: According to the text, Inga Falls is the largest waterfall in the world by flow rate, with an estimated flow rate of approximately 25,768.33 cubic metres per second. However, the text also provides a list of waterfalls by average volume, and Inga Falls is listed as having a volume of 910,000 cubic feet per second.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


Processing question 11/100


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


hypothetical_doc length: 594
-----HyDE generation time: 0.66 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Embedding time: 0.23 seconds
-----Query expansion time: 0.00 seconds


INFO:httpx:HTTP Request: GET https://en.wikipedia.org/wiki/Jonas_Salk "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.who.int/news-room/spotlight/history-of-vaccination/history-of-polio-vaccination "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6351694/ "HTTP/1.1 403 Forbidden"


Error response 403 while requesting https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6351694/
-----Web scraping time: 1.78 seconds
Combined text length: 10001 characters
Number of sentence windows: 38


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/embeddings "HTTP/1.1 200 OK"


-----Vectorstore creation time: 1.11 seconds
-----Retrieval and reranking time: 0.00 seconds
Number of retrieved and reranked documents: 10
-----Total processing time before answer generation: 3.78 seconds


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"


-----Answer generation time: 0.55 seconds


------------------------------------------------------------------------------------------------------------------------
Final Answer:
 Reasoning step: The question asks for the inventor of the first successful polio vaccine, which is a specific piece of information that can be found in the context. To answer this question, I will look for a statement that directly mentions the inventor of the first successful polio vaccine.

Answer: According to the context, the first successful vaccine was created by US physician Jonas Salk.
------------------------------------------------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.fireworks.ai/inference/v1/chat/completions "HTTP/1.1 200 OK"
