In [3]:
# Cell 1: Core LangChain
!pip install -q langchain

# Cell 2: LangChain integrations
!pip install -q langchain-community langchain-huggingface langchain-chroma 

# Cell 3: ML libraries
!pip install -q sentence-transformers transformers chromadb

# Cell 4: Utilities
!pip install -q python-dotenv torch unstructured

In [4]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import os

import json
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
import os

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [5]:
class DatabaseManager:
    def __init__(self, embedding_model_name="sentence-transformers/all-MiniLM-L12-v2", 
                 embedding_model_type="huggingface"):
        """
        Initialize DatabaseManager with specified embedding model.
        
        Args:
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of model ("huggingface" or "gemini")
        """
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        
        # Initialize embedding function based on type
        if embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            embedding_model_type == "huggingface"
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        
        print(f"Initialized {embedding_model_type} embedding model: {embedding_model_name}")

    # Rest of your DatabaseManager methods remain the same...
    def load_documents(self, data_path):
        """Load documents from the specified directory."""
        try:
            loader = DirectoryLoader(data_path, glob="*.md")
            documents = loader.load()
            print(f"Loaded {len(documents)} documents from {data_path}")
            return documents
        except Exception as e:
            print(f"Error loading documents: {e}")
            return []

    def split_text(self, documents):
        """Split documents into chunks."""
        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=100,
                length_function=len,
                add_start_index=True,
            )
            chunks = text_splitter.split_documents(documents)
            print(f"Split into {len(chunks)} chunks")
            return chunks
        except Exception as e:
            print(f"Error splitting text: {e}")
            return []

    def save_to_chroma(self, chunks, persist_directory):
        """Save document chunks to Chroma database."""
        try:
            # Create directory if it doesn't exist
            os.makedirs(persist_directory, exist_ok=True)
            
            db = Chroma.from_documents(
                chunks, 
                self.embedding_function, 
                persist_directory=persist_directory
            )
            print(f"Saved {len(chunks)} chunks to Chroma database at {persist_directory}")
            return db
        except Exception as e:
            print(f"Error saving to Chroma: {e}")
            return None

    def generate_data_store(self, data_path="books", persist_directory="chroma"):
        """Complete pipeline: load documents, split text, and save to database."""
        print(f"Starting data store generation...")
        print(f"Data path: {data_path}")
        print(f"Persist directory: {persist_directory}")
        print(f"Embedding model: {self.embedding_model_name} ({self.embedding_model_type})")
        
        # Load documents
        documents = self.load_documents(data_path)
        if not documents:
            return False
        
        # Split into chunks
        chunks = self.split_text(documents)
        if not chunks:
            return False
        
        # Save to database
        db = self.save_to_chroma(chunks, persist_directory)
        return db is not None

In [6]:
class QueryEngine:
    def __init__(self, persist_directory="chroma", 
                 embedding_model_name="sentence-transformers/all-MiniLM-L12-v2",
                 embedding_model_type="huggingface",
                 text_model_name="google/flan-t5-base"):
        """
        Initialize QueryEngine with specified models.
        
        Args:
            persist_directory: Path to the Chroma database
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of embedding model ("huggingface" or "gemini")
            text_model_name: Name of the text generation model
        """
        self.persist_directory = persist_directory
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        self.text_model_name = text_model_name
        
        # Initialize embedding function based on type
        if embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            embedding_model_type == "huggingface"
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        
        # Initialize text generation model
        if self.text_model_name.startswith("google/flan"):
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=512,
            )
        elif self.text_model_name.startswith("gpt") or self.text_model_name.startswith("distilgpt"):
            # Special handling for GPT-2 models to fix the token length issue
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=50,         # Generate only 50 new tokens
                do_sample=True,
                temperature=0.7,
                pad_token_id=50256,
                truncation=True,           # Truncate long inputs
                return_full_text=False,    # Only return generated text, not input
                device=-1                  # Force CPU for stability
            )
        else:
            self.text_model_name = "google/flan-t5-small"
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=512,
            )
        
        self.model = HuggingFacePipeline(pipeline=self.hf_pipeline)
        
        # Initialize database
        self.db = Chroma(persist_directory=persist_directory, 
                        embedding_function=self.embedding_function)
    
        self.PROMPT_TEMPLATE = """
            Answer the question based only on the following context:

            {context}

            ---

            Answer the question based on the above context: {question}
            here are the options:
            {options}

            Respond only the Letter of the correct options like A, B, C and D. Do not inlcude the source.
            """
        
        print(f"QueryEngine initialized:")
        print(f"  Embedding: {embedding_model_name} ({embedding_model_type})")
        print(f"  Text Generation: {text_model_name}")
        print(f"  Database: {persist_directory}")

    # Rest of your QueryEngine methods remain the same...
    
    def load_quiz_data(self, quiz_file_path='test_questions.json'):
        """Load quiz data from JSON file."""
        try:
            with open(quiz_file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                print(f"Loaded {len(data)} questions from {quiz_file_path}")
                return data
        except FileNotFoundError:
            print(f"Error: {quiz_file_path} file not found!")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
            return []
 
    def semantic_search_database(self, query, k=5):
        """Search the database for relevant documents."""
        if self.db is None:
            return []
        
        try:
            results = self.db.similarity_search_with_relevance_scores(query, k=k)
            return results
        except Exception as e:
            print(f"Error searching database: {e}")
            return []
    
    def filter_response(self, response):
        edit_response = response.replace('-', '').strip()
        return edit_response

    def generate_response(self, question, options, context_text):
        """Generate a response using the LLM."""
        # Format the prompt
        options_text = "\n".join(options) if isinstance(options, list) else str(options)
        prompt = self.PROMPT_TEMPLATE.format(
            context=context_text, 
            question=question, 
            options=options_text
        )
        
        try:
            # Use the HuggingFace model to generate response
            response_text = self.model.invoke(prompt)
            response_text = self.filter_response(response_text)
            return response_text
        except Exception as e:
            print(f"Error generating response: {e}")
            return "Error generating response."
    
    def query_single_question(self, question, options=None, show_context=False):
        """Query a single question and return the response."""
        # Search the database
        results = self.semantic_search_database(question, k=5)
        
        if not results:
            return {
                'question': question,
                'response': 'No relevant context found.',
                'context': '',
                'sources': []
            }
        
        # Prepare context from search results
        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
        # sources = [doc.metadata.get("source", "Unknown") for doc, _score in results]
        sources = [(_score, doc.metadata.get("source", "Unknown"), doc.page_content) for doc, _score in results]
        all_scores = [_score for doc, _score in results]
        avg = sum(all_scores) / len(all_scores) if all_scores else 0

        
        # Generate response
        response_text = self.generate_response(question, options or [], context_text)
        
        result = {
            'question': question,
            'response': response_text.replace('-', '').strip(),
            'sources': sources,
            "avg relevance sources" : avg
        }
        
        if show_context:
            result['context'] = context_text
        
        return result
    
    def run_quiz(self, quiz_file_path='test_questions.json', show_details=False, limit=None):
        """Run the complete quiz and return results."""
        # Load quiz data
        quiz_data = self.load_quiz_data(quiz_file_path)
        
        if not quiz_data:
            print("No quiz data loaded. Exiting.")
            return []
        
        # Limit questions if specified
        if limit:
            quiz_data = quiz_data[:limit]
            print(f"Running quiz with {limit} questions.")
        
        results = []
        correct_count = 0
        
        for i, question_data in enumerate(quiz_data, 1):
            print(f"Question {i} of {len(quiz_data)}")
            
            question_id = question_data.get("id", i)
            question = question_data["question"]
            options = question_data["options"]
            correct_answer = question_data["answer"]
            
            # Query the database and generate response
            result = self.query_single_question(question, options, show_context=False)
            
            # Add quiz-specific information
            result.update({
                'id': question_id,
                'options': options,
                'correct_answer': correct_answer,
                'response' : result['response'],
                'is_correct': result['response'].strip().upper() == correct_answer.upper()
            })

            if result["is_correct"] == False and len(result["response"]) != 1:
                if result["correct_answer"].upper().strip() == "A":
                    alternate_correct_answer = result["options"][0][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "B":
                    alternate_correct_answer = result["options"][1][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "C":
                    alternate_correct_answer = result["options"][2][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "D":
                    alternate_correct_answer = result["options"][3][4:].replace('-', '').strip()
                else:
                    alternate_correct_answer = ""

                if alternate_correct_answer.upper() == result["response"].upper():
                    result["is_correct"] = True
                else:
                    if result["response"].upper().startswith(alternate_correct_answer.upper()):
                        result["response"] = alternate_correct_answer
                        result["is_correct"] = True
                    else:
                        result["is_correct"] = False

            if result['is_correct']:
                correct_count += 1
            
            results.append(result)
            
            # Show details if requested
            if show_details:
                print("=" * 50)
                print(f"Question {question_id}: {question}")
                for j, option in enumerate(options, 1):
                    print(f"  {option}")
                print(f"AI Response: {result['response']}")
                print(f"Correct Answer: {correct_answer}")
                print(f"Result: {'✓ Correct' if result['is_correct'] else '✗ Incorrect'}")
                print()
        
        # Summary
        accuracy = (correct_count / len(quiz_data)) * 100 if quiz_data else 0
        print(f"\nQuiz Summary:")
        print(f"Total Questions: {len(quiz_data)}")
        print(f"Correct Answers: {correct_count}")
        print(f"Accuracy: {accuracy:.1f}%")
        
        return results
    
    def set_prompt_template(self, new_template):
        """Set a custom prompt template."""
        self.PROMPT_TEMPLATE = new_template
        print("Prompt template updated.")


In [7]:
EMBEDDING_MODEL_OPTIONS = [
    "sentence-transformers/all-MiniLM-L6-v2", # success
    "sentence-transformers/all-mpnet-base-v2", # success
    "BAAI/bge-large-en", # success
    "intfloat/e5-base-v2", # success
    "sentence-transformers/static-retrieval-mrl-en-v1", # success
    "sentence-transformers/all-MiniLM-L12-v2", # success # best one so far
    # "gemini/embedding-001",       # Older Gemini model # horrible
    "gemini/text-embedding-005",  # New Gemini model
]

# Add model types to distinguish between HuggingFace and Gemini
EMBEDDING_MODEL_TYPES = [
    "huggingface",  # 0
    "huggingface",  # 1
    "huggingface",  # 2
    "huggingface",  # 3
    "huggingface",  # 4
    "huggingface",  # 5
    "gemini",       # 6 - New
    # "gemini",       # 7 - New
]

TEXT_GENERATION_MODEL_OPTIONS = [
    "google/flan-t5-small",
    "google/flan-t5-base", # have been using this for default development testing
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "gpt2",
    "distilgpt2"
]



In [8]:
def main(embedding_model_index=0, text_generation_model_index=0):
    embedding_model_index = embedding_model_index
    text_generation_model_index = text_generation_model_index
    
    raw_knowledge_directory = "/kaggle/input/text-for-summarizing/books"
    test_questions_directory = "/kaggle/input/test-questions/test_questions.json"
    
    os.makedirs("chroma", exist_ok=True)
    os.makedirs("quiz_results", exist_ok=True)
    
    # Get selected models
    embedding_model = EMBEDDING_MODEL_OPTIONS[embedding_model_index]
    embedding_model_type = EMBEDDING_MODEL_TYPES[embedding_model_index]
    text_model = TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]
    
    
    db_data_path = f"chroma/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}"
    result_file_path = f"quiz_results/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}--{text_model.split('/')[-1].replace('/', '_').replace('-', '_')}_quiz_results.json"


    
    
    print(f"Using embedding model: {embedding_model} ({embedding_model_type})")
    print(f"Using text generation model: {text_model}")
    
    
    def create_mode():
        print("Creating database...")
        db_manager = DatabaseManager(embedding_model_name=embedding_model, 
                                   embedding_model_type=embedding_model_type)
        success = db_manager.generate_data_store(data_path=raw_knowledge_directory, 
                                                persist_directory=db_data_path)
        
        if success:
            print("\n✓ Database created successfully!")
        else:
            print("\n✗ Failed to create database.")
    
    def quiz_mode():
        print("Running Alice in Wonderland quiz...")
        query_engine = QueryEngine(persist_directory=db_data_path,
                                 embedding_model_name=embedding_model,
                                 embedding_model_type=embedding_model_type,
                                 text_model_name=text_model)
        
        # Run the quiz
        results = query_engine.run_quiz(test_questions_directory)
        
        # Rest of quiz_mode code remains the same...
        if results:
            print("\n" + "="*50)
            print("DETAILED ANALYSIS")
            print("="*50)
            print("Embedding Model:", embedding_model)
            print("Text Generation Model:", text_model)
    
            correct_results = [r for r in results if r['is_correct']]
            incorrect_results = [r for r in results if not r['is_correct']]
            
            # if incorrect_results:
            #     print(f"\nIncorrect answers ({len(incorrect_results)}):")
            #     for result in incorrect_results:
            #         print(f"Q{result['id']}: Expected {result['correct_answer']}, got {result['response']}")
            
            # if correct_results:
            #     print(f"\nCorrect answers: {len(correct_results)}")
    
            with open(result_file_path, "w") as f:
                json.dump(results, f, indent=4)
            print(f"Quiz results saved to {result_file_path}")

    create_mode()
    quiz_mode()
    


In [9]:
# EMBEDDING_MODEL_OPTIONS = [
#     "sentence-transformers/all-MiniLM-L6-v2" # 0
#     "sentence-transformers/all-mpnet-base-v2", # 
#     "BAAI/bge-large-en", # 2
#     "intfloat/e5-base-v2", # 3
#     "sentence-transformers/static-retrieval-mrl-en-v1", # 4
#     "sentence-transformers/all-MiniLM-L12-v2", # 5 # best one so far
#     # "gemini/embedding-001",       # Older Gemini model # horrible
#     "gemini/text-embedding-005",  # 6
# ]

# TEXT_GENERATION_MODEL_OPTIONS = [
#     "google/flan-t5-small", # 0
#     "google/flan-t5-base", # 1
#     "google/flan-t5-large", # 2
#     "google/flan-t5-xl", # 3
#     "gpt2",
#     "distilgpt2"
# ]

main(embedding_model_index=0, text_generation_model_index=0)
main(embedding_model_index=0, text_generation_model_index=1)
main(embedding_model_index=0, text_generation_model_index=2)
main(embedding_model_index=0, text_generation_model_index=3)

main(embedding_model_index=1, text_generation_model_index=0)
main(embedding_model_index=1, text_generation_model_index=1)
main(embedding_model_index=1, text_generation_model_index=2)
main(embedding_model_index=1, text_generation_model_index=3)

main(embedding_model_index=2, text_generation_model_index=0)
main(embedding_model_index=2, text_generation_model_index=1)
main(embedding_model_index=2, text_generation_model_index=2)
main(embedding_model_index=2, text_generation_model_index=3)

main(embedding_model_index=3, text_generation_model_index=0)
main(embedding_model_index=3, text_generation_model_index=1)
main(embedding_model_index=3, text_generation_model_index=2)
main(embedding_model_index=3, text_generation_model_index=3)

main(embedding_model_index=4, text_generation_model_index=0)
main(embedding_model_index=4, text_generation_model_index=1)
main(embedding_model_index=4, text_generation_model_index=2)
main(embedding_model_index=4, text_generation_model_index=3)

main(embedding_model_index=5, text_generation_model_index=0)
main(embedding_model_index=5, text_generation_model_index=1)
main(embedding_model_index=5, text_generation_model_index=2)
main(embedding_model_index=5, text_generation_model_index=3)

Using embedding model: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
Using text generation model: google/flan-t5-small
Creating database...
Initialized huggingface embedding model: sentence-transformers/all-MiniLM-L12-v2
Starting data store generation...
Data path: /kaggle/input/text-for-summarizing/books
Persist directory: chroma/all_MiniLM_L12_v2
Embedding model: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
Loaded 1 documents from /kaggle/input/text-for-summarizing/books
Split into 462 chunks
Saved 462 chunks to Chroma database at chroma/all_MiniLM_L12_v2

✓ Database created successfully!
Running Alice in Wonderland quiz...


Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (666 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  Text Generation: google/flan-t5-small
  Database: chroma/all_MiniLM_L12_v2
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90
Question 4 of 90
Question 5 of 90
Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Question 11 of 90
Question 12 of 90
Question 13 of 90
Question 14 of 90
Question 15 of 90
Question 16 of 90
Question 17 of 90
Question 18 of 90
Question 19 of 90
Question 20 of 90
Question 21 of 90
Question 22 of 90
Question 23 of 90
Question 24 of 90
Question 25 of 90
Question 26 of 90
Question 27 of 90
Question 28 of 90
Question 29 of 90
Question 30 of 90
Question 31 of 90
Question 32 of 90
Question 33 of 90
Question 34 of 90
Question 35 of 90
Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90
Question 40 of 90
Question 41 of 90
Question 42 of 90
Question 43 of 90
Question 44 of 90
Question 45 of 90
Question 46 of 90
Question 47 of 90
Question 48 of 90
Question 49 of 90
Question 50 of 90
Question 51 of 90
Question 52 of 90
Question 53 of 90
Question 54 of 90
Question 55 of 90
Question 56 of 90
Question 57 of 90
Question 58 of 90
Question 59 of 90
Question 60 of 90
Question 61 of 90
Question 62 of 90
Question 63 of 90
Question 64 of 90
Question 65 of 90
Question 6

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  Text Generation: google/flan-t5-base
  Database: chroma/all_MiniLM_L12_v2
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90
Question 4 of 90
Question 5 of 90
Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90
Question 11 of 90
Question 12 of 90
Question 13 of 90
Question 14 of 90
Question 15 of 90
Question 16 of 90
Question 17 of 90
Question 18 of 90
Question 19 of 90
Question 20 of 90
Question 21 of 90
Question 22 of 90
Question 23 of 90
Question 24 of 90
Question 25 of 90
Question 26 of 90
Question 27 of 90
Question 28 of 90
Question 29 of 90
Question 30 of 90
Question 31 of 90
Question 32 of 90
Question 33 of 90
Question 34 of 90
Question 35 of 90
Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90
Question 40 of 90
Question 41 of 90
Question 42 of 90
Question 43

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  Text Generation: google/flan-t5-large
  Database: chroma/all_MiniLM_L12_v2
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90
Question 4 of 90
Question 5 of 90
Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90
Question 11 of 90
Question 12 of 90
Question 13 of 90
Question 14 of 90
Question 15 of 90
Question 16 of 90
Question 17 of 90
Question 18 of 90
Question 19 of 90
Question 20 of 90
Question 21 of 90
Question 22 of 90
Question 23 of 90
Question 24 of 90
Question 25 of 90
Question 26 of 90
Question 27 of 90
Question 28 of 90
Question 29 of 90
Question 30 of 90
Question 31 of 90
Question 32 of 90
Question 33 of 90
Question 34 of 90
Question 35 of 90
Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90
Question 40 of 90
Question 41 of 90
Question 42 of 90
Question 4

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  Text Generation: google/flan-t5-xl
  Database: chroma/all_MiniLM_L12_v2
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90
Question 4 of 90
Question 5 of 90
Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90
Question 11 of 90
Question 12 of 90
Question 13 of 90
Question 14 of 90
Question 15 of 90
Question 16 of 90
Question 17 of 90
Question 18 of 90
Question 19 of 90
Question 20 of 90
Question 21 of 90
Question 22 of 90
Question 23 of 90
Question 24 of 90
Question 25 of 90
Question 26 of 90
Question 27 of 90
Question 28 of 90
Question 29 of 90
Question 30 of 90
Question 31 of 90
Question 32 of 90
Question 33 of 90
Question 34 of 90
Question 35 of 90
Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90
Question 40 of 90
Question 41 of 90
Question 42 of 90
Question 43 o

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


QueryEngine initialized:
  Embedding: intfloat/e5-base-v2 (huggingface)
  Text Generation: google/flan-t5-xl
  Database: chroma/e5_base_v2
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90


Token indices sequence length is longer than the specified maximum sequence length for this model (637 > 512). Running this sequence through the model will result in indexing errors


Question 3 of 90
Question 4 of 90
Question 5 of 90
Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90
Question 11 of 90
Question 12 of 90
Question 13 of 90
Question 14 of 90
Question 15 of 90
Question 16 of 90
Question 17 of 90
Question 18 of 90
Question 19 of 90
Question 20 of 90
Question 21 of 90
Question 22 of 90
Question 23 of 90
Question 24 of 90
Question 25 of 90
Question 26 of 90
Question 27 of 90
Question 28 of 90
Question 29 of 90
Question 30 of 90
Question 31 of 90
Question 32 of 90
Question 33 of 90
Question 34 of 90
Question 35 of 90
Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90
Question 40 of 90
Question 41 of 90
Question 42 of 90
Question 43 of 90
Question 44 of 90
Question 45 of 90
Question 46 of 90
Question 47 of 90
Question 48 of 90
Question 49 of 90
Question 50 of 90
Question 51 of 90
Question 52 of 90
Question 53 of 90
Question 54 of 90
Question 55 of 90
Question 56 of 90
Question 57 of 90
Question 58 of 90

Device set to use cuda:0
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


QueryEngine initialized:
  Embedding: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  Text Generation: google/flan-t5-small
  Database: chroma/static_retrieval_mrl_en_v1
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
Token indices sequence length is longer than the specified maximum sequence length for this model (660 > 512). Running this sequence through the model will result in indexing errors
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 3 of 90
Question 4 of 90
Question 5 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 6 of 90
Question 7 of 90
Question 8 of 90
Question 9 of 90
Question 10 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 11 of 90
Question 12 of 90
Question 13 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 14 of 90
Question 15 of 90
Question 16 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 17 of 90
Question 18 of 90
Question 19 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 20 of 90
Question 21 of 90
Question 22 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 23 of 90
Question 24 of 90
Question 25 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 26 of 90
Question 27 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 28 of 90
Question 29 of 90
Question 30 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 31 of 90
Question 32 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 33 of 90
Question 34 of 90
Question 35 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 36 of 90
Question 37 of 90
Question 38 of 90
Question 39 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 40 of 90
Question 41 of 90
Question 42 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 43 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 44 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 45 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 46 of 90
Question 47 of 90
Question 48 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 49 of 90
Question 50 of 90
Question 51 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 52 of 90
Question 53 of 90
Question 54 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 55 of 90
Question 56 of 90
Question 57 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 58 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 59 of 90
Question 60 of 90
Question 61 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 62 of 90
Question 63 of 90
Question 64 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 65 of 90
Question 66 of 90
Question 67 of 90
Question 68 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 69 of 90
Question 70 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 71 of 90
Question 72 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 73 of 90
Question 74 of 90
Question 75 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 76 of 90
Question 77 of 90
Question 78 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 79 of 90
Question 80 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 81 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 82 of 90
Question 83 of 90
Question 84 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 85 of 90
Question 86 of 90
Question 87 of 90
Question 88 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 89 of 90
Question 90 of 90

Quiz Summary:
Total Questions: 90
Correct Answers: 48
Accuracy: 53.3%

DETAILED ANALYSIS
Embedding Model: sentence-transformers/static-retrieval-mrl-en-v1
Text Generation Model: google/flan-t5-small
Quiz results saved to quiz_results/static_retrieval_mrl_en_v1--flan_t5_small_quiz_results.json
Using embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Using text generation model: google/flan-t5-base
Creating database...


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Initialized huggingface embedding model: sentence-transformers/static-retrieval-mrl-en-v1
Starting data store generation...
Data path: /kaggle/input/text-for-summarizing/books
Persist directory: chroma/static_retrieval_mrl_en_v1
Embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Loaded 1 documents from /kaggle/input/text-for-summarizing/books
Split into 462 chunks
Saved 462 chunks to Chroma database at chroma/static_retrieval_mrl_en_v1

✓ Database created successfully!
Running Alice in Wonderland quiz...


Device set to use cuda:0
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
Token indices sequence length is longer than the specified maximum sequence length for this model (703 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  Text Generation: google/flan-t5-base
  Database: chroma/static_retrieval_mrl_en_v1
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 4 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 5 of 90
Question 6 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 7 of 90
Question 8 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 9 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 10 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 11 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 12 of 90
Question 13 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 14 of 90
Question 15 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 16 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 17 of 90
Question 18 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 19 of 90
Question 20 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 21 of 90
Question 22 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 23 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 24 of 90
Question 25 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 26 of 90
Question 27 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 28 of 90
Question 29 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 30 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 31 of 90
Question 32 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 33 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 34 of 90
Question 35 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 36 of 90
Question 37 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 38 of 90
Question 39 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 40 of 90
Question 41 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 42 of 90
Question 43 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 44 of 90
Question 45 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 46 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 47 of 90
Question 48 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 49 of 90
Question 50 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 51 of 90
Question 52 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 53 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 54 of 90
Question 55 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 56 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 57 of 90
Question 58 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 59 of 90
Question 60 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 61 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 62 of 90
Question 63 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 64 of 90
Question 65 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 66 of 90
Question 67 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 68 of 90
Question 69 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 70 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 71 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 72 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 73 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 74 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 75 of 90
Question 76 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 77 of 90
Question 78 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 79 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 80 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 81 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 82 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 83 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 84 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 85 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 86 of 90
Question 87 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 88 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 89 of 90
Question 90 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Total Questions: 90
Correct Answers: 32
Accuracy: 35.6%

DETAILED ANALYSIS
Embedding Model: sentence-transformers/static-retrieval-mrl-en-v1
Text Generation Model: google/flan-t5-base
Quiz results saved to quiz_results/static_retrieval_mrl_en_v1--flan_t5_base_quiz_results.json
Using embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Using text generation model: google/flan-t5-large
Creating database...
Initialized huggingface embedding model: sentence-transformers/static-retrieval-mrl-en-v1
Starting data store generation...
Data path: /kaggle/input/text-for-summarizing/books
Persist directory: chroma/static_retrieval_mrl_en_v1
Embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Loaded 1 documents from /kaggle/input/text-for-summarizing/books
Split into 462 chunks
Saved 462 chunks to Chroma database at chroma/static_retrieval_mrl_en_v1

✓ Database created successfully!
Running Alice in Wonderland quiz...


Device set to use cuda:0
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors


QueryEngine initialized:
  Embedding: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  Text Generation: google/flan-t5-large
  Database: chroma/static_retrieval_mrl_en_v1
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90
Question 2 of 90
Question 3 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 4 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 5 of 90
Question 6 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 7 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 8 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 9 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 10 of 90
Question 11 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 12 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 13 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 14 of 90
Question 15 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 16 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 17 of 90
Question 18 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 19 of 90
Question 20 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 21 of 90
Question 22 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 23 of 90
Question 24 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 25 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 26 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 27 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 28 of 90
Question 29 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 30 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 31 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 32 of 90
Question 33 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 34 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 35 of 90
Question 36 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 37 of 90
Question 38 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 39 of 90
Question 40 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 41 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 42 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 43 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 44 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 45 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 46 of 90
Question 47 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 48 of 90
Question 49 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 50 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 51 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 52 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 53 of 90
Question 54 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 55 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 56 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 57 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 58 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 59 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 60 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 61 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 62 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 63 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 64 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 65 of 90
Question 66 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 67 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 68 of 90
Question 69 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 70 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 71 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 72 of 90
Question 73 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 74 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 75 of 90
Question 76 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 77 of 90
Question 78 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 79 of 90
Question 80 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 81 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 82 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 83 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 84 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 85 of 90
Question 86 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 87 of 90
Question 88 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 89 of 90
Question 90 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Total Questions: 90
Correct Answers: 48
Accuracy: 53.3%

DETAILED ANALYSIS
Embedding Model: sentence-transformers/static-retrieval-mrl-en-v1
Text Generation Model: google/flan-t5-large
Quiz results saved to quiz_results/static_retrieval_mrl_en_v1--flan_t5_large_quiz_results.json
Using embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Using text generation model: google/flan-t5-xl
Creating database...
Initialized huggingface embedding model: sentence-transformers/static-retrieval-mrl-en-v1
Starting data store generation...
Data path: /kaggle/input/text-for-summarizing/books
Persist directory: chroma/static_retrieval_mrl_en_v1
Embedding model: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
Loaded 1 documents from /kaggle/input/text-for-summarizing/books
Split into 462 chunks
Saved 462 chunks to Chroma database at chroma/static_retrieval_mrl_en_v1

✓ Database created successfully!
Running Alice in Wonderland quiz...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


QueryEngine initialized:
  Embedding: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  Text Generation: google/flan-t5-xl
  Database: chroma/static_retrieval_mrl_en_v1
Loaded 90 questions from /kaggle/input/test-questions/test_questions.json
Question 1 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
Token indices sequence length is longer than the specified maximum sequence length for this model (619 > 512). Running this sequence through the model will result in indexing errors


Question 2 of 90
Question 3 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 4 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 5 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 6 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 7 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 8 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 9 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 10 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 11 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 12 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 13 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 14 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 15 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 16 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 17 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 18 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 19 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 20 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 21 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 22 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 23 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 24 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 25 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 26 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 27 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 28 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 29 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 30 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 31 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 32 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 33 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 34 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 35 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 36 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 37 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 38 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 39 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 40 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 41 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 42 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 43 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 44 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 45 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 46 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 47 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 48 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 49 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 50 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 51 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 52 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 53 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 54 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 55 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 56 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 57 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 58 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 59 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 60 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 61 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 62 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 63 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 64 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 65 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 66 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 67 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 68 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 69 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 70 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 71 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 72 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 73 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 74 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 75 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 76 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 77 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 78 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 79 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 80 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 81 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 82 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 83 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 84 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 85 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 86 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 87 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 88 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 89 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)


Question 90 of 90


  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Total Questions: 90
Correct Answers: 36
Accuracy: 40.0%

DETAILED ANALYSIS
Embedding Model: sentence-transformers/static-retrieval-mrl-en-v1
Text Generation Model: google/flan-t5-xl
Quiz results saved to quiz_results/static_retrieval_mrl_en_v1--flan_t5_xl_quiz_results.json
