In [1]:
# Cell 1: Core LangChain
!pip install -q langchain
!pip install torch gc

# Cell 2: LangChain integrations
!pip install -q langchain-community langchain-huggingface langchain-chroma langchain_google_genai langchain_experimental

# Cell 3: ML libraries
!pip install -q sentence-transformers transformers chromadb

# Cell 4: Utilities
!pip install -q python-dotenv torch unstructured

[31mERROR: Could not find a version that satisfies the requirement gc (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for gc[0m[31m
[0m

In [2]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings  
from langchain_experimental.text_splitter import SemanticChunker
import os
import shutil

import json
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
import os
from pathlib import Path
import torch
import gc
import warnings

2025-08-18 23:58:42.403998: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755561522.427114     192 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755561522.434367     192 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
class DatabaseManager:
    def __init__(self, embedding_model_name="sentence-transformers/all-MiniLM-L12-v2", 
                 embedding_model_type="huggingface"):
        """
        Initialize DatabaseManager with specified embedding model.
        
        Args:
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of model ("huggingface" or "gemini")
        """
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        
        # Initialize embedding function based on type
        if embedding_model_type == "gemini":
            api_key = os.getenv("GEMINI_API_KEY")  # Changed from GEMINI_API_KEY
            if not api_key:
                raise ValueError("GEMINI_API_KEY environment variable is required for Gemini models")
            
            # Extract model name (remove 'gemini/' prefix)
            model_name = self.embedding_model_name.replace("gemini/", "")
            self.embedding_function = GoogleGenerativeAIEmbeddings(
                model=model_name,
                google_api_key=api_key
            )
        elif embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            embedding_model_type == "huggingface"
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        
        print(f"Initialized {embedding_model_type} embedding model: {embedding_model_name}")

    # Rest of your DatabaseManager methods remain the same...
    def load_documents(self, data_path):
        """Load documents from the specified directory."""
        try:
            loader = DirectoryLoader(data_path, glob="*.md")
            documents = loader.load()
            # print(f"Loaded {len(documents)} documents from {data_path}")
            return documents
        except Exception as e:
            print(f"Error loading documents: {e}")
            return []

    def split_text(self, documents):
        """Split documents into chunks."""
        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=150,
                length_function=len,
                add_start_index=True,
            )
            chunks = text_splitter.split_documents(documents)
            # print(f"Split into {len(chunks)} chunks")
            return chunks
        except Exception as e:
            print(f"Error splitting text: {e}")
            return []

    def save_to_chroma(self, chunks, persist_directory):
        """Save document chunks to Chroma database."""
        try:
            # Create directory if it doesn't exist
            os.makedirs(persist_directory, exist_ok=True)
            
            db = Chroma.from_documents(
                chunks, 
                self.embedding_function, 
                persist_directory=persist_directory
            )
            # print(f"Saved {len(chunks)} chunks to Chroma database at {persist_directory}")
            return db
        except Exception as e:
            print(f"Error saving to Chroma: {e}")
            return None

    def generate_data_store(self, data_path="books", persist_directory="chroma"):
        """Complete pipeline: load documents, split text, and save to database."""
        # print(f"Starting data store generation...")
        # print(f"Data path: {data_path}")
        # print(f"Persist directory: {persist_directory}")
        print(f"Embedding model: {self.embedding_model_name} ({self.embedding_model_type})")
        
        # Load documents
        documents = self.load_documents(data_path)
        if not documents:
            return False
        
        # Split into chunks
        chunks = self.split_text(documents)
        if not chunks:
            return False
        
        # Save to database
        db = self.save_to_chroma(chunks, persist_directory)
        return db is not None

In [4]:
class QueryEngine:
    def __init__(self, persist_directory="chroma", 
                 embedding_model_name="sentence-transformers/all-MiniLM-L12-v2",
                 embedding_model_type="huggingface",
                 text_model_name="google/flan-t5-base"):
        """
        Initialize QueryEngine with specified models.
        
        Args:
            persist_directory: Path to the Chroma database
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of embedding model ("huggingface" or "gemini")
            text_model_name: Name of the text generation model
        """
        self.persist_directory = persist_directory
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        self.text_model_name = text_model_name
        
        # Initialize embedding function based on type
        if embedding_model_type == "gemini":
            api_key = os.getenv("GEMINI_API_KEY")  # Changed from GEMINI_API_KEY
            if not api_key:
                raise ValueError("GEMINI_API_KEY environment variable is required for Gemini models")
            
            # Extract model name (remove 'gemini/' prefix)
            model_name = self.embedding_model_name.replace("gemini/", "")
            self.embedding_function = GoogleGenerativeAIEmbeddings(
                model=model_name,
                google_api_key=api_key
            )
        elif embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            embedding_model_type == "huggingface"
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        
        # Initialize text generation model
        if self.text_model_name.startswith("google/flan"):
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=512,
            )
        elif self.text_model_name.startswith("mistralai/"):
            self.hf_pipeline = pipeline(
                "text-generation",  # Mistral uses text-generation
                model=self.text_model_name,
                max_new_tokens=100,     # Limit output length
                do_sample=True,
                temperature=0.3,        # Lower temp for more focused answers
                pad_token_id=2,         # Mistral's pad token
                truncation=True,
                return_full_text=False, # Only return generated text
                device=-1               # Force CPU for stability
            )
        elif self.text_model_name.startswith("gpt") or self.text_model_name.startswith("distilgpt"):
            # Special handling for GPT-2 models to fix the token length issue
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=50,         # Generate only 50 new tokens
                do_sample=True,
                temperature=0.7,
                pad_token_id=50256,
                truncation=True,           # Truncate long inputs
                return_full_text=False,    # Only return generated text, not input
                device=-1                  # Force CPU for stability
            )
        else:
            self.text_model_name = "google/flan-t5-large"
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=512,
            )
        
        self.model = HuggingFacePipeline(pipeline=self.hf_pipeline)
        
        # Initialize database
        self.db = Chroma(persist_directory=persist_directory, 
                        embedding_function=self.embedding_function)
    
        self.PROMPT_TEMPLATE = """
        You are answering questions about Alice in Wonderland based on the provided context.

        CONTEXT:
        {context}
        
        QUESTION: {question}
        
        OPTIONS:
        {options}
        
        INSTRUCTIONS:
        - Read the context carefully
        - Answer based ONLY on the information provided in the context.
        - Respond with ONLY the letter (A, B, C, or D) of the correct answer
        - Do not include explanations or sources
        """
            # """
            # Answer the question based only on the following context:

            # {context}

            # ---

            # Answer the question based on the above context: {question}
            # here are the options:
            # {options}

            # Respond only the Letter of the correct options like A, B, C and D. Do not inlcude the source.
            # """
        # prompt 2: 

        # prompt 3: 
        # """
        # <s>[INST] You are answering questions about Alice in Wonderland. 

        # Context: {context_text}
        # Question: {question}
        # Options: {options_text}
        
        # INSTRUCTIONS:
        # - Read the context carefully
        # - Answer based ONLY on the information provided in the context.
        # - Respond with ONLY the letter (A, B, C, or D) of the correct answer
        # - Do not include explanations or sources
        # [/INST]"""
        
        # print(f"QueryEngine initialized:")
        # print(f"  Embedding: {embedding_model_name} ({embedding_model_type})")
        # print(f"  Text Generation: {text_model_name}")
        # print(f"  Database: {persist_directory}")
        print(f"Initialized {embedding_model_type} embedding model: {embedding_model_name} with chat model : {text_model_name}")

    # Rest of your QueryEngine methods remain the same...
    
    def load_quiz_data(self, quiz_file_path='test_questions.json'):
        """Load quiz data from JSON file."""
        try:
            with open(quiz_file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                # print(f"Loaded {len(data)} questions from {quiz_file_path}")
                return data
        except FileNotFoundError:
            print(f"Error: {quiz_file_path} file not found!")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
            return []
 
    def semantic_search_database(self, query, k=5):
        """Search the database for relevant documents."""
        if self.db is None:
            return []
        
        try:
            results = self.db.similarity_search_with_relevance_scores(query, k=k)
            return results
        except Exception as e:
            print(f"Error searching database: {e}")
            return []
    
    def filter_response(self, response):
        edit_response = response.replace('-', '').strip()
        return edit_response

    def generate_response(self, question, options, context_text):
        """Generate a response using the LLM."""
        # Format the prompt
        options_text = "\n".join(options) if isinstance(options, list) else str(options)
        prompt = self.PROMPT_TEMPLATE.format(
            context=context_text, 
            question=question, 
            options=options_text
        )
        
        try:
            # Use the HuggingFace model to generate response
            response_text = self.model.invoke(prompt)
            response_text = self.filter_response(response_text)
            return response_text
        except Exception as e:
            print(f"Error generating response: {e}")
            return "Error generating response."
    
    def query_single_question(self, question, options=None, show_context=False):
        """Query a single question and return the response."""
        # Search the database
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
        results = self.semantic_search_database(question, k=5)
        
        if not results:
            return {
                'question': question,
                'response': 'No relevant context found.',
                'context': '',
                'sources': []
            }
        
        # Prepare context from search results
        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
        # sources = [doc.metadata.get("source", "Unknown") for doc, _score in results]
        sources = [(_score, doc.metadata.get("source", "Unknown"), doc.page_content) for doc, _score in results]
        all_scores = [_score for doc, _score in results]
        avg = sum(all_scores) / len(all_scores) if all_scores else 0

        
        # Generate response
        response_text = self.generate_response(question, options or [], context_text)
        
        result = {
            'question': question,
            'response': response_text.replace('-', '').strip(),
            'sources': sources,
            "avg relevance sources" : avg
        }
        
        if show_context:
            result['context'] = context_text
        
        return result
    
    def run_quiz(self, quiz_file_path='test_questions.json', show_details=False, limit=None):
        """Run the complete quiz and return results."""
        # Load quiz data
        quiz_data = self.load_quiz_data(quiz_file_path)
        
        if not quiz_data:
            print(f"No quiz data loaded. quiz_file_path = {quiz_file_path} Exiting.")
            return []
        
        # Limit questions if specified
        if limit:
            quiz_data = quiz_data[:limit]
            # print(f"Running quiz with {limit} questions.")
        
        results = []
        correct_count = 0
        
        for i, question_data in enumerate(quiz_data, 1):
            # print(f"Question {i} of {len(quiz_data)}")
            
            question_id = question_data.get("id", i)
            question = question_data["question"]
            options = question_data["options"]
            correct_answer = question_data["answer"]
            
            # Query the database and generate response
            result = self.query_single_question(question, options, show_context=False)
            
            # Add quiz-specific information
            result.update({
                'id': question_id,
                'options': options,
                'correct_answer': correct_answer,
                'response' : result['response'],
                'is_correct': result['response'].strip().upper() == correct_answer.upper()
            })

            if result["is_correct"] == False and len(result["response"]) != 1:
                if result["correct_answer"].upper().strip() == "A":
                    alternate_correct_answer = result["options"][0][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "B":
                    alternate_correct_answer = result["options"][1][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "C":
                    alternate_correct_answer = result["options"][2][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "D":
                    alternate_correct_answer = result["options"][3][4:].replace('-', '').strip()
                else:
                    alternate_correct_answer = ""

                if alternate_correct_answer.upper() == result["response"].upper():
                    result["is_correct"] = True
                else:
                    if result["response"].upper().startswith(alternate_correct_answer.upper()):
                        result["response"] = alternate_correct_answer
                        result["is_correct"] = True
                    else:
                        result["is_correct"] = False

            if result['is_correct']:
                correct_count += 1
            
            results.append(result)
            
            # Show details if requested
            if show_details:
                print("=" * 50)
                print(f"Question {question_id}: {question}")
                for j, option in enumerate(options, 1):
                    print(f"  {option}")
                print(f"AI Response: {result['response']}")
                print(f"Correct Answer: {correct_answer}")
                print(f"Result: {'✓ Correct' if result['is_correct'] else '✗ Incorrect'}")
                print()
        
        # Summary
        accuracy = (correct_count / len(quiz_data)) * 100 if quiz_data else 0
        print(f"\nQuiz Summary:")
        print(f"Correct Answers: {correct_count} / {len(quiz_data)}. Accuracy: {accuracy:.1f}%")
        
        return results
    
    def set_prompt_template(self, new_template):
        """Set a custom prompt template."""
        self.PROMPT_TEMPLATE = new_template


In [5]:
EMBEDDING_MODEL_OPTIONS = [
    "sentence-transformers/all-MiniLM-L6-v2", # success
    "sentence-transformers/all-mpnet-base-v2", # success
    "BAAI/bge-m3",
    "BAAI/bge-large-en", # success
    "BAAI/bge-base-en-v1.5",
    "BAAI/bge-large-en-v1.5",
    "intfloat/e5-base-v2", # success
    "sentence-transformers/static-retrieval-mrl-en-v1", # success
    "sentence-transformers/all-MiniLM-L12-v2", # success # best one so far
    # "gemini/embedding-001",       # Older Gemini model # horrible
    # "gemini/text-embedding-005",  # New Gemini model
    "nomic-ai/nomic-embed-text-v1.5",
    "sentence-transformers/multi-qa-mpnet-base-dot-v1",
    "sentence-transformers/multi-qa-mpnet-base-cos-v1",
    "hkunlp/instructor-large",
    "hkunlp/instructor-xl"
]

TEXT_GENERATION_MODEL_OPTIONS = [
    "google/flan-t5-small",
    "google/flan-t5-base", # have been using this for default development testing
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "tiiuae/falcon-7b",
    "tiiuae/Falcon-H1-0.5B-Instruct",
    "tiiuae/falcon-7b-instruct",
    # "mistralai/Mistral-7B-Instruct-v0.3", # not free
    # "mistralai/Mistral-7B-Instruct-v0.2", # not free
    # "mistralai/Mistral-Small-3.2-24B-Instruct-2506", # not free
    "HuggingFaceH4/zephyr-7b-beta",
    "google/gemma-3-1b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Llama-3.2-1B",
    # "gpt2",
    # "distilgpt2",
]

# Fixed model types to match all embedding models (all are HuggingFace)
EMBEDDING_MODEL_TYPES = [
    "huggingface",  # 0 - all-MiniLM-L6-v2
    "huggingface",  # 1 - all-mpnet-base-v2
    "huggingface",  # 2 - bge-m3
    "huggingface",  # 3 - bge-large-en
    "huggingface",  # 4 - bge-base-en-v1.5
    "huggingface",  # 5 - bge-large-en-v1.5
    "huggingface",  # 6 - e5-base-v2 (Fixed from "gemini")
    "huggingface",  # 7 - static-retrieval-mrl-en-v1
    "huggingface",  # 8 - all-MiniLM-L12-v2 (Your best!)
    "huggingface",  # 9 - nomic-embed-text-v1.5
    "huggingface",  # 10 - multi-qa-mpnet-base-dot-v1
    "huggingface",  # 11 - multi-qa-mpnet-base-cos-v1
    "huggingface",  # 12 - instructor-large
    "huggingface",  # 13 - instructor-xl
]

In [6]:
def list_models():
    """List all available models."""
    print("Available Embedding Models:")
    for i, (model, model_type) in enumerate(zip(EMBEDDING_MODEL_OPTIONS, EMBEDDING_MODEL_TYPES)):
        print(f"  {i}: {model} ({model_type})")
    
    print("\nAvailable Text Generation Models:")
    for i, model in enumerate(TEXT_GENERATION_MODEL_OPTIONS):
        print(f"  {i}: {model}")
        
def clear_cuda_memory():
    """Clear CUDA memory and run garbage collection."""
    
    
    if torch.cuda.is_available():
        # print("🧹 Clearing CUDA memory...")
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        # print(f"💾 GPU Memory before cleanup: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
        
        # Force garbage collection
        gc.collect()
        
        # Clear cache again after garbage collection
        torch.cuda.empty_cache()
        
        # print(f"✅ GPU Memory after cleanup: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
        # print(f"📊 GPU Memory cached: {torch.cuda.memory_reserved()/1024**3:.2f} GB")
    else:
        # print("🖥️  No CUDA device available - running on CPU")
        # Still run garbage collection for CPU
        gc.collect()
        
def main(mode="create", embedding_model_index=0, text_generation_model_index=-1, save_result=1):
    clear_cuda_memory()
    embedding_model_index = embedding_model_index
    
    raw_knowledge_directory = "/kaggle/input/text-for-summarizing/books"
    test_questions_directory = "/kaggle/input/test-questions/test_questions.json"
    
    os.makedirs("chroma", exist_ok=True)
    os.makedirs("quiz_results", exist_ok=True)
    
    # Get selected models
    embedding_model = EMBEDDING_MODEL_OPTIONS[embedding_model_index]
    embedding_model_type = EMBEDDING_MODEL_TYPES[embedding_model_index]
    
    db_data_path = f"chroma/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}"
    print(f"Using embedding model: {embedding_model} ({embedding_model_type})")
    
    if text_generation_model_index != -1:
        text_model = TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]
        result_file_path = f"quiz_results/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}--{text_model.split('/')[-1].replace('/', '_').replace('-', '_')}_quiz_results.json"
        print(f"Using text generation model: {text_model}")
    
    
    
    
    def create_mode():
        db_manager = DatabaseManager(embedding_model_name=embedding_model, 
                                   embedding_model_type=embedding_model_type)
        success = db_manager.generate_data_store(data_path=raw_knowledge_directory, 
                                                persist_directory=db_data_path)

    def quiz_mode():
        print("Running Alice in Wonderland quiz...")
        query_engine = QueryEngine(persist_directory=db_data_path,
                                 embedding_model_name=embedding_model,
                                 embedding_model_type=embedding_model_type,
                                 text_model_name=text_model)
        
        # Run the quiz
        results = query_engine.run_quiz(test_questions_directory)
        
        # Rest of quiz_mode code remains the same...
        if results:
            print("\n" + "="*50)
            # print("DETAILED ANALYSIS")
            # print("="*50)
            # print("Embedding Model:", embedding_model)
            # print("Text Generation Model:", text_model)
    
            # correct_results = [r for r in results if r['is_correct']]
            # incorrect_results = [r for r in results if not r['is_correct']]
            
            # if incorrect_results:
            #     print(f"\nIncorrect answers ({len(incorrect_results)}):")
            #     for result in incorrect_results:
            #         print(f"Q{result['id']}: Expected {result['correct_answer']}, got {result['response']}")
            
            # if correct_results:
            #     print(f"\nCorrect answers: {len(correct_results)}")

            if save_result==1:
                with open(result_file_path, "w") as f:
                    json.dump(results, f, indent=4)
            # print(f"Quiz results saved to {result_file_path}")
    if mode=="create":
        create_mode()
    elif mode=="quiz":
        quiz_mode()
    clear_cuda_memory()
def clear_specific_model_cache(model_name):
    """Clear cache for a specific model."""
    import shutil
    from pathlib import Path
    
    # Convert model name to cache-safe format
    safe_model_name = model_name.replace("/", "--")
    
    cache_locations = [
        Path.home() / ".cache" / "huggingface" / "hub",
        Path.home() / ".cache" / "huggingface" / "transformers",
    ]
    
    for cache_dir in cache_locations:
        if cache_dir.exists():
            # Look for directories containing the model name
            for model_cache in cache_dir.glob(f"*{safe_model_name}*"):
                try:
                    if model_cache.is_dir():
                        size = sum(f.stat().st_size for f in model_cache.rglob('*') if f.is_file()) / (1024**2)
                        shutil.rmtree(model_cache)
                        print(f"🗑️ Cleared {model_name} cache: {size:.1f} MB freed")
                except Exception as e:
                    print(f"❌ Could not clear {model_cache}: {e}")
                    continue
                    
def run_mains(test_embedding_models=[], test_text_generation_models=[]):
    for embedding_model_index in test_embedding_models:
        try:
            main(mode="create", embedding_model_index=embedding_model_index, text_generation_model_index=0)
        except:
            print(f"failed to create db with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]}")
            continue
        for text_generation_model_index in test_text_generation_models:
            try: 
                main(mode="quiz", embedding_model_index=embedding_model_index, text_generation_model_index=text_generation_model_index)
                print(f"")
                # clear_specific_model_cache(EMBEDDING_MODEL_OPTIONS[embedding_model_index])
            except:
                print(f"failed to run quiz with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]} and {TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]}")
                continue
        if os.path.exists("chroma"):
            if os.path.exists("chroma"):
                shutil.rmtree("chroma")


In [7]:
list_models()

Available Embedding Models:
  0: sentence-transformers/all-MiniLM-L6-v2 (huggingface)
  1: sentence-transformers/all-mpnet-base-v2 (huggingface)
  2: BAAI/bge-m3 (huggingface)
  3: BAAI/bge-large-en (huggingface)
  4: BAAI/bge-base-en-v1.5 (huggingface)
  5: BAAI/bge-large-en-v1.5 (huggingface)
  6: intfloat/e5-base-v2 (huggingface)
  7: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  8: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  9: nomic-ai/nomic-embed-text-v1.5 (huggingface)
  10: sentence-transformers/multi-qa-mpnet-base-dot-v1 (huggingface)
  11: sentence-transformers/multi-qa-mpnet-base-cos-v1 (huggingface)
  12: hkunlp/instructor-large (huggingface)
  13: hkunlp/instructor-xl (huggingface)

Available Text Generation Models:
  0: google/flan-t5-small
  1: google/flan-t5-base
  2: google/flan-t5-large
  3: google/flan-t5-xl
  4: tiiuae/falcon-7b
  5: tiiuae/Falcon-H1-0.5B-Instruct
  6: tiiuae/falcon-7b-instruct
  7: HuggingFaceH4/zephyr-7b-beta
  8: go

In [None]:
run_mains(
    test_embedding_models=[i for i in range(len(EMBEDDING_MODEL_OPTIONS))],
    test_text_generation_models=[i for i in range(len(TEXT_GENERATION_MODEL_OPTIONS))]
)



Using embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model: google/flan-t5-small


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Initialized huggingface embedding model: sentence-transformers/all-MiniLM-L6-v2
Embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface)
failed to create db with sentence-transformers/all-MiniLM-L6-v2
Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: google/flan-t5-small


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2
Embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: google/flan-t5-small
Running Alice in Wonderland quiz...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : google/flan-t5-small


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Correct Answers: 9 / 90. Accuracy: 10.0%


Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: google/flan-t5-base
Running Alice in Wonderland quiz...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : google/flan-t5-base


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Correct Answers: 35 / 90. Accuracy: 38.9%


Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: google/flan-t5-large
Running Alice in Wonderland quiz...


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : google/flan-t5-large


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Correct Answers: 42 / 90. Accuracy: 46.7%


Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: google/flan-t5-xl
Running Alice in Wonderland quiz...


config.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : google/flan-t5-xl


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Correct Answers: 62 / 90. Accuracy: 68.9%


Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: tiiuae/falcon-7b
Running Alice in Wonderland quiz...


Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : tiiuae/falcon-7b


  results = self.db.similarity_search_with_relevance_scores(query, k=k)
  results = self.db.similarity_search_with_relevance_scores(query, k=k)



Quiz Summary:
Correct Answers: 42 / 90. Accuracy: 46.7%


Using embedding model: sentence-transformers/all-mpnet-base-v2 (huggingface)
Using text generation model: tiiuae/Falcon-H1-0.5B-Instruct
Running Alice in Wonderland quiz...


Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors


Initialized huggingface embedding model: sentence-transformers/all-mpnet-base-v2 with chat model : tiiuae/Falcon-H1-0.5B-Instruct
failed to run quiz with sentence-transformers/all-mpnet-base-v2 and tiiuae/Falcon-H1-0.5B-Instruct


In [None]:
model_response_directory = f"/kaggle/working/quiz_results"
for model_response_fp in os.listdir(model_response_directory):
    avg_relevance_sources = []
    count = 0
    num_questions = 0
    with open(os.path.join(model_response_directory, model_response_fp), "r") as f:
        model_responses = json.load(f)
        for response in model_responses:
            if response["is_correct"] == True:
                count += 1
            num_questions += 1
            avg_relevance_sources.append(response["avg relevance sources"])
    print(f"Model: {model_response_fp}, Correct: {count}/{num_questions}, Avg Relevance: {sum(avg_relevance_sources) / len(avg_relevance_sources) if avg_relevance_sources else 0}")
                