In [None]:
# # Cell 1: Core LangChain
# !pip install -q langchain
# !pip install torch
# !pip install gc

# # Cell 2: LangChain integrations
# !pip install -q langchain-community langchain-huggingface langchain-chroma langchain_experimental
# # !pip install -q langchain_google_genai
# !pip install -q huggingface_hub
# # Cell 3: ML libraries
# !pip install -q sentence-transformers transformers chromadb

# # Cell 4: Utilities
# !pip install -q python-dotenv torch unstructured
# !pip install -q hf_xet

In [None]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
# from langchain_google_genai import GoogleGenerativeAIEmbeddings  
from langchain_experimental.text_splitter import SemanticChunker
import os
import shutil

import json
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
from pathlib import Path
import torch
import gc
import warnings
from huggingface_hub import login
from dotenv import load_dotenv
import datetime
from transformers.utils import logging
logging.set_verbosity_error()
logging.disable_progress_bar()

# os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
# warnings.filterwarnings("ignore", category=FutureWarning)
# warnings.filterwarnings("ignore", category=UserWarning)

# # Add this to your existing imports cell
# from transformers import logging
# logging.set_verbosity_error()  # Only show errors, not info/warnings

In [8]:
load_dotenv()
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
HUGGING_FACE_API = os.getenv("HUGGING_FACE_TOKEN")
login(token=HUGGING_FACE_API)
model_response_directory = f"quiz_results"
raw_knowledge_directory = f"books"
test_questions_directory = f"test_questions.json"
persist_filepath = f"model_results.json"

In [9]:
class DatabaseManager:
    def __init__(self, embedding_model_name="sentence-transformers/all-MiniLM-L12-v2", 
                 embedding_model_type="huggingface"):
        """
        Initialize DatabaseManager with specified embedding model.
        
        Args:
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of model ("huggingface" or "gemini")
        """
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        
        # Initialize embedding function based on type
        # if embedding_model_type == "gemini":
        #     api_key = os.getenv("GEMINI_API_KEY")  # Changed from GEMINI_API_KEY
        #     if not api_key:
        #         raise ValueError("GEMINI_API_KEY environment variable is required for Gemini models")
            
        #     # Extract model name (remove 'gemini/' prefix)
        #     model_name = self.embedding_model_name.replace("gemini/", "")
        #     self.embedding_function = GoogleGenerativeAIEmbeddings(
        #         model=model_name,
        #         google_api_key=api_key
        #     )
        # el
        if embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            self.embedding_model_type == "huggingface"
            print(f"{embedding_model_type} embedding_model_type not recognized. Using {self.embedding_model_type}")
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
            
        print(f"Initialized DatabaseManager: Embedding embedding model: {self.embedding_model_name} ({self.embedding_model_type})")
        
    # Rest of your DatabaseManager methods remain the same...
    def load_documents(self, data_path):
        """Load documents from the specified directory."""
        try:
            loader = DirectoryLoader(data_path, glob="*.md")
            documents = loader.load()
            # print(f"Loaded {len(documents)} documents from {data_path}")
            return documents
        except Exception as e:
            print(f"Error loading documents: {e}")
            return []

    def split_text(self, documents):
        """Split documents into chunks."""
        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=150,
                length_function=len,
                add_start_index=True,
            )
            chunks = text_splitter.split_documents(documents)
            # print(f"Split into {len(chunks)} chunks")
            return chunks
        except Exception as e:
            print(f"Error splitting text: {e}")
            return []

    def save_to_chroma(self, chunks, persist_directory):
        """Save document chunks to Chroma database."""
        # try:
        #     existing_db = Chroma(persist_directory=persist_directory, embedding_function=self.embedding_function)
        #     existing_db.delete_collection()
        # except Exception as e:
        #     pass
            
        try:
            import time
            for i in range(3):
                try:
                    # Create directory if it doesn't exist
                    if os.path.exists(persist_directory):
                        print(f"attempt {i+1}: trying to remove {persist_directory}")
                        shutil.rmtree(persist_directory)
                    break
                except Exception as e:
                    print(f"error removing {persist_directory}: {e}")
                    if i < 2:
                        gc.collect()
                        time.sleep(1)
                        
                        
            # if os.path.exists(persist_directory):
            #     print(f"path exist: {persist_directory}")
            # else:
            #     print(f"path dont exist: {persist_directory}")
                
            print(f"making directory: {persist_directory}")
            os.makedirs(persist_directory, exist_ok=True)
            
            db = Chroma.from_documents(
                chunks, 
                self.embedding_function, 
                persist_directory=persist_directory
            )
            # print(f"Saved {len(chunks)} chunks to Chroma database at {persist_directory}")
            return db
        except Exception as e:
            print(f"Error saving to Chroma: {e}")
            return None

    def generate_data_store(self, data_path="books", persist_directory="chroma"):
        """Complete pipeline: load documents, split text, and save to database."""
        
        # Load documents
        documents = self.load_documents(data_path)
        if not documents:
            return False
        
        # Split into chunks
        chunks = self.split_text(documents)
        if not chunks:
            return False
        
        # Save to database
        db = self.save_to_chroma(chunks, persist_directory)
        return db is not None

In [10]:
class QueryEngine:
    def __init__(self, persist_directory="chroma", 
                 embedding_model_name="sentence-transformers/all-MiniLM-L12-v2",
                 embedding_model_type="huggingface",
                 text_model_name="google/flan-t5-base"):
        """
        Initialize QueryEngine with specified models.
        
        Args:
            persist_directory: Path to the Chroma database
            embedding_model_name: Name of the embedding model
            embedding_model_type: Type of embedding model ("huggingface" or "gemini")
            text_model_name: Name of the text generation model
        """
        self.persist_directory = persist_directory
        self.embedding_model_name = embedding_model_name
        self.embedding_model_type = embedding_model_type
        self.text_model_name = text_model_name
        
        # Initialize embedding function based on type
        # if embedding_model_type == "gemini":
        #     api_key = os.getenv("GEMINI_API_KEY")  # Changed from GEMINI_API_KEY
        #     if not api_key:
        #         raise ValueError("GEMINI_API_KEY environment variable is required for Gemini models")
            
        #     # Extract model name (remove 'gemini/' prefix)
        #     model_name = self.embedding_model_name.replace("gemini/", "")
        #     self.embedding_function = GoogleGenerativeAIEmbeddings(
        #         model=model_name,
        #         google_api_key=api_key
        #     )
        # el
        if embedding_model_type == "huggingface":
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        else:  # huggingface
            self.embedding_model_type == "huggingface"
            print(f"{embedding_model_type} embedding_model_type not recognized. Using {self.embedding_model_type}")
            self.embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        
        # Initialize text generation model
        if self.text_model_name.startswith("google/flan"):
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=768,
                max_new_tokens=100,
            )
        elif self.text_model_name.startswith("mistralai/"):
            self.hf_pipeline = pipeline(
                "text-generation",  # Mistral uses text-generation
                model=self.text_model_name,
                max_new_tokens=100,     # Limit output length
                do_sample=True,
                temperature=0.3,        # Lower temp for more focused answers
                pad_token_id=2,         # Mistral's pad token
                return_full_text=False, # Only return generated text
            )
        elif self.text_model_name.startswith("gpt") or self.text_model_name.startswith("distilgpt"):
            # Special handling for GPT-2 models to fix the token length issue
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=50,         # Generate only 50 new tokens
                do_sample=True,
                temperature=0.7,
                pad_token_id=50256,
                return_full_text=False,    # Only return generated text, not input
            )
        elif "falcon" in self.text_model_name.lower():
            # 🔧 ADD: Handle Falcon models
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.3,
                return_full_text=False,
                trust_remote_code=True  # Falcon needs this
            )
        elif "zephyr" in self.text_model_name.lower():
            # 🔧 ADD: Handle Zephyr models
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.3,
                return_full_text=False,
            )
        elif "gemma" in self.text_model_name.lower():
            import torch._dynamo
            torch._dynamo.config.suppress_errors = True
            
            # 🔧 ADD: Force eager execution for P100 compatibility
            os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
            
            # 🔧 ADD: Handle Gemma models
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.3,
                return_full_text=False,
            )
        elif "llama" in self.text_model_name.lower():
            # 🔧 ADD: Handle Llama models
            self.hf_pipeline = pipeline(
                "text-generation",
                model=self.text_model_name,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.3,
                return_full_text=False,
            )
        else:
            self.text_model_name = "google/flan-t5-large"
            print(f"{embedding_model_name} text_model_name not recognized. Using {self.text_model_name}")
            self.hf_pipeline = pipeline(
                "text2text-generation",
                model=self.text_model_name,
                max_length=768,
                max_new_tokens=100,
            )
        
        self.model = HuggingFacePipeline(pipeline=self.hf_pipeline)
        
        # Initialize database
        self.db = Chroma(persist_directory=persist_directory, 
                        embedding_function=self.embedding_function)
    
        self.PROMPT_TEMPLATE = """
            Answer the question based only on the following context:

            {context}

            ---

            Answer the question based on the above context: {question}
            here are the options:
            {options}

            Respond only the Letter of the correct options like A, B, C and D. Do not inlcude the source.
            """
        # prompt 2: 
        # """
        # You are answering questions about Alice in Wonderland based on the provided context.

        # CONTEXT:
        # {context}
        
        # QUESTION: {question}
        
        # OPTIONS:
        # {options}
        
        # INSTRUCTIONS:
        # - Read the context carefully
        # - Answer based ONLY on the information provided in the context.
        # - Respond with ONLY the letter (A, B, C, or D) of the correct answer
        # - Do not include explanations or sources
        # """

        # prompt 3: 
        # """
        # <s>[INST] You are answering questions about Alice in Wonderland. 

        # Context: {context_text}
        # Question: {question}
        # Options: {options_text}
        
        # INSTRUCTIONS:
        # - Read the context carefully
        # - Answer based ONLY on the information provided in the context.
        # - Respond with ONLY the letter (A, B, C, or D) of the correct answer
        # - Do not include explanations or sources
        # [/INST]"""
        
        # print(f"QueryEngine initialized:")
        # print(f"  Embedding: {embedding_model_name} ({embedding_model_type})")
        # print(f"  Text Generation: {text_model_name}")
        # print(f"  Database: {persist_directory}")
        print(f"Initialized QueryEngine: embedding model: {embedding_model_name} ({embedding_model_type}); chat model : {text_model_name}")

    # Rest of your QueryEngine methods remain the same...
    
    def load_quiz_data(self, quiz_file_path='test_questions.json'):
        """Load quiz data from JSON file."""
        try:
            with open(quiz_file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                # print(f"Loaded {len(data)} questions from {quiz_file_path}")
                return data
        except FileNotFoundError:
            print(f"Error: {quiz_file_path} file not found!")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
            return []
 
    def semantic_search_database(self, query, k=5):
        """Search the database for relevant documents."""
        if self.db is None:
            return []
        
        try:
            results = self.db.similarity_search_with_relevance_scores(query, k=k)
            return results
        except Exception as e:
            print(f"Error searching database: {e}")
            return []
    
    def filter_response(self, response):
        edit_response = response.replace('-', '').strip()
        return edit_response

    def generate_response(self, question, options, context_text):
        """Generate a response using the LLM."""
        # Format the prompt
        options_text = "\n".join(options) if isinstance(options, list) else str(options)
        prompt = self.PROMPT_TEMPLATE.format(
            context=context_text, 
            question=question, 
            options=options_text
        )
        
        try:
            # Use the HuggingFace model to generate response
            response_text = self.model.invoke(prompt)
            response_text = self.filter_response(response_text)
            return response_text
        except Exception as e:
            print(f"Error generating response: {e}")
            return "Error generating response."
    
    def query_single_question(self, question, options=None, show_context=False):
        """Query a single question and return the response."""
        # Search the database
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
        results = self.semantic_search_database(question, k=5)
        
        if not results:
            return {
                'question': question,
                'response': 'No relevant context found.',
                'context': '',
                'sources': []
            }
        
        # Prepare context from search results
        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
        # sources = [doc.metadata.get("source", "Unknown") for doc, _score in results]
        sources = [(_score, doc.metadata.get("source", "Unknown"), doc.page_content) for doc, _score in results]
        all_scores = [_score for doc, _score in results]
        avg = sum(all_scores) / len(all_scores) if all_scores else 0

        
        # Generate response
        response_text = self.generate_response(question, options or [], context_text)
        
        result = {
            'question': question,
            'response': response_text.replace('-', '').strip(),
            'sources': sources,
            "avg relevance sources" : avg
        }
        
        if show_context:
            result['context'] = context_text
        
        return result
    
    def run_quiz(self, quiz_file_path='test_questions.json', show_details=False, limit=None):
        """Run the complete quiz and return results."""
        # Load quiz data
        quiz_data = self.load_quiz_data(quiz_file_path)
        
        if not quiz_data:
            print(f"No quiz data loaded. quiz_file_path = {quiz_file_path} Exiting.")
            return []
        
        # Limit questions if specified
        if limit:
            quiz_data = quiz_data[:limit]
            # print(f"Running quiz with {limit} questions.")
        
        results = []
        correct_count = 0
        
        for i, question_data in enumerate(quiz_data, 1):
            # print(f"Question {i} of {len(quiz_data)}")
            
            question_id = question_data.get("id", i)
            question = question_data["question"]
            options = question_data["options"]
            correct_answer = question_data["answer"]
            
            # Query the database and generate response
            result = self.query_single_question(question, options, show_context=False)
            
            # Add quiz-specific information
            result.update({
                'id': question_id,
                'options': options,
                'correct_answer': correct_answer,
                'response' : result['response'],
                'is_correct': result['response'].strip().upper() == correct_answer.upper()
            })

            if result["is_correct"] == False and len(result["response"]) != 1:
                if result["correct_answer"].upper().strip() == "A":
                    alternate_correct_answer = result["options"][0][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "B":
                    alternate_correct_answer = result["options"][1][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "C":
                    alternate_correct_answer = result["options"][2][4:].replace('-', '').strip()
                elif result["correct_answer"].upper().strip() == "D":
                    alternate_correct_answer = result["options"][3][4:].replace('-', '').strip()
                else:
                    alternate_correct_answer = ""

                if alternate_correct_answer.upper() == result["response"].upper():
                    result["is_correct"] = True
                else:
                    if result["response"].upper().startswith(alternate_correct_answer.upper()):
                        result["response"] = alternate_correct_answer
                        result["is_correct"] = True
                    else:
                        result["is_correct"] = False

            if result['is_correct']:
                correct_count += 1
            
            results.append(result)
            
        
        # Summary
        accuracy = (correct_count / len(quiz_data)) * 100 if quiz_data else 0
        print(f"\nQuiz Summary:")
        print(f"Correct Answers: {correct_count} / {len(quiz_data)}. Accuracy: {accuracy:.1f}%")
        self.save_results(self.embedding_model_name, self.text_model_name, results, filepath=persist_filepath)
        return results
    
    def set_prompt_template(self, new_template):
        """Set a custom prompt template."""
        self.PROMPT_TEMPLATE = new_template

    def save_results(self, embedding_model, text_model, result, filepath=persist_filepath):
        """Save quiz results to a JSON file."""
        try:
            with open(filepath, 'r') as f:
                data = json.load(f)
            
            output = {
                "timestamp" : datetime.datetime.now(),
                "results" : result
            }
            
            if embedding_model in data:
                embedding_model_info = data[embedding_model]
                if text_model in embedding_model_info:
                    embedding_model_info[text_model].append(output)
                else:
                    embedding_model_info[text_model] = [output]
            else:
                data[embedding_model] = {
                    text_model: [output]
                }

            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=4)
                return

            # with open(filepath, 'w', encoding='utf-8') as file:
            #     json.dump(output, file, indent=4)
            # print(f"Results saved to {filepath}")
        except Exception as e:
            print(f"Error saving results: {e}")


In [11]:
EMBEDDING_MODEL_OPTIONS = [
    "sentence-transformers/all-MiniLM-L6-v2", # success
    "sentence-transformers/all-mpnet-base-v2", # success
    "BAAI/bge-m3",
    "BAAI/bge-large-en", # success
    "BAAI/bge-base-en-v1.5",
    "BAAI/bge-large-en-v1.5",
    "intfloat/e5-base-v2", # success
    "sentence-transformers/static-retrieval-mrl-en-v1", # success
    "sentence-transformers/all-MiniLM-L12-v2", # success # best one so far
    # "gemini/embedding-001",       # Older Gemini model # horrible
    # "gemini/text-embedding-005",  # New Gemini model
    "nomic-ai/nomic-embed-text-v1.5",
    "sentence-transformers/multi-qa-mpnet-base-dot-v1",
    "sentence-transformers/multi-qa-mpnet-base-cos-v1",
    "hkunlp/instructor-large",
    "hkunlp/instructor-xl"
]

TEXT_GENERATION_MODEL_OPTIONS = [
    "google/flan-t5-small",
    "google/flan-t5-base", # have been using this for default development testing
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "tiiuae/Falcon3-7B-Base",
    "tiiuae/Falcon3-1B-Instruct",
    "tiiuae/Falcon3-3B-Instruct",
    "tiiuae/Falcon3-7B-Instruct",
    "tiiuae/Falcon3-10B-Instruct",
    "tiiuae/Falcon-H1-0.5B-Instruct",
    "HuggingFaceH4/zephyr-7b-beta",
    "google/gemma-3-1b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "meta-llama/Llama-3.1-8B-Instruct", 
    "meta-llama/Llama-3.2-3B-Instruct", 
    "meta-llama/Meta-Llama-3-8B-Instruct", 
    "meta-llama/Llama-3.2-1B", 
]

# Fixed model types to match all embedding models (all are HuggingFace)
EMBEDDING_MODEL_TYPES = [
    "huggingface",  # 0 - all-MiniLM-L6-v2
    "huggingface",  # 1 - all-mpnet-base-v2
    "huggingface",  # 2 - bge-m3
    "huggingface",  # 3 - bge-large-en
    "huggingface",  # 4 - bge-base-en-v1.5
    "huggingface",  # 5 - bge-large-en-v1.5
    "huggingface",  # 6 - e5-base-v2 (Fixed from "gemini")
    "huggingface",  # 7 - static-retrieval-mrl-en-v1
    "huggingface",  # 8 - all-MiniLM-L12-v2 (Your best!)
    "huggingface",  # 9 - nomic-embed-text-v1.5
    "huggingface",  # 10 - multi-qa-mpnet-base-dot-v1
    "huggingface",  # 11 - multi-qa-mpnet-base-cos-v1
    "huggingface",  # 12 - instructor-large
    "huggingface",  # 13 - instructor-xl
]

In [12]:
def list_models():
    """List all available models."""
    print("Available Embedding Models (EMBEDDING_MODEL_OPTIONS):")
    for i, (model, model_type) in enumerate(zip(EMBEDDING_MODEL_OPTIONS, EMBEDDING_MODEL_TYPES)):
        print(f"  {i}: {model} ({model_type})")
    
    print("\nAvailable Text Generation Models (TEXT_GENERATION_MODEL_OPTIONS):")
    for i, model in enumerate(TEXT_GENERATION_MODEL_OPTIONS):
        print(f"  {i}: {model}")
        
def clear_cuda_memory():
    """Clear CUDA memory and run garbage collection."""
    
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        # Force garbage collection
        gc.collect()
        
        # Clear cache again after garbage collection
        torch.cuda.empty_cache()
        
    else:
        gc.collect()
        
def print_results_summary():
    if os.path.exists(model_response_directory):
        for model_response_fp in os.listdir(model_response_directory):
            avg_relevance_sources = []
            count = 0
            num_questions = 0
            with open(os.path.join(model_response_directory, model_response_fp), "r") as f:
                model_responses = json.load(f)
                for response in model_responses:
                    if response["is_correct"] == True:
                        count += 1
                    num_questions += 1
                    avg_relevance_sources.append(response["avg relevance sources"])
            print(f"Model: {model_response_fp}, Correct: {count}/{num_questions}, Avg Relevance: {sum(avg_relevance_sources) / len(avg_relevance_sources) if avg_relevance_sources else 0}")
                        
def main(mode="create", embedding_model_index=0, text_generation_model_index=-1, save_result=1):
    print("=" * 80)
    clear_cuda_memory()
    embedding_model_index = embedding_model_index
    
    # Get selected models
    embedding_model = EMBEDDING_MODEL_OPTIONS[embedding_model_index]
    embedding_model_type = EMBEDDING_MODEL_TYPES[embedding_model_index]
    
    db_name = "chroma"
    db_data_path = f"{db_name}/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}"
    
    if text_generation_model_index != -1:
        text_model = TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]
        result_file_path = f"quiz_results/{embedding_model.split('/')[-1].replace('/', '_').replace('-', '_')}--{text_model.split('/')[-1].replace('/', '_').replace('-', '_')}_quiz_results.json"
    
    def create_mode():
        print(f"Using embedding model         : {embedding_model} ({embedding_model_type})")
        # os.makedirs(db_name, exist_ok=True)
        # os.makedirs(db_data_path, exist_ok=True)
        db_manager = DatabaseManager(embedding_model_name=embedding_model, 
                                   embedding_model_type=embedding_model_type)
        db_manager.generate_data_store(data_path=raw_knowledge_directory, 
                                                persist_directory=db_data_path)

    def quiz_mode():
        print("Running Alice in Wonderland quiz...")
        print(f"Using embedding model         : {embedding_model} ({embedding_model_type})")
        print(f"Using text generation model   : {text_model}")
        os.makedirs("quiz_results", exist_ok=True)
        query_engine = QueryEngine(persist_directory=db_data_path,
                                 embedding_model_name=embedding_model,
                                 embedding_model_type=embedding_model_type,
                                 text_model_name=text_model)
        
        # Run the quiz
        results = query_engine.run_quiz(test_questions_directory)
        
        # Rest of quiz_mode code remains the same...
        if results:
            if save_result==1:
                with open(result_file_path, "w") as f:
                    json.dump(results, f, indent=4)

    if mode=="create":
        create_mode()
    elif mode=="quiz":
        quiz_mode()
    clear_cuda_memory()

def run_mains(test_embedding_models=[], test_text_generation_models=[]):
    counter = 0
    for embedding_model_index in test_embedding_models:
        try:
            main(mode="create", embedding_model_index=embedding_model_index)
            print(f"successfully created db with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]}")
        except Exception as e:
            print(f"failed to create db with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]}")
            print(e)
            if os.path.exists("chroma"):
                shutil.rmtree("chroma")
            continue
        
        for text_generation_model_index in test_text_generation_models:
            counter += 1
            try: 
                main(mode="quiz", embedding_model_index=embedding_model_index, text_generation_model_index=text_generation_model_index)
                print(f"successfully ran quiz with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]} and {TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]}. {counter} / {len(test_embedding_models) * len(test_text_generation_models)} models combination tested. testing {embedding_model_index + 1} / {len(test_embedding_models)} embedding models. tested {text_generation_model_index + 1} / {len(test_text_generation_models)} chat models.")
                print()
                # print_results_summary()
            except Exception as e:
                print(f"failed to run quiz with {EMBEDDING_MODEL_OPTIONS[embedding_model_index]} and {TEXT_GENERATION_MODEL_OPTIONS[text_generation_model_index]}. {counter} / {len(test_embedding_models) * len(test_text_generation_models)} models combination tested. testing {embedding_model_index + 1} / {len(test_embedding_models)} embedding models. tested {text_generation_model_index + 1} / {len(test_text_generation_models)} chat models.")
                print(e)
                continue
        if os.path.exists("chroma"):
            shutil.rmtree("chroma")

def run_mains_smart():
    try:
        incomplete_model_combinations = {}
        with open(persist_filepath, 'r') as f:
            data = json.load(f)

        # getting all the combinations that are not in the file
        for i, embedding_model in enumerate(EMBEDDING_MODEL_OPTIONS):
            if embedding_model not in data:
                incomplete_model_combinations[i] = list(range(len(TEXT_GENERATION_MODEL_OPTIONS)))
            else:
                for j, text_model in enumerate(TEXT_GENERATION_MODEL_OPTIONS):
                    if text_model not in data[embedding_model]:
                        if i in incomplete_model_combinations:
                            incomplete_model_combinations[i].append(j)
                        else:
                            incomplete_model_combinations[i] = [j]

        for embedding_model_index, text_generation_model_indices in incomplete_model_combinations.items():
            run_mains(test_embedding_models=[embedding_model_index], test_text_generation_models=text_generation_model_indices)
      
    except Exception as e:
        print(f"Error: {e}")
    
            
    


In [13]:
list_models()

Available Embedding Models (EMBEDDING_MODEL_OPTIONS):
  0: sentence-transformers/all-MiniLM-L6-v2 (huggingface)
  1: sentence-transformers/all-mpnet-base-v2 (huggingface)
  2: BAAI/bge-m3 (huggingface)
  3: BAAI/bge-large-en (huggingface)
  4: BAAI/bge-base-en-v1.5 (huggingface)
  5: BAAI/bge-large-en-v1.5 (huggingface)
  6: intfloat/e5-base-v2 (huggingface)
  7: sentence-transformers/static-retrieval-mrl-en-v1 (huggingface)
  8: sentence-transformers/all-MiniLM-L12-v2 (huggingface)
  9: nomic-ai/nomic-embed-text-v1.5 (huggingface)
  10: sentence-transformers/multi-qa-mpnet-base-dot-v1 (huggingface)
  11: sentence-transformers/multi-qa-mpnet-base-cos-v1 (huggingface)
  12: hkunlp/instructor-large (huggingface)
  13: hkunlp/instructor-xl (huggingface)

Available Text Generation Models (TEXT_GENERATION_MODEL_OPTIONS):
  0: google/flan-t5-small
  1: google/flan-t5-base
  2: google/flan-t5-large
  3: google/flan-t5-xl
  4: tiiuae/Falcon3-7B-Base
  5: tiiuae/Falcon3-1B-Instruct
  6: tiiuae/

In [None]:
run_mains_smart()

Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Initialized DatabaseManager: Embedding embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface)


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
Error loading file books\alice_in_wonderland.md


Error loading documents: partition_md() is not available because one or more dependencies are not installed. Use: pip install "unstructured[md]" (including quotes) to install the required dependencies
successfully created db with sentence-transformers/all-MiniLM-L6-v2
Running Alice in Wonderland quiz...
Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model   : google/flan-t5-small


config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Initialized QueryEngine: embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface); chat model : google/flan-t5-small

Quiz Summary:
Correct Answers: 0 / 90. Accuracy: 0.0%
Error saving results: Object of type datetime is not JSON serializable
successfully ran quiz with sentence-transformers/all-MiniLM-L6-v2 and google/flan-t5-small. 1 / 18 models combination tested. testing 1 / 1 embedding models. tested 1 / 18 chat models.

Running Alice in Wonderland quiz...
Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model   : google/flan-t5-base


config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Initialized QueryEngine: embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface); chat model : google/flan-t5-base

Quiz Summary:
Correct Answers: 0 / 90. Accuracy: 0.0%
Error saving results: Expecting value: line 5 column 30 (char 127)
successfully ran quiz with sentence-transformers/all-MiniLM-L6-v2 and google/flan-t5-base. 2 / 18 models combination tested. testing 1 / 1 embedding models. tested 2 / 18 chat models.

Running Alice in Wonderland quiz...
Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model   : google/flan-t5-large


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Initialized QueryEngine: embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface); chat model : google/flan-t5-large

Quiz Summary:
Correct Answers: 0 / 90. Accuracy: 0.0%
Error saving results: Expecting value: line 5 column 30 (char 127)
successfully ran quiz with sentence-transformers/all-MiniLM-L6-v2 and google/flan-t5-large. 3 / 18 models combination tested. testing 1 / 1 embedding models. tested 3 / 18 chat models.

Running Alice in Wonderland quiz...
Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model   : google/flan-t5-xl


config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Initialized QueryEngine: embedding model: sentence-transformers/all-MiniLM-L6-v2 (huggingface); chat model : google/flan-t5-xl

Quiz Summary:
Correct Answers: 0 / 90. Accuracy: 0.0%
Error saving results: Expecting value: line 5 column 30 (char 127)
successfully ran quiz with sentence-transformers/all-MiniLM-L6-v2 and google/flan-t5-xl. 4 / 18 models combination tested. testing 1 / 1 embedding models. tested 4 / 18 chat models.

Running Alice in Wonderland quiz...
Using embedding model         : sentence-transformers/all-MiniLM-L6-v2 (huggingface)
Using text generation model   : tiiuae/Falcon3-7B-Base


config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00003-of-00004.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/805M [00:00<?, ?B/s]

In [None]:
print_results_summary()