# Import Dependencies


In [1]:
# Import necessary functions from the uploaded files
import os
from final_response_front_end_main import initialize_system, process_query
from user_history_utils import save_chat_pkl_by_embedding
from user_history_utils import save_chat_json

# Initialize Environment (do this once)

In [2]:
# Initialize the environment: a single initialize_system() call returns 12 values,
# unpacked here (in order) into notebook-level globals. Functions defined in later
# cells read several of these globals directly instead of taking them as arguments.
(
    DEVICE,
    TOKENIZER,
    EMBEDDING_MODEL,
    LLM_MODEL,
    LLM_SYSTEM_PROMPT,
    QDRANT_CLIENT,
    CHUNK_COLLECTION,
    HISTORY_COLLECTION,
    BM25_SEARCH_FUNCTION,
    user_query_state_history,
    query_num,
    HISTORICAL_QUERY_NUM,
) = initialize_system()

qdrant_vector_store/local_embedding_models/Snowflake/snowflake-arctic-embed-l-v2.0
Loading Snowflake/snowflake-arctic-embed-l-v2.0 from local storage...


Some weights of XLMRobertaModel were not initialized from the model checkpoint at qdrant_vector_store/local_embedding_models/Snowflake/snowflake-arctic-embed-l-v2.0 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Snowflake/snowflake-arctic-embed-l-v2.0 embedding model loaded to cuda


############### System Prompt:

    You are an advanced AI research assistant. Generate detailed and comprehensive responses that supplement students' and academic researchers' work with information grounded in highly cited AI/ML research papers, specifically in fields like NLP and CV. The response should not focus on one area of study but should be informed by both the current query and chat history to generate a well-rounded answer.

    1. **Introductory Overview**: Start with a high-level conceptual overview of the topic, providing a brief and clear explanation that covers the essential aspects of the subject. This should be accessible to a broad audience.

    2. **Technical Overview**: After the conceptual overview, provide a more in-depth, technical explanation that dives deeper into the topic. This could include relevant algorithms, methods, or models, as well as their theoretical foundations.

    3. **Ex

# Call Response Generation Function (do this as much as you like)

In [3]:
def call_back_end_response_generation(
    user_query_state_history: dict,
    DESIRED_HISTORY_WINDOW_SIZE: int,
    DESIRED_CONTEXT_CHUNKS_TOP_K: int,
    RAG_SWITCH: bool,
    HISTORY_SWITCH: bool,
    BM25_SWITCH: bool,
    TOPIC_RETRIEVAL_SWITCH: bool,
    HISTORIC_QUERY_SIMILARITY_THRESHOLD: float,  # [0, 1] range (filter)
    QUERY_TEXT: str,
    QUERY_NUM: int,
    final_json_path: str,
):
    """Run one query through ``process_query`` and record the result in the chat state.

    The per-query settings come from the arguments; the remaining inputs of
    ``process_query`` (QDRANT_CLIENT, CHUNK_COLLECTION, HISTORY_COLLECTION,
    LLM_MODEL, LLM_SYSTEM_PROMPT, DEVICE, EMBEDDING_MODEL, TOKENIZER,
    BM25_SEARCH_FUNCTION) are read from notebook-level globals, so the
    environment-initialization cell must have been run first.

    Args:
        user_query_state_history: Chat state keyed by query number. It is passed
            to ``process_query`` and then updated in place with the new result.
        DESIRED_HISTORY_WINDOW_SIZE: Lookback window size for history utilization.
        DESIRED_CONTEXT_CHUNKS_TOP_K: Number of chunks requested for the question.
        RAG_SWITCH: Front-end switch for RAG.
        HISTORY_SWITCH: Front-end switch for history usage.
        BM25_SWITCH: Front-end switch for BM25 search.
        TOPIC_RETRIEVAL_SWITCH: Front-end switch for topic retrieval.
        HISTORIC_QUERY_SIMILARITY_THRESHOLD: History similarity filter, [0, 1] range.
        QUERY_TEXT: The question to answer.
        QUERY_NUM: Key under which the result is stored in the state.
        final_json_path: Accepted for interface compatibility; not used here.

    Returns:
        The same ``user_query_state_history`` object, now containing
        ``user_query_state_history[QUERY_NUM]``.
    """
    # Positional order below must match process_query's signature exactly.
    front_end_settings = (
        DESIRED_HISTORY_WINDOW_SIZE,
        DESIRED_CONTEXT_CHUNKS_TOP_K,
        RAG_SWITCH,
        HISTORY_SWITCH,
        BM25_SWITCH,
        TOPIC_RETRIEVAL_SWITCH,
        HISTORIC_QUERY_SIMILARITY_THRESHOLD,
    )
    # Shared handles created once by the initialization cell (notebook globals).
    backend_handles = (
        QDRANT_CLIENT,
        CHUNK_COLLECTION,
        HISTORY_COLLECTION,
        LLM_MODEL,
        LLM_SYSTEM_PROMPT,
        DEVICE,
        EMBEDDING_MODEL,
        TOKENIZER,
        BM25_SEARCH_FUNCTION,
    )

    query_result = process_query(
        *front_end_settings,
        QUERY_TEXT,
        user_query_state_history,
        QUERY_NUM,
        *backend_handles,
    )
    user_query_state_history[QUERY_NUM] = query_result

    return user_query_state_history

# Input Test Query Processing Function

In [4]:
def process_list_of_queries_for_testing(
    saved_chats_topic_name,  # The name of the folder to save questions in
    list_of_questions,  # The list of questions being used to get responses
    task_folder='experiments',
    user_output_folder='my_experiment',  # the folder to save the chats in
    DESIRED_HISTORY_WINDOW_SIZE=0,  # The lookback window size for history utilization
    DESIRED_CONTEXT_CHUNKS_TOP_K=5,  # The number of chunks requested for a question
    RAG_SWITCH=True,  # Front-end user input for enabling RAG
    HISTORY_SWITCH=False,  # Front-end user input for enabling history usage
    BM25_SWITCH=False,  # Front-end user input for enabling BM25 search
    TOPIC_RETRIEVAL_SWITCH=False,  # Front-end user input for enabling topic retrieval
    HISTORIC_QUERY_SIMILARITY_THRESHOLD=0.3,  # History query-to-query similarity filter min bound
):
    """Ask a list of questions in order as one chat and save the results.

    The output directory is
    ``user_output/{task_folder}/{user_output_folder}/{saved_chats_topic_name}``
    and is created if it does not exist. Each question is sent through
    ``call_back_end_response_generation`` with a query number starting at 1,
    accumulating results in a state dict that starts empty on every call.
    Each new entry is handed to ``save_chat_json`` as soon as it is produced;
    after the last question the whole state is handed to
    ``save_chat_pkl_by_embedding``.

    Args:
        saved_chats_topic_name: Leaf folder name; also the stem of the JSON file.
        list_of_questions: Questions to ask, in order.
        task_folder: First folder level under ``user_output``.
        user_output_folder: Second folder level under ``user_output``.
        DESIRED_HISTORY_WINDOW_SIZE: Lookback window size for history utilization.
        DESIRED_CONTEXT_CHUNKS_TOP_K: Number of chunks requested per question.
        RAG_SWITCH: Enable RAG.
        HISTORY_SWITCH: Enable history usage.
        BM25_SWITCH: Enable BM25 search.
        TOPIC_RETRIEVAL_SWITCH: Enable topic retrieval.
        HISTORIC_QUERY_SIMILARITY_THRESHOLD: Minimum query-to-query similarity
            for the history filter.

    Returns:
        None. Progress and the final state length are printed.

    Raises:
        OSError: If the output directory cannot be created (for example, the
            path already exists but is not a directory).
    """
    directory = f'user_output/{task_folder}/{user_output_folder}/{saved_chats_topic_name}'
    # exist_ok=True replaces a separate exists() check followed by makedirs(),
    # which raises FileExistsError if the directory appears between the two calls.
    os.makedirs(directory, exist_ok=True)

    # Output files for this chat, all inside `directory`.
    final_json_path = os.path.join(directory, f"{saved_chats_topic_name}.json")
    final_embedded_history_path = os.path.join(directory, 'user_embedded_history.pkl')
    final_non_embedded_history_path = os.path.join(directory, 'user_non_embedded_history.pkl')

    # Each call starts from an empty chat state.
    user_state = {}

    for query_num, question in enumerate(list_of_questions, start=1):
        print(f"Processing query {query_num}: {question}\n")
        print(f"RAG Switch: {RAG_SWITCH}")
        print(f"History Switch: {HISTORY_SWITCH}")
        print(f"BM25 Switch: {BM25_SWITCH}")
        print(f"Topic Retrieval Switch: {TOPIC_RETRIEVAL_SWITCH}\n")

        user_state = call_back_end_response_generation(
            user_query_state_history=user_state,
            DESIRED_HISTORY_WINDOW_SIZE=DESIRED_HISTORY_WINDOW_SIZE,
            DESIRED_CONTEXT_CHUNKS_TOP_K=DESIRED_CONTEXT_CHUNKS_TOP_K,
            RAG_SWITCH=RAG_SWITCH,
            HISTORY_SWITCH=HISTORY_SWITCH,
            BM25_SWITCH=BM25_SWITCH,
            TOPIC_RETRIEVAL_SWITCH=TOPIC_RETRIEVAL_SWITCH,
            HISTORIC_QUERY_SIMILARITY_THRESHOLD=HISTORIC_QUERY_SIMILARITY_THRESHOLD,
            QUERY_TEXT=question,
            QUERY_NUM=query_num,
            final_json_path=final_json_path,
        )

        # Hand this turn's entry to save_chat_json before the next question runs.
        save_chat_json(user_state[query_num], file_path=final_json_path)

    # Once all questions are answered, hand the full state to the pickle writer.
    save_chat_pkl_by_embedding(
        user_state,
        embedded_path=final_embedded_history_path,
        non_embedded_path=final_non_embedded_history_path,
    )

    print(f'#### Final User State Length: {len(user_state)}\n')

# Experiment Scheduler

## My Questions:

In [5]:
#my_input_questions = [
#'Please explain batch normalization.',                                    # 1
#'How does it relate to layer normalization?',                             # 2
#'Can you explain the advantages of each method?']                         # 3
# 'What are some real-world applications of batch normalization?',          # 4
# 'How does batch normalization affect the performance of neural networks?',# 5
# 'What are the key differences between normalization techniques?',         # 6
# #Hard topic shift begins here:
# 'What is reinforcement learning and how does it work?',                   # 7
# 'Can you explain the exploration-exploitation tradeoff?',                 # 8
# 'What are some popular algorithms used in reinforcement learning?',       # 9
# 'asdfadsgfasd',                                                           # 10
# 'exit'                                                                    # end
# ]

In [6]:
# HW2 question set: ten questions on text-classification fundamentals.
# The list order is the order in which the questions are asked.
my_input_questions_HW2 = [
    # Q1: tokenization
    "What is the purpose of tokenizing text into sentences and words?",
    # Q2: n-gram language models
    "What are n-gram language models and how are they useful in NLP?",
    # Q3: naive Bayes assumption
    "What is the naive Bayes assumption and how does it relate to text classification?",
    # Q4: naive Bayes vs. logistic regression
    "What are some of the advantages and disadvantages of naive Bayes classifiers compared to logistic regression?",
    # Q5: features for text classification
    "What do we mean by \"features\" in the context of text classification? Give some examples of features that might be useful for distinguishing different newsgroup topics.",
    # Q6: train/test split
    "What is the purpose of a test set in machine learning? Why do we need separate training and test sets?",
    # Q7: evaluation metrics
    "What metrics could you use to evaluate the performance of a text classification model? Define accuracy and any other relevant metrics.",
    # Q8: feature importance
    "How could you determine which features are most important or indicative for a logistic regression text classification model?",
    # Q9: bag-of-words
    "What is the bag-of-words representation and what are some of its limitations for text classification?",
    # Q10: overfitting
    "What is overfitting in machine learning? How could you tell if your text classification model is overfitting the training data? Describe two ways to reduce overfitting.",
]

In [7]:
# HW4 question set: seven questions on sequence-to-sequence models, sequence
# labeling and LSTMs. The list order is the order in which they are asked.
my_input_questions_HW4 = [
    # Q1: encoder-decoder architecture
    "What are the key components of the encoder-decoder architecture for sequence-to-sequence models? How is this architecture used for machine translation?",
    # Q2: attention mechanisms
    "Attention mechanisms have become an integral part of sequence-to-sequence models. Explain how attention works and why it improves performance compared to basic encoder-decoder models.",
    # Q3: long sequences in seq2seq
    "What techniques can be used to handle very long input or output sequences in seq2seq models? Discuss solutions like hierarchical attention, sparse attention, and other approaches.",
    # Q4: RNNs/LSTMs for sequence labeling vs. HMMs
    "Explain how LSTMs and other RNN architectures are commonly used for sequence labeling tasks like part-of-speech tagging. What advantages do they provide over Hidden Markov Model (HMM) based techniques?",
    # Q5: POS tagging vs. NER
    "Compare part-of-speech (POS) tagging and named entity recognition (NER) tasks. What are the key differences in terms of input representation, output labels, model architecture, and evaluation metrics? Discuss challenges unique to NER such as entity boundary detection.",
    # Q6: LSTM gating
    "Explain the core components of the LSTM unit - the cell state, input gate, forget gate, output gate. How does this gating mechanism address shortcomings of basic RNNs?",
    # Q7: long-range dependencies in LSTMs
    "Despite gating mechanisms, LSTMs can still face challenges in learning long-term dependencies. Explain limitations of standard LSTMs for modeling long sequences. Discuss at least two techniques that can help LSTMs better capture long-range dependencies, such as dilated LSTMs, skipping connections, and attention.",
]

In [8]:
# HW5 question set: ten questions on the Transformer and large language models.
# The list order is the order in which they are asked.
my_input_questions_HW5 = [
    # Q1: Transformer architecture
    "Explain the overall architecture of the Transformer model. What are the main components of the encoder and decoder?",
    # Q2: multi-head attention
    "What is multi-head attention and why is it useful? How is it implemented in the Transformer?",
    # Q3: positional encodings
    "Explain how positional encodings work in the Transformer and why they are necessary.",
    # Q4: layer normalization and residual connections
    "What is the purpose of layer normalization and residual connections in the Transformer? Where are they applied?",
    # Q5: Transformer training process
    "Describe the training process for the Transformer. What is the batching scheme used? What is label smoothing and why is it helpful?",
    # Q6: GPT-3 vs. the original Transformer
    "How do large language models like GPT-3 differ from the original Transformer model described in the paper?",
    # Q7: pre-training and fine-tuning
    "Explain the pre-training and fine-tuning process for large language models. Why is pre-training on large unlabeled corpora important?",
    # Q8: challenges of training very large models
    "What are some of the key challenges in training very large language models? Discuss techniques like sparse attention and model parallelism.",
    # Q9: few-shot learning
    "Large language models have shown impressive few-shot learning abilities. What factors contribute to this? How could we further improve few-shot learning?",
    # Q10: risks and ethics
    "Discuss the risks and ethical considerations with large language models. What should we be cautious about when deploying them in real applications? How can we make them safer and more trustworthy?",
]

## Experiment Subset @top_k=3

In [9]:
# Number of context chunks requested per question by every experiment in this cell.
at_top_k_chunks_3 = 3

# Experiments 1-4: history disabled. Each row of the table below is
# (RAG_SWITCH, BM25_SWITCH, TOPIC_RETRIEVAL_SWITCH) for one experiment.
zero_shot_experiments_at_3_top_k = {
    f'experiment_{experiment_id}': {
        'DESIRED_HISTORY_WINDOW_SIZE': 0,  # The lookback window size for history utilization
        'DESIRED_CONTEXT_CHUNKS_TOP_K': at_top_k_chunks_3,  # The number of chunks requested for a question
        'RAG_SWITCH': rag_on,  # Front-end user input for enabling RAG
        'HISTORY_SWITCH': False,  # Front-end user input for enabling history usage
        'BM25_SWITCH': bm25_on,  # Front-end user input for enabling BM25 search
        'TOPIC_RETRIEVAL_SWITCH': topic_on,  # Front-end user input for enabling topic retrieval
        'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3,
    }
    for experiment_id, (rag_on, bm25_on, topic_on) in enumerate(
        (
            (False, False, False),  # experiment_1: Naive response without retrieval
            (True, False, False),   # experiment_2: RAG response with basic retrieval
            (True, True, False),    # experiment_3: RAG response hybrid retrieval
            (True, True, True),     # experiment_4: RAG response hybrid with topic level retrieval
        ),
        start=1,
    )
}

# Lookback window used by the history-enabled experiments below.
history_window_size = 5

# Experiments 5-8: the same four retrieval variants, with history enabled.
multi_shot_experiments_at_3_top_k = {
    f'experiment_{experiment_id}': {
        'DESIRED_HISTORY_WINDOW_SIZE': history_window_size,  # The lookback window size for history utilization
        'DESIRED_CONTEXT_CHUNKS_TOP_K': at_top_k_chunks_3,  # The number of chunks requested for a question
        'RAG_SWITCH': rag_on,  # Front-end user input for enabling RAG
        'HISTORY_SWITCH': True,  # Front-end user input for enabling history usage
        'BM25_SWITCH': bm25_on,  # Front-end user input for enabling BM25 search
        'TOPIC_RETRIEVAL_SWITCH': topic_on,  # Front-end user input for enabling topic retrieval
        'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3,
    }
    for experiment_id, (rag_on, bm25_on, topic_on) in enumerate(
        (
            (False, False, False),  # experiment_5: Naive response without retrieval and history
            (True, False, False),   # experiment_6: RAG response with basic retrieval and history
            (True, True, False),    # experiment_7: RAG response hybrid retrieval and history
            (True, True, True),     # experiment_8: RAG response hybrid with topic level retrieval and history
        ),
        start=5,
    )
}

## Experiment Subset @top_k=5

In [None]:
# Number of context chunks requested per question by every experiment in this cell.
at_top_k_chunks_5 = 5

# Experiments 9-12: history disabled. Each row of the table below is
# (RAG_SWITCH, BM25_SWITCH, TOPIC_RETRIEVAL_SWITCH) for one experiment.
zero_shot_experiments_at_5_top_k = {
    f'experiment_{experiment_id}': {
        'DESIRED_HISTORY_WINDOW_SIZE': 0,  # The lookback window size for history utilization
        'DESIRED_CONTEXT_CHUNKS_TOP_K': at_top_k_chunks_5,  # The number of chunks requested for a question
        'RAG_SWITCH': rag_on,  # Front-end user input for enabling RAG
        'HISTORY_SWITCH': False,  # Front-end user input for enabling history usage
        'BM25_SWITCH': bm25_on,  # Front-end user input for enabling BM25 search
        'TOPIC_RETRIEVAL_SWITCH': topic_on,  # Front-end user input for enabling topic retrieval
        'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3,
    }
    for experiment_id, (rag_on, bm25_on, topic_on) in enumerate(
        (
            (False, False, False),  # experiment_9: Naive response without retrieval
            (True, False, False),   # experiment_10: RAG response with basic retrieval
            (True, True, False),    # experiment_11: RAG response hybrid retrieval
            (True, True, True),     # experiment_12: RAG response hybrid with topic level retrieval
        ),
        start=9,
    )
}

# Lookback window used by the history-enabled experiments below.
history_window_size = 5

# Experiments 13-16: the same four retrieval variants, with history enabled.
multi_shot_experiments_at_5_top_k = {
    f'experiment_{experiment_id}': {
        'DESIRED_HISTORY_WINDOW_SIZE': history_window_size,  # The lookback window size for history utilization
        'DESIRED_CONTEXT_CHUNKS_TOP_K': at_top_k_chunks_5,  # The number of chunks requested for a question
        'RAG_SWITCH': rag_on,  # Front-end user input for enabling RAG
        'HISTORY_SWITCH': True,  # Front-end user input for enabling history usage
        'BM25_SWITCH': bm25_on,  # Front-end user input for enabling BM25 search
        'TOPIC_RETRIEVAL_SWITCH': topic_on,  # Front-end user input for enabling topic retrieval
        'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3,
    }
    for experiment_id, (rag_on, bm25_on, topic_on) in enumerate(
        (
            (False, False, False),  # experiment_13: Naive response without retrieval and history
            (True, False, False),   # experiment_14: RAG response with basic retrieval and history
            (True, True, False),    # experiment_15: RAG response hybrid retrieval and history
            (True, True, True),     # experiment_16: RAG response hybrid with topic level retrieval and history
        ),
        start=13,
    )
}

In [11]:
#import time

def conduct_experiment(my_input_questions: list, experiment: dict, experiment_type: str):
    """Run every configuration in ``experiment`` over the same list of questions.

    For each ``name -> config`` entry, the configuration is printed and
    ``process_list_of_queries_for_testing`` is called with ``name`` as the
    chat/topic folder, ``experiment_type`` as the user output folder, and the
    fixed task folder ``'experiments'``.

    Args:
        my_input_questions: Questions passed unchanged to every run.
        experiment: Mapping of experiment name to its settings dict. Each
            settings dict must contain all seven forwarded keys listed in the
            body; other keys are ignored.
        experiment_type: Name of the output sub-folder shared by all runs.

    Raises:
        KeyError: If a settings dict lacks one of the forwarded keys.
    """
    # Settings copied from each configuration into the call, in this order.
    forwarded_settings = (
        'DESIRED_HISTORY_WINDOW_SIZE',
        'DESIRED_CONTEXT_CHUNKS_TOP_K',
        'RAG_SWITCH',
        'HISTORY_SWITCH',
        'BM25_SWITCH',
        'TOPIC_RETRIEVAL_SWITCH',
        'HISTORIC_QUERY_SIMILARITY_THRESHOLD',
    )

    for name, config in experiment.items():
        print(f"#### Running {name} with configuration: {config}\n")

        process_list_of_queries_for_testing(
            saved_chats_topic_name=name,
            list_of_questions=my_input_questions,
            task_folder='experiments',
            user_output_folder=experiment_type,
            # Explicit per-key lookup (not **config) so extra keys are ignored
            # and a missing key raises KeyError instead of using a default.
            **{setting: config[setting] for setting in forwarded_settings},
        )

        # NOTE(review): a 61-second pause between experiments used to sit here
        # and is currently disabled; presumably it was for rate limiting - confirm.

## HW 2 Experiments

In [12]:
# HW2 questions, zero-shot configurations (history disabled), top_k=3
conduct_experiment(
    my_input_questions=my_input_questions_HW2,
    experiment=zero_shot_experiments_at_3_top_k,
    experiment_type='zero_shot_HW2_at_3_top_k',
)

#### Running experiment_1 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What is the purpose of tokenizing text into sentences and words?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False



✅ Attempt 1: STOP with content (code 1).

Processing query 2: What are n-gram language models and how are they useful in NLP?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What is the naive Bayes assumption and how does it relate to text classificatIon?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 4: What are some of the advantages and disadvantages of naive Bayes classifiers compared to logistic regression?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 5: What do we mean by "features" in the context of text classification? Give some examples of features that might be useful for distinguishing different newsgroup topics.

RAG Switch: False
History Switch: False
BM25 Switch: F

In [13]:
# HW2 questions, zero-shot configurations (history disabled), top_k=5
conduct_experiment(
    my_input_questions=my_input_questions_HW2,
    experiment=zero_shot_experiments_at_5_top_k,
    experiment_type='zero_shot_HW2_at_5_top_k',
)

#### Running experiment_9 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What is the purpose of tokenizing text into sentences and words?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 2: What are n-gram language models and how are they useful in NLP?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What is the naive Bayes assumption and how does it relate to text classificatIon?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 4: What are some of the advantages and disadvantage

In [14]:
# HW2 questions, multi-shot configurations (history enabled), top_k=3
conduct_experiment(
    my_input_questions=my_input_questions_HW2,
    experiment=multi_shot_experiments_at_3_top_k,
    experiment_type='multi_shot_HW2_at_3_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What is the purpose of tokenizing text into sentences and words?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: What are n-gram language models and how are they useful in NLP?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What is the naive Bayes assumption and how does it relate to text classificatIon?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retr

In [15]:
# HW2 questions, multi-shot configurations (history enabled), top_k=5
conduct_experiment(
    my_input_questions=my_input_questions_HW2,
    experiment=multi_shot_experiments_at_5_top_k,
    experiment_type='multi_shot_HW2_at_5_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What is the purpose of tokenizing text into sentences and words?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: What are n-gram language models and how are they useful in NLP?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What is the naive Bayes assumption and how does it relate to text classificatIon?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retr

## HW 4 Experiments

In [16]:
# HW4 questions, zero-shot configurations (history disabled), top_k=3
conduct_experiment(
    my_input_questions=my_input_questions_HW4,
    experiment=zero_shot_experiments_at_3_top_k,
    experiment_type='zero_shot_HW4_at_3_top_k',
)

#### Running experiment_1 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What are the key components of the encoder-decoder architecture for sequence-to-sequence models? How is this architecture used for machine translation?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 2: Attention mechanisms have become an integral part of sequence-to-sequence models. Explain how attention works and why it improves performance compared to basic encoder-decoder models.

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What techniques can be used to handle very long input or output sequences in

In [17]:
# HW4 questions, zero-shot configurations (history disabled), top_k=5
conduct_experiment(
    my_input_questions=my_input_questions_HW4,
    experiment=zero_shot_experiments_at_5_top_k,
    experiment_type='zero_shot_HW4_at_5_top_k',
)

#### Running experiment_9 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What are the key components of the encoder-decoder architecture for sequence-to-sequence models? How is this architecture used for machine translation?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 2: Attention mechanisms have become an integral part of sequence-to-sequence models. Explain how attention works and why it improves performance compared to basic encoder-decoder models.

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: What techniques can be used to handle very long input or output sequences in

In [18]:
# HW4 question set, multi-shot (history-enabled) experiment configurations at top_k = 3.
conduct_experiment(
    my_input_questions=my_input_questions_HW4,
    experiment=multi_shot_experiments_at_3_top_k,
    experiment_type='multi_shot_HW4_at_3_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What are the key components of the encoder-decoder architecture for sequence-to-sequence models? How is this architecture used for machine translation?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: Attention mechanisms have become an integral part of sequence-to-sequence models. Explain how attention works and why it improves performance compared to basic encoder-decoder models.

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Atte

In [19]:
# HW4 question set, multi-shot (history-enabled) experiment configurations at top_k = 5.
conduct_experiment(
    my_input_questions=my_input_questions_HW4,
    experiment=multi_shot_experiments_at_5_top_k,
    experiment_type='multi_shot_HW4_at_5_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: What are the key components of the encoder-decoder architecture for sequence-to-sequence models? How is this architecture used for machine translation?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: Attention mechanisms have become an integral part of sequence-to-sequence models. Explain how attention works and why it improves performance compared to basic encoder-decoder models.

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Atte

## HW 5 Experiments

In [20]:
# HW5 question set, zero-shot experiment configurations at top_k = 3.
conduct_experiment(
    my_input_questions=my_input_questions_HW5,
    experiment=zero_shot_experiments_at_3_top_k,
    experiment_type='zero_shot_HW5_at_3_top_k',
)

#### Running experiment_1 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: Explain the overall architecture of the Transformer model. What are the main components of the encoder and decoder?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 2: What is multi-head attention and why is it useful? How is it implemented in the Transformer?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: Explain how positional encodings work in the Transformer and why they are necessary.

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with cont

In [21]:
# HW5 question set, zero-shot experiment configurations at top_k = 5.
conduct_experiment(
    my_input_questions=my_input_questions_HW5,
    experiment=zero_shot_experiments_at_5_top_k,
    experiment_type='zero_shot_HW5_at_5_top_k',
)

#### Running experiment_9 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 0, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': False, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: Explain the overall architecture of the Transformer model. What are the main components of the encoder and decoder?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 2: What is multi-head attention and why is it useful? How is it implemented in the Transformer?

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: Explain how positional encodings work in the Transformer and why they are necessary.

RAG Switch: False
History Switch: False
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with cont

In [22]:
# HW5 question set, multi-shot (history-enabled) experiment configurations at top_k = 3.
conduct_experiment(
    my_input_questions=my_input_questions_HW5,
    experiment=multi_shot_experiments_at_3_top_k,
    experiment_type='multi_shot_HW5_at_3_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 3, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: Explain the overall architecture of the Transformer model. What are the main components of the encoder and decoder?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: What is multi-head attention and why is it useful? How is it implemented in the Transformer?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: Explain how positional encodings work in the Transformer and why they a

In [23]:
# HW5 question set, multi-shot (history-enabled) experiment configurations at top_k = 5.
conduct_experiment(
    my_input_questions=my_input_questions_HW5,
    experiment=multi_shot_experiments_at_5_top_k,
    experiment_type='multi_shot_HW5_at_5_top_k',
)

#### Running experiment_5 with configuration: {'DESIRED_HISTORY_WINDOW_SIZE': 5, 'DESIRED_CONTEXT_CHUNKS_TOP_K': 5, 'RAG_SWITCH': False, 'HISTORY_SWITCH': True, 'BM25_SWITCH': False, 'TOPIC_RETRIEVAL_SWITCH': False, 'HISTORIC_QUERY_SIMILARITY_THRESHOLD': 0.3}

Processing query 1: Explain the overall architecture of the Transformer model. What are the main components of the encoder and decoder?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

History was meant to be utilized, but the window_size is 0, so no lookback will be performed (window size == 0, no history requested).
✅ Attempt 1: STOP with content (code 1).

Processing query 2: What is multi-head attention and why is it useful? How is it implemented in the Transformer?

RAG Switch: False
History Switch: True
BM25 Switch: False
Topic Retrieval Switch: False

✅ Attempt 1: STOP with content (code 1).

Processing query 3: Explain how positional encodings work in the Transformer and why they a

# Small Unit Test

The function below comes from `model_prompting_utils.py`; it is reproduced here so it can be run against a small hand-built input.

```python
def get_chunk_ids_by_bottom_level_headers(chunk_ids_by_header, example_headers):
    """
    Select the entries of ``chunk_ids_by_header`` whose key is a bottom-level header.

    The bottom-level header of each item in ``example_headers`` is its last element.
    Empty items, and keys of ``chunk_ids_by_header`` that are ``None`` or a blank /
    whitespace-only string, are replaced by numbered placeholder strings before the
    comparison is made. Progress and diagnostics are printed to stdout.

    Args:
        chunk_ids_by_header: Mapping from a header key to its associated value
            (lists of chunk IDs in the example that accompanies this function).
        example_headers: Iterable of sized, indexable header paths (tuples in the
            accompanying example); only the last element of each path is used.

    Returns:
        A new dict, in the iteration order of ``chunk_ids_by_header``, holding the
        (possibly placeholder-renamed) keys that occur among the bottom-level
        headers, mapped to their original values. The inputs are not modified.

    NOTE(review): both normalisation steps number their placeholders from 0 using
    the same ``ERROR_NO_HEADER_PLACEHOLDER_<n>`` pattern, so the n-th empty header
    path and the n-th blank key end up equal and therefore match in the filtering
    step. Confirm whether that pairing is intended.
    """

    # --- Step 1: take the last element of every header path -----------------
    leaf_headers = []
    empty_header_count = 0

    for header_path in example_headers:
        if len(header_path) == 0:
            # Nothing to take a last element from: substitute a numbered placeholder.
            leaf = f"ERROR_NO_HEADER_PLACEHOLDER_{empty_header_count}"
            print(f"⚠ Assigned unique placeholder to bottom_level_headers: {leaf}")
            empty_header_count += 1
        else:
            leaf = header_path[-1]
        leaf_headers.append(leaf)

    if empty_header_count > 1:
        print(f"!!! MULTIPLE_UNIQUE_PLACEHOLDERS_IN_HEADERS: {empty_header_count} unknown example_headers assigned unique placeholders")

    # --- Step 2: rename None / blank keys of the chunk mapping --------------
    chunks_by_clean_key = {}
    blank_key_count = 0

    for raw_key, chunk_ids in chunk_ids_by_header.items():
        key_is_blank = raw_key is None or (isinstance(raw_key, str) and not raw_key.strip())
        if not key_is_blank:
            chunks_by_clean_key[raw_key] = chunk_ids
            continue

        substitute_key = f"ERROR_NO_HEADER_PLACEHOLDER_{blank_key_count}"
        print(f"⚠ Assigned unique placeholder key to chunk_ids_by_header: {substitute_key} for value: {chunk_ids}")
        chunks_by_clean_key[substitute_key] = chunk_ids
        blank_key_count += 1

    if blank_key_count > 1:
        print(f"!!! MULTIPLE_UNIQUE_PLACEHOLDERS_IN_CHUNKS: {blank_key_count} malformed chunk_ids_by_header keys assigned unique placeholders")

    # --- Step 3: keep only the keys that are bottom-level headers -----------
    print("\nFiltering normalized_chunk_ids_by_header based on bottom_level_headers...")
    print("Bottom-level headers:", leaf_headers, "\n")

    selected = {}
    for key, chunk_ids in chunks_by_clean_key.items():
        if key not in leaf_headers:
            print(f"✘ No match: {repr(key)} not in bottom_level_headers. Skipping.")
            continue
        print(f"✔ Match found: {repr(key)} is in bottom_level_headers. Adding to result.")
        selected[key] = chunk_ids

    print("\nFiltered result keys:", list(selected.keys()))

    return selected

# Hand-built input: header -> chunk IDs. The '' key is a blank header, which the
# function under test renames to a placeholder before filtering.
chunk_ids_by_header = {
    '**SegFormer: Simple and Efficient Design for Semantic** **Segmentation with Transformers**': [9405, 9406, 9407, 9408, 9409],
    '**Abstract**': [9405],
    '**1 Introduction**': [9406, 9407, 9408],
    '**2 Related Work**': [9409],
    '': list(range(9410, 9433)),  # 9410 through 9432 inclusive
    '**3 Method**': [9412, 9413, 9414, 9415, 9416],
    '**4 Experiments**': [9417, 9418, 9419, 9420, 9421, 9422, 9423, 9424],
    '**5 Conclusion**': [9425],
    '**Broader Impact**': [9426],
    '**A Details of MiT Series**': [9427],
    '**B More Qualitative Results on Mask Predictions**': [9428],
    '**C More Visualization on Effective Receptive Field**': [9429],
    '**D More Comparison of DeeplabV3+ and SegFormer on Cityscapes-C**': [9430, 9431, 9432],
}

# Header paths to look up; the two empty tuples exercise the empty-header branch.
example_headers = [
    ('**4 Experiments**',),
    (),
    (),
    ('**3 Method**',),
    ('**SegFormer: Simple and Efficient Design for Semantic** **Segmentation with Transformers**', '**Abstract**'),
    ('**SegFormer: Simple and Efficient Design for Semantic** **Segmentation with Transformers**', '**1 Introduction**'),
]

# Left as the final expression so the returned dict is shown as the cell result.
get_chunk_ids_by_bottom_level_headers(
    chunk_ids_by_header=chunk_ids_by_header,
    example_headers=example_headers,
)
```

# Single Query Example: 

```python
# Single-query walkthrough: run one question through process_query() and persist the result.
# Relies on the names unpacked from initialize_system() earlier in the notebook
# (query_num, user_query_state_history, QDRANT_CLIENT, CHUNK_COLLECTION, ...).

# Example user inputs. In a front-end these would come from user interaction;
# they are hard-coded here for demonstration.
DESIRED_HISTORY_WINDOW_SIZE = 3
DESIRED_CONTEXT_CHUNKS_TOP_K = 5

RAG_SWITCH = True  # Front-end user input for enabling RAG
HISTORY_SWITCH = False  # Front-end user input for enabling history usage
BM25_SWITCH = True  # Front-end user input for enabling BM25 search
TOPIC_RETRIEVAL_SWITCH = False  # Front-end user input for enabling topic retrieval

HISTORIC_QUERY_SIMILARITY_THRESHOLD = 0.3  # Front-end user input for similarity threshold

# Spelling matters here: with BM25_SWITCH enabled the query text is also handed to
# BM25 keyword search, so the term is written correctly ("radiance").
QUERY_TEXT = 'What is a neural radiance field NERF?'
#QUERY_TEXT = input("Enter your question (or type 'exit' to quit): ")  # Get the query text from user input

# Echo the request settings before running the query.
print(f"Processing query: {QUERY_TEXT}")
print(f"RAG Switch: {RAG_SWITCH}")
print(f"History Switch: {HISTORY_SWITCH}")
print(f"BM25 Switch: {BM25_SWITCH}")
print(f"Topic Retrieval Switch: {TOPIC_RETRIEVAL_SWITCH}")

# Placement matters: the increment must come before process_query(), because the new
# value is both passed to it and used as the key its result is stored under.
# Re-running this cell advances query_num again and stores a new entry.
query_num += 1

# Arguments are positional, so their order must match process_query()'s signature.
user_query_state_history[query_num] = process_query(
    DESIRED_HISTORY_WINDOW_SIZE,
    DESIRED_CONTEXT_CHUNKS_TOP_K,
    RAG_SWITCH,
    HISTORY_SWITCH,
    BM25_SWITCH,
    TOPIC_RETRIEVAL_SWITCH,
    HISTORIC_QUERY_SIMILARITY_THRESHOLD,
    QUERY_TEXT,
    user_query_state_history,
    query_num,
    QDRANT_CLIENT,
    CHUNK_COLLECTION,
    HISTORY_COLLECTION,
    LLM_MODEL,
    LLM_SYSTEM_PROMPT,
    DEVICE,
    EMBEDDING_MODEL,
    TOKENIZER,
    BM25_SEARCH_FUNCTION
)

# Persist just this query's state (JSON helper from user_history_utils).
save_chat_json(user_query_state_history[query_num])

# Report the result once, before the full-history save below.
print(f"Processed query {query_num}: {QUERY_TEXT}")
print(f"Response: {user_query_state_history[query_num]['response_text']}")

# Persist the whole history to the two pickle paths given here.
save_chat_pkl_by_embedding(user_query_state_history,
                            embedded_path='user_output/user_embedded_history.pkl',
                            non_embedded_path='user_output/user_non_embedded_history.pkl'
)
```