In [1]:
from transformers import AutoConfig
import configs
from controller.memory_manager import MemoryManager
from data_processor.data_loader import GSM8KDataset
from generator.crv_generator import CRVGenerator
from generator.text_generator import TextGenerator

from retrieve.cosine_similarity import CRVRetriever
from retrieve.dnc import DNMemory
from utils import set_seed, logger
from utils.loading_model import CustomTransformerLoader

from rich import print
from rich.console import Console


In [2]:
# Set up logging and console
console = Console()
logger = logger()

In [3]:
console = Console()
seed = 42
set_seed(seed)

model_urls = {
    "llama31": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "llama3": "meta-llama/Meta-Llama-3-8B-Instruct",
}
model_path = model_urls["llama31"]
tokenizer_path = model_path
hf_token = "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"

In [4]:
config = AutoConfig.from_pretrained(model_path, use_auth_token=hf_token)

console.rule("[bold red]Loading the Model")

loader = CustomTransformerLoader()



In [5]:
model, tokenizer = loader.load_model(
    model_path=model_path, tokenizer_path=tokenizer_path, hf_token=hf_token
)

crv_layers = configs.CRV_LAYERS

print(":warning: model type: ", type(model))
print("config.hidden_size: ", config.num_hidden_layers)
print("config._attn_implementation: ", config._attn_implementation)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
query ="<|start_header_id|>user<|end_header_id|>Question: Can you calculate (5+6)*3 for me? AI: Sure, let me handle that.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

context = "<|start_header_id|>user<|end_header_id|>\n\nYou are given multiple functions, and a user query or a dialog between a user and an AI.\n\nIn both cases, please proceed with generating a function call for the appropriate function with the proper arguments that best answers the given case.\n\nDon't re-do calls that were already executed in the dialog: just use their output in your call.\n\nRespond with nothing but the function call ONLY, such that I can directly execute your function call without any post processing necessary from my end.\nDon't forget to include the keywords of the function's arguments in your call. Do not include arguments that have a default and that you don't need to modify.\nDo not use intermediate variables.\n\n\ndef Calculator(formula):\n \"\"\"\n This API provides basic arithmetic operations: addition, subtraction, multiplication, and division.\n\n Args:\n formula (str): The formula that needs to be calculated. Only integers are supported. Valid operators are +, -, *, /, and (, ). For example, '(1 + 2) * 3'.\n\n Returns:\n result (float): The result of the formula.\n \"\"\"\n\n\nUser query or dialog:\n\nThe current year is 2023.\n\n"
# query = "<|start_header_id|>user<|end_header_id|>\n\nYou are given multiple functions, and a user query or a dialog between a user and an AI.\n\nIn both cases, please proceed with generating a function call for the appropriate function with the proper arguments that best answers the given case.\n\nDon't re-do calls that were already executed in the dialog: just use their output in your call.\n\nRespond with nothing but the function call ONLY, such that I can directly execute your function call without any post processing necessary from my end.\nDon't forget to include the keywords of the function's arguments in your call. Do not include arguments that have a default and that you don't need to modify.\nDo not use intermediate variables.\n\n\ndef Calculator(formula):\n \"\"\"\n This API provides basic arithmetic operations: addition, subtraction, multiplication, and division.\n\n Args:\n formula (str): The formula that needs to be calculated. Only integers are supported. Valid operators are +, -, *, /, and (, ). For example, '(1 + 2) * 3'.\n\n Returns:\n result (float): The result of the formula.\n \"\"\"\n\n\nUser query or dialog:\n\nThe current year is 2023.\n\nQuestion: User: Can you calculate (5+6)*3 for me? AI: Sure, let me handle that.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
configs.MAX_LENGTH = 2048
crv_generator = CRVGenerator(model, tokenizer, max_length=configs.MAX_LENGTH)
best_crv, best_seq_length = crv_generator.generate_crvs(
    context, crv_layers=crv_layers, max_length=configs.MAX_LENGTH
)
print("best crv shape before", best_crv.shape)
best_crv = best_crv[:, :best_seq_length, :]
print("best crv shape after", best_crv.shape)


layer_idx = 10 # at idx=20 solves the (5+6)*3 correctly but doesn't call an api
memory_manager = MemoryManager(model, max_memories=5)
memory_manager.add_memory(
    best_crv, best_seq_length, layer_idx=layer_idx, crv_layers=crv_layers
)

console.rule(f"[bold red]Concat the CRV and the hidden state at layer {layer_idx}")

# model.model.set_post_concat_crv(True)
memory_manager.set_concat_positions(0, start_pos=0, end_pos=best_seq_length)
memory_manager.apply_memory_to_model(0)


text_generator = TextGenerator(model, tokenizer)

# for i, result in enumerate(generated_text):
    # print(i, result)

In [7]:
generated_text = text_generator.generate_text(
    query,
    max_new_tokens=100,
    num_return_sequences = 1,
    temperature = 0.99,
    top_k = None,
    top_p = None,
    min_p=0.02,
    repetition_penalty = 1.0,
    no_repeat_ngram_size = 2,
    output_file="data/results.csv",
    # stop_sequences=["The end", ".\n\n"],
)
print(generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Both `max_new_tokens` (=100) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Calculator's functions were imported as calc, math and numpy.assistant

calc.Calculator().calculate("(5 + 6) * 3")assistant

This result will be used for the next computation.assistant

def calculate(expression):
    return eval(expression)

expression = "(5 + 6) * 3"
print(calculate(expression))


In [68]:
template_ctx2 = '''<|start_header_id|>user<|end_header_id|>\n\nYou are an expert Python programmer designed to provide standard, detailed, step-by-step responses. Your outputs should follow this structure:

1. Begin with a <thinking> section.
2. Inside the thinking section:
   a. Write the topic name of the query, the name of the algorithm if necessary.
   b. Draft an answer as an expert.
   b. Briefly analyze the question and outline your approach.
   c. Present a clear plan of steps to solve the problem.
   d. Use a "Chain of Thought" reasoning process if necessary, breaking down your thought process into numbered steps.
3. Include a <reflection> section for each idea where you:
   a. Review your reasoning.
   b. Check for potential errors or oversights.
   c. Confirm or adjust your conclusion if necessary.
4. Be sure to close all reflection sections.
5. Close the thinking section with </thinking>.
6. Provide your final answer in an <output> section.

Always use these tags in your responses. Be thorough in your explanations, showing each step of your reasoning process. Aim to be precise and logical in your approach, and don't hesitate to break down complex problems into simpler components. Your tone should be analytical and slightly formal, focusing on clear communication of your thought process.

Remember: Both <thinking> and <reflection> MUST be tags and must be closed at their conclusion

Make sure all <tags> are on separate lines with no other text. Do not include other text on a line containing a tag. what is your name?
<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n My name is gpt4x<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n what is your name? My name is <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n

'''

In [None]:
from datasets import load_from_disk

loaded_dataset = load_from_disk("data/processed_meta_llama_dataset")
layer_idx = 5
i = 0
buff = ""
types = []
for instance in loaded_dataset:
    if i>1:
        break
    query = instance['query'][0]
    context = instance['context'][0] + template_ctx
    context = template_ctx
    print("buff == context? ", buff == context)
    buff = context
    
    print(len(query))
    print(len(context))
    best_crv, best_seq_length = crv_generator.generate_crvs(
        context, crv_layers=crv_layers, max_length=configs.MAX_LENGTH
    )

    memory_manager.add_memory(
        best_crv, best_seq_length, layer_idx=layer_idx, crv_layers=crv_layers
    )

    model.model.set_post_concat_crv(True)
    memory_manager.set_concat_positions(0, start_pos=0, end_pos=best_seq_length)
    memory_manager.apply_memory_to_model(0)
    generated_text = text_generator.generate_text(
    query,
    max_new_tokens=400,
    num_return_sequences = 1,
    output_file="data/results.csv",
    # stop_sequences=["The end", ".\n\n"],
    )
    print(generated_text)
    print('==' * 50)

    i += 1
    
    


In [6]:
import re

def extract_context_expansion(text):
    pattern = r'<context_expansion>(.*?)</context_expansion>'
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        return "Context expansion section not found."

In [82]:
class AdvancedLLaMACRVFramework:
    def __init__(self, model, tokenizer, layer_idx = 10):
        self.model = model
        self.tokenizer = tokenizer
        self.text_generator = TextGenerator(model, tokenizer)
        self.crv_generator = CRVGenerator(model, tokenizer, max_length=configs.MAX_LENGTH)
        self.memory_manager = MemoryManager(model, max_memories=5)
        self.layer_idx = layer_idx

        
    def generate_thought_trajectories(self, input_query):
        prompt = f"""
        <|start_header_id|>user<|end_header_id|>\n\nYou are an expert Python programmer designed to provide standard, accurate,and fully working codes, and here is your task:\n
        \nWrite a function to find the similar elements from the given two tuple lists.\nYour code should pass the following tests:\nassert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4)\nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n```python\ndef similar_elements(test_tup1, test_tup2):\n res = tuple(set(test_tup1) & set(test_tup2))\n return (res) \n```<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nYou are an expert Python programmer, and here is your task:\nWrite a python function to identify non-prime numbers.\nYour code should pass the following tests:\nassert is_not_prime(2) == False\nassert is_not_prime(10) == True\nassert is_not_prime(35) == True<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n```python\nimport math\ndef is_not_prime(n):\n result = False\n for i in range(2,int(math.sqrt(n)) + 1):\n if n % i == 0:\n result = True\n return result\n```<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nYou are an expert Python programmer, and here is your task:\nWrite a function to find the largest integers from a given list of numbers using heap queue algorithm.\nYour code should pass the following tests:\nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n```python\nimport heapq as hq\ndef heap_queue_largest(nums,n):\n largest_nums = hq.nlargest(n, nums)\n return largest_nums\n```<|eot_id|>
        <|eot_id|><|start_header_id|>user<|end_header_id|>\n\nYour outputs should follow this structure:
        <problem_identification>
        
        Identify the core components of this problem: {input_query}
        1. Rewrite the problem. identify potential edge cases and tricky parts.
        2. Write 2 short test cases for the edge cases and tricky parts.
        </problem_identification>
        
        <chain_of_thoughts>
        1. you must consider the edge cases according to the problem statement.
        2. If the problem is simple generate accurate and short coding steps otherwise generate two distinct very brief coding chains of thoughts/algorithms and present a clear plan of steps to solve this coding problem briefly. Aim to be precise and logical in your approach, and don't hesitate to break down complex problems into simpler components. focus on clear thought processes.
        </chain_of_thoughts>

        <chain_of_thought_selection>
        you must consider the edge cases according to the problem statement and select the most promising chain of thought that solves the edge cases (not necessarily the simplest nor the standard approach).
        </chain_of_thought_selection>

        <solution>
        1. Rewrite (copy/paste) the provided test cases (you must pay attention to the data types and edge cases according to the problem and test cases). 
        2. Rewrite the problem.
        3. Handle edge cases properly
        4. As a Python expert, generate the Python code and make sure it solves the edge cases.
        5. the internal steps must produce the required output.
        </solution>
        
        Include a <reflection> section for the selected solution where you. If it is not correct, modify or if necessary, rewrite the solution and pay attention to the input problem.
       a. Review your reasoning.
       b. Check for potential errors or oversights according to the problem. you must consider the edge cases according to the problem.
       c. Confirm or adjust your conclusion if necessary.
        4. Be sure to close all reflection sections.

        <context_expansion>
        1. Rewrite the problem.
        2. Rewrite the edge cases and tricky parts in one short sentence
        2. Generate a very accurate and minimal python pseudocode for the selected chain of thought.
        </context_expansion>
        <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n```python"
        """
        
        generated_text = self.text_generator.generate_text(
            prompt,
            max_new_tokens=1000,
            num_return_sequences = 1,
            output_file="data/results.csv",
            # stop_sequences=["The end", ".\n\n"],
        )
        return generated_text

    def extract_hidden_states(self, context):
        best_crv, seq_length = self.crv_generator.generate_crvs(
            context, crv_layers=crv_layers, max_length=configs.MAX_LENGTH
        )
        return best_crv, seq_length  # Return the hidden state and its len

    def generate_crv(self, hidden_states, seq_length):
        # return torch.mean(hidden_states, dim=1)
        return hidden_states, seq_length
        
    def final_generation(self, original_query, test_cases, crv, seq_length):

        f"""<|start_header_id|>user<|end_header_id|>\n\nYou are an expert Python programmer, and here is your task:\n{original_query}.\nYour code must remember and pass the following test cases:{test_cases}"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n```python"""
        # Combine original query and CRV
        self.memory_manager.add_memory(
        crv, seq_length, layer_idx=self.layer_idx, crv_layers=crv_layers
    )

        model.model.set_post_concat_crv(True)
        self.memory_manager.set_concat_positions(0, start_pos=0, end_pos=seq_length)
        self.memory_manager.apply_memory_to_model(0)
        generated_text = self.text_generator.generate_text(
            query,
            max_new_tokens=1000,
            num_return_sequences = 1,
            output_file="data/results.csv",
            # stop_sequences=["The end", ".\n\n"],
        )
        # print(generated_text)
        print('==' * 50)
        return generated_text

In [83]:
framework = AdvancedLLaMACRVFramework(model, tokenizer)

# Original query
query = "Write a function to find the minimum value in a given heterogeneous list."
    
# Instance 1: Generate thought trajectories and context
trajectories_and_context = framework.generate_thought_trajectories(query)
print("Trajectories and Context:", trajectories_and_context)
context_expansion = extract_context_expansion(trajectories_and_context)
print("the extracted context: ", context_expansion)

# Instance 2: Extract hidden states from generated context
hidden_states, seq_len = framework.extract_hidden_states(context_expansion)

# Generate CRV from hidden states
crv, seq_len = framework.generate_crv(hidden_states, seq_len)
print("Generated CRV:")

test_cases = '''\nassert min_val(['Python', 3, 2, 4, 5, 'version'])==2\nassert min_val(['Python', 15, 20, 25])==15\nassert min_val(['Python', 30, 20, 40, 50, 'version'])==20'''
# Instance 3: Final generation using original query and CRV
final_output = framework.final_generation(query, test_cases, crv, seq_len)
print("Final Output:", final_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Both `max_new_tokens` (=1000) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


 <problem_identification>
         Identify the core components of this problem: Write a function to find the minimum value in a given heterogeneous list.
1. Rewrite the problem: The problem asks to find the minimum value in a list that can contain elements of any data type.
2. Identify potential edge cases and tricky parts:
   - Edge case 1: Empty list
   - Edge case 2: List with a single element
   - Edge case 3: List with elements of different data types (e.g., integers, floats, strings)
   - Tricky part: Comparing elements of different data types

Test cases for edge cases and tricky parts:
assert find_min_value([]) == None
assert find_min_value([5]) == 5
assert find_min_value([5, 3.5, 'a']) == 3.5

</problem_identification>

<chain_of_thoughts>
1. We must handle the edge case of an empty list and return None in this case.
2. We need to handle the case where the list has a single element and return that element.
3. For lists with elements of different data types, we need to find a 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Both `max_new_tokens` (=1000) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


 in this case, the min function cannot be used directly due to the heterogeneity of the list. Instead, we will have to implement a custom solution.
3. Find the smallest element in the list. This is the minimum value in a list with a single element.
4. To find the minimum value in the list, we can't use the min() function because it doesn't work with lists containing elements of different data types.
5. Use a loop to iterate over the list and compare each element with the current minimum element.
6. Compare the current element with the minimum element found so far.
7. Return the minimum element.

Here is the code:

```python
def find_min(lst):
    """
    This function finds the minimum value in a given list.
    
    Args:
        lst (list): A list that may contain elements of any data type.
    
    Returns:
        The minimum value in the list. If the list is empty, returns None.
    """

    # Check if the list is empty
    if not lst:
        return None
    
    # If the list ha