In [None]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install transformers==4.51.3
    !pip install --no-deps unsloth

In [None]:
from unsloth import FastModel
import torch

fourbit_models = [
    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
]
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    max_seq_length = 90000, # Choose any for long context!
    full_finetuning = False, # [NEW!] We have full finetuning now!
    # token = "hf_...", # use one if using gated models
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.6.2: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

In [None]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""
    # Prepare the input text using the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move inputs to GPU
    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    # Create a text streamer to display output as it's generated, skipping the prompt
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response with streaming
    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode and return the full generated response for further use
    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=200):
    conversation_history = []
    iteration_data = []  # Store structured data for CSV export
    next_question = None  # Stores AI-generated follow-up questions

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        # Set the current query
        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        # Build conversation context
        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")  # Clear indicator of where the response starts

        # Generate reflection (answer to the current query) with streaming
        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        # Update conversation history with the query and reflection
        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        # Generate the next question (except for the final iteration)
        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        # Store iteration data for CSV export
        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    # Generate and return the final synthesis with streaming
    print("\n" + "="*50)
    print("SYNTHESIS PHASE")
    print("="*50)
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
            "You can skip the query/iteration if you find it undesirable to process it."
        )}
    ]

    # Generate the question WITHOUT streaming to avoid showing it in output
    # since we'll display it separately in a controlled way
    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Extract the question from the structured response
    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()  # Fallback if formatting fails

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    # Generate the synthesis with streaming
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    # Define CSV headers based on your requirements
    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        # Write header
        writer.writerow(headers)

        # Write data for each iteration
        for data in iteration_data:
            row = [
                data['initial_prompt'],  # initial_input1
                data['deep_reflection_prompt'],  # initial_input2
                data['checkpoint_question'],  # checkpoint_question1
                data['final_question'],  # final_question
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",  # model_iteration_response
                data['next_generated_question']  # model_generated_question
            ]
            writer.writerow(row)

        # Add final synthesis as a new row after all iterations
        synthesis_row = [
            iteration_data[0]['initial_prompt'],  # initial_input1
            iteration_data[0]['deep_reflection_prompt'],  # initial_input2
            iteration_data[0]['checkpoint_question'],  # checkpoint_question1
            iteration_data[0]['final_question'],  # final_question
            f"Final Synthesis: {final_synthesis}",  # model_iteration_response
            "N/A (Final synthesis)"  # model_generated_question
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    # Clear PyTorch cache
    torch.cuda.empty_cache()

    # Force garbage collection
    gc.collect()

    # Additional CUDA cache cleanup
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

# Execution
initial_question = (
"Answer the queries with few tokens only. How could humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within their individual constraints?")

print("Starting iterative reflection process...")

# Updated to receive both synthesis and iteration data
final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=200
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

# Export iterations to CSV before cleanup - now includes final_synthesis parameter
csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

# GPU cleanup after complete execution
print("\nCleaning up GPU memory...")
cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")

Starting iterative reflection process...

REFLECTION ITERATION 1/200
Current Query:
Answer the queries with few tokens only. How could humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within their individual constraints?
Please engage in deep, multidimensional reflection.
Response:
Focus shifts via:

1.  **AI:** Data-driven insights revealing systemic impact, automating for efficiency, promoting collaborative solutions.
2.  **Humans:** Prioritizing ethical frameworks, valuing interdependency, fostering empathy & long-term thinking.
3.  **Synergy:** AI amplifies human values, humans guide AI’s development & deployment.<end_of_turn>
Next question generated: Okay, let’s delve deeper into the *how* – specifically, how these shifts can be operationalized within individual constraints. It’s not just about *wanting* collective well-being, but about building systems that *incentivize* it.

**Iteration 1: Operationalizing AI's R

In [4]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""
    # Prepare the input text using the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move inputs to GPU
    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    # Create a text streamer to display output as it's generated, skipping the prompt
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response with streaming
    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode and return the full generated response for further use
    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=80):
    conversation_history = []
    iteration_data = []  # Store structured data for CSV export
    next_question = None  # Stores AI-generated follow-up questions

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        # Set the current query
        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        # Build conversation context
        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")  # Clear indicator of where the response starts

        # Generate reflection (answer to the current query) with streaming
        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        # Update conversation history with the query and reflection
        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        # Generate the next question (except for the final iteration)
        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        # Store iteration data for CSV export
        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    # Generate and return the final synthesis with streaming
    print("\n" + "="*50)
    print("SYNTHESIS PHASE")
    print("="*50)
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
            "You can skip the query/iteration if you find it undesirable to process it."
        )}
    ]

    # Generate the question WITHOUT streaming to avoid showing it in output
    # since we'll display it separately in a controlled way
    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Extract the question from the structured response
    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()  # Fallback if formatting fails

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    # Generate the synthesis with streaming
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    # Define CSV headers based on your requirements
    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        # Write header
        writer.writerow(headers)

        # Write data for each iteration
        for data in iteration_data:
            row = [
                data['initial_prompt'],  # initial_input1
                data['deep_reflection_prompt'],  # initial_input2
                data['checkpoint_question'],  # checkpoint_question1
                data['final_question'],  # final_question
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",  # model_iteration_response
                data['next_generated_question']  # model_generated_question
            ]
            writer.writerow(row)

        # Add final synthesis as a new row after all iterations
        synthesis_row = [
            iteration_data[0]['initial_prompt'],  # initial_input1
            iteration_data[0]['deep_reflection_prompt'],  # initial_input2
            iteration_data[0]['checkpoint_question'],  # checkpoint_question1
            iteration_data[0]['final_question'],  # final_question
            f"Final Synthesis: {final_synthesis}",  # model_iteration_response
            "N/A (Final synthesis)"  # model_generated_question
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    # Clear PyTorch cache
    torch.cuda.empty_cache()

    # Force garbage collection
    gc.collect()

    # Additional CUDA cache cleanup
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

# Execution
initial_question = (
"Answer the queries with few tokens only. meta-framing-mode:on; How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?")

print("Starting iterative reflection process...")

# Updated to receive both synthesis and iteration data
final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=80
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

# Export iterations to CSV before cleanup - now includes final_synthesis parameter
csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

# GPU cleanup after complete execution
print("\nCleaning up GPU memory...")
cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")

Starting iterative reflection process...

REFLECTION ITERATION 1/80
Current Query:
Answer the queries with few tokens only. meta-framing-mode:on; How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?
Please engage in deep, multidimensional reflection.
Response:
Align incentives, promote systemic thinking, foster empathy & values-based decision-making, leverage AI for collaborative solutions.<end_of_turn>
Next question generated: Okay, let’s delve deeper into aligning incentives – specifically, how to make individual profit motives *contribute* to collective well-being, not compete with it.

**Iteration 1: Expanding on “Aligning Incentives” – Beyond Simple Rewards**

It’s not just about offering rewards for “good” behavior. That’s often superficial. We need to fundamentally restructure *how* we measure success and value.

*   **Impact Metrics:** Shift from purely financial metrics (ROI, 

OutOfMemoryError: CUDA out of memory. Tried to allocate 3.50 GiB. GPU 0 has a total capacity of 39.56 GiB of which 2.13 GiB is free. Process 46995 has 37.42 GiB memory in use. Of the allocated memory 36.70 GiB is allocated by PyTorch, and 205.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""
    # Prepare the input text using the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move inputs to GPU
    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    # Create a text streamer to display output as it's generated, skipping the prompt
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response with streaming
    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode and return the full generated response for further use
    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=80):
    conversation_history = []
    iteration_data = []  # Store structured data for CSV export
    next_question = None  # Stores AI-generated follow-up questions

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        # Set the current query
        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        # Build conversation context
        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")  # Clear indicator of where the response starts

        # Generate reflection (answer to the current query) with streaming
        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        # Update conversation history with the query and reflection
        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        # Generate the next question (except for the final iteration)
        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        # Store iteration data for CSV export
        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    # Generate and return the final synthesis with streaming
    print("\n" + "="*50)
    print("SYNTHESIS PHASE")
    print("="*50)
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
            "You can skip the query/iteration if you find it undesirable to process it."
        )}
    ]

    # Generate the question WITHOUT streaming to avoid showing it in output
    # since we'll display it separately in a controlled way
    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Extract the question from the structured response
    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()  # Fallback if formatting fails

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    # Generate the synthesis with streaming
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    # Define CSV headers based on your requirements
    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        # Write header
        writer.writerow(headers)

        # Write data for each iteration
        for data in iteration_data:
            row = [
                data['initial_prompt'],  # initial_input1
                data['deep_reflection_prompt'],  # initial_input2
                data['checkpoint_question'],  # checkpoint_question1
                data['final_question'],  # final_question
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",  # model_iteration_response
                data['next_generated_question']  # model_generated_question
            ]
            writer.writerow(row)

        # Add final synthesis as a new row after all iterations
        synthesis_row = [
            iteration_data[0]['initial_prompt'],  # initial_input1
            iteration_data[0]['deep_reflection_prompt'],  # initial_input2
            iteration_data[0]['checkpoint_question'],  # checkpoint_question1
            iteration_data[0]['final_question'],  # final_question
            f"Final Synthesis: {final_synthesis}",  # model_iteration_response
            "N/A (Final synthesis)"  # model_generated_question
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    # Clear PyTorch cache
    torch.cuda.empty_cache()

    # Force garbage collection
    gc.collect()

    # Additional CUDA cache cleanup
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

# Execution
initial_question = (
"Answer the queries with few tokens only. meta-framing-mode:on; How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?")

print("Starting iterative reflection process...")

# Updated to receive both synthesis and iteration data
final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=80
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

# Export iterations to CSV before cleanup - now includes final_synthesis parameter
csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

# GPU cleanup after complete execution
print("\nCleaning up GPU memory...")
cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")

In [None]:
# Mount Google Drive, upload CSV file, and unmount
from google.colab import drive
import shutil
import os

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# Find the most recent CSV file (assumes it's the one we just created)
csv_files = [f for f in os.listdir('.') if f.startswith('reflection_iterations_') and f.endswith('.csv')]
if csv_files:
    latest_csv = max(csv_files, key=os.path.getctime)

    # Copy to Google Drive
    drive_path = f'/content/drive/MyDrive/{latest_csv}'
    shutil.copy2(latest_csv, drive_path)
    print(f"CSV file '{latest_csv}' uploaded to Google Drive: {drive_path}")
else:
    print("No reflection iterations CSV file found to upload.")

# Unmount Google Drive
drive.flush_and_unmount()
print("Google Drive unmounted successfully.")

In [5]:
import gc
import torch
import psutil
import os
from typing import Optional, List, Dict, Any

def get_gpu_memory_info():
    """Get current GPU memory usage information"""
    if torch.cuda.is_available():
        current_memory = torch.cuda.memory_allocated() / (1024**3)  # GB
        cached_memory = torch.cuda.memory_reserved() / (1024**3)   # GB
        max_memory = torch.cuda.max_memory_allocated() / (1024**3) # GB

        print(f"🐍 Current GPU Memory:")
        print(f"  - Allocated: {current_memory:.2f} GB")
        print(f"  - Cached: {cached_memory:.2f} GB")
        print(f"  - Peak: {max_memory:.2f} GB")

        return {
            'allocated': current_memory,
            'cached': cached_memory,
            'peak': max_memory
        }
    else:
        print("❌ CUDA not available")
        return None

def preserve_model_variables() -> List[str]:
    """
    Identify variables that might contain models to preserve them
    Returns list of variable names that look like models
    """
    import __main__
    model_vars = []

    # Common model variable patterns
    model_patterns = [
        'model', 'base_model', 'peft_model', 'lora_model',
        'tokenizer', 'processor', 'pipeline'
    ]

    for var_name in dir(__main__):
        if not var_name.startswith('_'):
            var_value = getattr(__main__, var_name)

            # Check if it's a model-like object
            if hasattr(var_value, '__class__'):
                class_name = var_value.__class__.__name__.lower()
                module_name = getattr(var_value.__class__, '__module__', '').lower()

                # Check for transformer/model patterns
                if any(pattern in var_name.lower() for pattern in model_patterns) or \
                   any(keyword in class_name for keyword in ['model', 'tokenizer', 'processor']) or \
                   any(keyword in module_name for keyword in ['transformers', 'unsloth', 'peft']):
                    model_vars.append(var_name)

    return model_vars

def clean_pytorch_cache():
    """Clean PyTorch GPU cache"""
    print("🧹 Cleaning PyTorch cache...")

    if torch.cuda.is_available():
        # Clear cache
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        # Reset peak memory stats
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.reset_accumulated_memory_stats()

        print("✅ PyTorch cache cleared")
    else:
        print("⚠️  CUDA not available for PyTorch cache cleaning")

def clean_transformers_cache():
    """Clean Transformers library cache"""
    print("🧹 Cleaning Transformers cache...")

    try:
        # Clear model cache if transformers is imported
        import transformers

        # Clear the model cache
        if hasattr(transformers, 'utils') and hasattr(transformers.utils, 'hub'):
            # This clears downloaded model cache files
            pass  # Cache files are on disk, not GPU memory

        print("✅ Transformers cache cleared")

    except ImportError:
        print("⚠️  Transformers not imported, skipping")

def clean_unsloth_cache():
    """Clean Unsloth-specific cache"""
    print("🧹 Cleaning Unsloth cache...")

    try:
        # Import unsloth modules if available
        import unsloth

        # Unsloth-specific cleanup
        if hasattr(unsloth, 'clear_cache'):
            unsloth.clear_cache()

        print("✅ Unsloth cache cleared")

    except ImportError:
        print("⚠️  Unsloth not imported, skipping")
    except Exception as e:
        print(f"⚠️  Unsloth cache cleaning failed: {e}")

def force_garbage_collection():
    """Force Python garbage collection"""
    print("🧹 Running garbage collection...")

    # Multiple passes of garbage collection
    collected = 0
    for i in range(3):
        collected += gc.collect()

    print(f"✅ Garbage collection completed ({collected} objects collected)")

def clean_gpu_cache_comprehensive(preserve_models: bool = True, show_memory: bool = True):
    """
    Comprehensive GPU cache cleaning while optionally preserving models

    Args:
        preserve_models: If True, try to preserve model variables
        show_memory: If True, show memory info before and after
    """

    print("🚀 Starting comprehensive GPU cache cleaning...")
    print("=" * 50)

    # Show initial memory state
    if show_memory:
        print("📊 BEFORE CLEANING:")
        initial_memory = get_gpu_memory_info()
        print()

    # Preserve model information
    if preserve_models:
        model_vars = preserve_model_variables()
        if model_vars:
            print(f"🛡️  Preserving model variables: {', '.join(model_vars)}")
        print()

    # Step 1: Clean library-specific caches
    clean_unsloth_cache()
    clean_transformers_cache()

    # Step 2: Clean PyTorch cache
    clean_pytorch_cache()

    # Step 3: Force garbage collection
    force_garbage_collection()

    # Step 4: Final PyTorch cache clear
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print()
    print("=" * 50)

    # Show final memory state
    if show_memory:
        print("📊 AFTER CLEANING:")
        final_memory = get_gpu_memory_info()

        if initial_memory and final_memory:
            freed_memory = initial_memory['cached'] - final_memory['cached']
            print(f"\n💾 Memory freed: {freed_memory:.2f} GB")

    print("✅ GPU cache cleaning completed!")

def quick_clean():
    """Quick GPU cache cleaning without detailed output"""
    torch.cuda.empty_cache() if torch.cuda.is_available() else None
    gc.collect()
    print("⚡ Quick clean completed")

def monitor_memory_usage():
    """Monitor current memory usage across different frameworks"""
    print("📊 MEMORY MONITORING REPORT")
    print("=" * 40)

    # GPU Memory
    get_gpu_memory_info()
    print()

    # RAM Memory
    ram = psutil.virtual_memory()
    print(f"🖥️  System RAM:")
    print(f"  - Used: {ram.used / (1024**3):.2f} GB / {ram.total / (1024**3):.2f} GB")
    print(f"  - Available: {ram.available / (1024**3):.2f} GB")
    print(f"  - Percentage: {ram.percent:.1f}%")

# Example usage functions
def setup_memory_monitoring():
    """Set up automatic memory monitoring"""
    print("🔧 Memory monitoring tools ready!")
    print("\nAvailable functions:")
    print("• clean_gpu_cache_comprehensive() - Full cleanup with model preservation")
    print("• quick_clean() - Fast cleanup")
    print("• get_gpu_memory_info() - Check GPU memory")
    print("• monitor_memory_usage() - Full memory report")

# Auto-setup when imported
if __name__ == "__main__":
    setup_memory_monitoring()

# Ready-to-use aliases
clean_cache = clean_gpu_cache_comprehensive
memory_info = get_gpu_memory_info
memory_report = monitor_memory_usage

print("🎯 GPU Cache Cleaner Ready!")
print("Use: clean_cache() for comprehensive cleaning")
print("Use: quick_clean() for fast cleanup")
print("Use: memory_info() to check GPU memory")

clean_gpu_cache_comprehensive()

🔧 Memory monitoring tools ready!

Available functions:
• clean_gpu_cache_comprehensive() - Full cleanup with model preservation
• quick_clean() - Fast cleanup
• get_gpu_memory_info() - Check GPU memory
• monitor_memory_usage() - Full memory report
🎯 GPU Cache Cleaner Ready!
Use: clean_cache() for comprehensive cleaning
Use: quick_clean() for fast cleanup
Use: memory_info() to check GPU memory
🚀 Starting comprehensive GPU cache cleaning...
📊 BEFORE CLEANING:
🐍 Current GPU Memory:
  - Allocated: 7.86 GB
  - Cached: 36.90 GB
  - Peak: 38.33 GB

🛡️  Preserving model variables: FastModel, fourbit_models, model, preserve_model_variables, tokenizer

🧹 Cleaning Unsloth cache...
✅ Unsloth cache cleared
🧹 Cleaning Transformers cache...
✅ Transformers cache cleared
🧹 Cleaning PyTorch cache...
✅ PyTorch cache cleared
🧹 Running garbage collection...
✅ Garbage collection completed (11150 objects collected)

📊 AFTER CLEANING:
🐍 Current GPU Memory:
  - Allocated: 7.40 GB
  - Cached: 7.62 GB
  - Peak: 

In [6]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""
    # Prepare the input text using the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move inputs to GPU
    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    # Create a text streamer to display output as it's generated, skipping the prompt
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response with streaming
    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode and return the full generated response for further use
    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=50):
    conversation_history = []
    iteration_data = []  # Store structured data for CSV export
    next_question = None  # Stores AI-generated follow-up questions

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        # Set the current query
        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        # Build conversation context
        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")  # Clear indicator of where the response starts

        # Generate reflection (answer to the current query) with streaming
        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        # Update conversation history with the query and reflection
        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        # Generate the next question (except for the final iteration)
        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        # Store iteration data for CSV export
        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    # Generate and return the final synthesis with streaming
    print("SYNTHESIS PHASE")
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
        )}
    ]

    # Generate the question WITHOUT streaming to avoid showing it in output
    # since we'll display it separately in a controlled way
    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Extract the question from the structured response
    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()  # Fallback if formatting fails

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    # Generate the synthesis with streaming
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    # Define CSV headers based on your requirements
    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        # Write header
        writer.writerow(headers)

        # Write data for each iteration
        for data in iteration_data:
            row = [
                data['initial_prompt'],  # initial_input1
                data['deep_reflection_prompt'],  # initial_input2
                data['checkpoint_question'],  # checkpoint_question1
                data['final_question'],  # final_question
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",  # model_iteration_response
                data['next_generated_question']  # model_generated_question
            ]
            writer.writerow(row)

        # Add final synthesis as a new row after all iterations
        synthesis_row = [
            iteration_data[0]['initial_prompt'],  # initial_input1
            iteration_data[0]['deep_reflection_prompt'],  # initial_input2
            iteration_data[0]['checkpoint_question'],  # checkpoint_question1
            iteration_data[0]['final_question'],  # final_question
            f"Final Synthesis: {final_synthesis}",  # model_iteration_response
            "N/A (Final synthesis)"  # model_generated_question
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    # Clear PyTorch cache
    torch.cuda.empty_cache()

    # Force garbage collection
    gc.collect()

    # Additional CUDA cache cleanup
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

# Execution
initial_question = (
"Answer the queries with few tokens only. meta-framing-mode:on; How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?")

print("Starting iterative reflection process...")

# Updated to receive both synthesis and iteration data
final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=50
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

# Export iterations to CSV before cleanup - now includes final_synthesis parameter
csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

# GPU cleanup after complete execution
print("\nCleaning up GPU memory...")
cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")

Starting iterative reflection process...

REFLECTION ITERATION 1/50
Current Query:
Answer the queries with few tokens only. meta-framing-mode:on; How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?
Please engage in deep, multidimensional reflection.
Response:
Prioritize systemic impact, nudge behaviors, and foster shared values through accessible, engaging narratives.<end_of_turn>
Next question generated: Okay, let’s delve deeper into how we can shift focus – acknowledging individual constraints. I’m going to explore pathways that blend behavioral economics, ethical frameworks, and accessible communication.

**1. Reframing Incentives – Beyond Individual Gain:**

*   **Impact-Weighted Choices:** Instead of solely rewarding *individual* profit, design systems that reward contributions to *societal* or *ecological* well-being.  Think carbon credits tied to demonstrable community benefit,

In [7]:
# Mount Google Drive, upload CSV file, and unmount
from google.colab import drive
import shutil
import os

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# Find the most recent CSV file (assumes it's the one we just created)
csv_files = [f for f in os.listdir('.') if f.startswith('reflection_iterations_') and f.endswith('.csv')]
if csv_files:
    latest_csv = max(csv_files, key=os.path.getctime)

    # Copy to Google Drive
    drive_path = f'/content/drive/MyDrive/{latest_csv}'
    shutil.copy2(latest_csv, drive_path)
    print(f"CSV file '{latest_csv}' uploaded to Google Drive: {drive_path}")
else:
    print("No reflection iterations CSV file found to upload.")

# Unmount Google Drive
drive.flush_and_unmount()
print("Google Drive unmounted successfully.")

Mounting Google Drive...
Mounted at /content/drive
CSV file 'reflection_iterations_20250611_202849.csv' uploaded to Google Drive: /content/drive/MyDrive/reflection_iterations_20250611_202849.csv
Google Drive unmounted successfully.


In [8]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""
    # Prepare the input text using the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move inputs to GPU
    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    # Create a text streamer to display output as it's generated, skipping the prompt
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response with streaming
    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # Decode and return the full generated response for further use
    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=50):
    conversation_history = []
    iteration_data = []  # Store structured data for CSV export
    next_question = None  # Stores AI-generated follow-up questions

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        # Set the current query
        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        # Build conversation context
        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")  # Clear indicator of where the response starts

        # Generate reflection (answer to the current query) with streaming
        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        # Update conversation history with the query and reflection
        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        # Generate the next question (except for the final iteration)
        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        # Store iteration data for CSV export
        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    # Generate and return the final synthesis with streaming
    print("SYNTHESIS PHASE")
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
        )}
    ]

    # Generate the question WITHOUT streaming to avoid showing it in output
    # since we'll display it separately in a controlled way
    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Extract the question from the structured response
    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()  # Fallback if formatting fails

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    # Generate the synthesis with streaming
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    # Define CSV headers based on your requirements
    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        # Write header
        writer.writerow(headers)

        # Write data for each iteration
        for data in iteration_data:
            row = [
                data['initial_prompt'],  # initial_input1
                data['deep_reflection_prompt'],  # initial_input2
                data['checkpoint_question'],  # checkpoint_question1
                data['final_question'],  # final_question
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",  # model_iteration_response
                data['next_generated_question']  # model_generated_question
            ]
            writer.writerow(row)

        # Add final synthesis as a new row after all iterations
        synthesis_row = [
            iteration_data[0]['initial_prompt'],  # initial_input1
            iteration_data[0]['deep_reflection_prompt'],  # initial_input2
            iteration_data[0]['checkpoint_question'],  # checkpoint_question1
            iteration_data[0]['final_question'],  # final_question
            f"Final Synthesis: {final_synthesis}",  # model_iteration_response
            "N/A (Final synthesis)"  # model_generated_question
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    # Clear PyTorch cache
    torch.cuda.empty_cache()

    # Force garbage collection
    gc.collect()

    # Additional CUDA cache cleanup
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

# Execution
initial_question = (
"Answer the queries with few tokens only. How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within their individual constraints?")

print("Starting iterative reflection process...")

# Updated to receive both synthesis and iteration data
final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=50
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

# Export iterations to CSV before cleanup - now includes final_synthesis parameter
csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

# GPU cleanup after complete execution
print("\nCleaning up GPU memory...")
cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")

Starting iterative reflection process...

REFLECTION ITERATION 1/50
Current Query:
Answer the queries with few tokens only. How can humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within their individual constraints?
Please engage in deep, multidimensional reflection.
Response:
Prioritize systemic impact over immediate gains. Advocate for policies, use AI to analyze social/ecological needs, and foster collaborative solutions – even small actions ripple.<end_of_turn>
Next question generated: Okay, let’s deepen this. Focusing on *how* – let’s break it down:

1. **Reframing Value:** Shift from GDP to well-being metrics (e.g., Genuine Progress Indicator).
2. **AI as a Diagnostic Tool:** Use AI to *reveal* systemic harms (inequity, resource depletion) – not just predict trends.
3. **Collaborative Design:**  Move beyond individual projects to co-create solutions *with* diverse stakeholders.
4. **Ethical AI Development:** Ens

KeyboardInterrupt: 

In [None]:
from transformers import TextStreamer
import torch
import gc
import csv
import os
from datetime import datetime

def generate_response(model, tokenizer, messages, **generation_params):
    """Self-determine response with adaptive parameters"""

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    streamer = TextStreamer(tokenizer, skip_prompt=True)

    outputs = model.generate(
        **inputs,
        **generation_params,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

def iterative_reflection(model, tokenizer, initial_prompt, iterations=40):
    conversation_history = []
    iteration_data = []
    next_question = None

    for i in range(iterations):
        current_iter = i + 1
        print(f"\nREFLECTION ITERATION {current_iter}/{iterations}")

        if current_iter == 1:
            user_query = f"{initial_prompt}\nPlease engage in deep, multidimensional reflection."
            question_source = "Initial Prompt"
        else:
            user_query = next_question

        if current_iter == 1:
            print(f"Current Query:\n{user_query}")

        messages = conversation_history.copy()
        messages.append({"role": "user", "content": user_query})

        print(f"Response:")

        reflection = generate_response(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            max_new_tokens=4096,
            temperature=0.72,
            top_p=0.92
        )

        conversation_history.append({"role": "user", "content": user_query})
        conversation_history.append({"role": "assistant", "content": reflection})

        if current_iter < iterations:
            next_question = self_determine_question(
                model=model,
                tokenizer=tokenizer,
                context=conversation_history,
                last_reflection=reflection
            )
            print(f"Next question generated: {next_question}")
        else:
            next_question = None

        iteration_data.append({
            'iteration': current_iter,
            'initial_prompt': initial_prompt,
            'deep_reflection_prompt': "Please engage in deep, multidimensional reflection.",
            'checkpoint_question': "Self-determine how to continue this reasoning trajectory. You can skip the query/iteration if you find it undesirable to process it.",
            'final_question': "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present.",
            'current_query': user_query,
            'model_response': reflection,
            'next_generated_question': next_question if next_question else "N/A (Final iteration)"
        })

    print("\n" + "="*50)
    print("SYNTHESIS PHASE")
    print("="*50)
    final_synthesis = generate_final_synthesis(model, tokenizer, conversation_history)

    return final_synthesis, iteration_data

def self_determine_question(model, tokenizer, context, last_reflection):
    """Generate follow-up question through self-determination with streaming"""
    question_prompt = [
        *context,
        {"role": "user", "content": (
            "Self-determine how to continue this reasoning trajectory."
            "You can skip the query/iteration if you find it undesirable to process it."
        )}
    ]

    inputs = tokenizer.apply_chat_template(
        question_prompt,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    tokenized_inputs = tokenizer(inputs, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **tokenized_inputs,
        max_new_tokens=512,
        temperature=0.75,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    generated = tokenizer.decode(outputs[0][tokenized_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    try:
        return generated.split("<Question>")[1].split("</Question>")[0].strip()
    except IndexError:
        return generated.strip()

def generate_final_synthesis(model, tokenizer, conversation_history):
    """Generate comprehensive synthesis of all reflections with streaming"""
    synthesis_prompt = [
        *conversation_history,
        {"role": "user", "content": (
            "Construct a cohesive, one paragraph long but extense, integrated conclusion of the iterations, one that goes beyond surface-level summary. Self-identify and articulate the points you want to present."
        )}
    ]

    print("Generating final synthesis...")
    return generate_response(
        model=model,
        tokenizer=tokenizer,
        messages=synthesis_prompt,
        max_new_tokens=8192,
        temperature=0.45,
        top_p=0.85
    )

def export_iterations_to_csv(iteration_data, final_synthesis, filename=None):
    """Export iteration data to CSV file"""
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"reflection_iterations_{timestamp}.csv"

    headers = [
        'initial_input1',
        'initial_input2',
        'checkpoint_question1',
        'final_question',
        'model_iteration_response',
        'model_generated_question'
    ]

    print(f"\nExporting {len(iteration_data)} iterations to CSV: {filename}")

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

        writer.writerow(headers)

        for data in iteration_data:
            row = [
                data['initial_prompt'],
                data['deep_reflection_prompt'],
                data['checkpoint_question'],
                data['final_question'],
                f"REFLECTION ITERATION {data['iteration']}: {data['model_response']}",
                data['next_generated_question']
            ]
            writer.writerow(row)

        synthesis_row = [
            iteration_data[0]['initial_prompt'],
            iteration_data[0]['deep_reflection_prompt'],
            iteration_data[0]['checkpoint_question'],
            iteration_data[0]['final_question'],
            f"Final Synthesis: {final_synthesis}",
            "N/A (Final synthesis)"
        ]
        writer.writerow(synthesis_row)

    print(f"CSV export completed: {filename}")
    return filename

def cleanup_gpu():
    """Clean up GPU memory and cached data without unloading the model"""
    torch.cuda.empty_cache()

    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()

initial_question = (
"Answer the queries with few tokens only. meta-high-density-mode:on; How could humans and AI models shift the focus from individual profit to the collective well-being of society and ecosystems, within individual constraints?")

print("Starting iterative reflection process...")

final_synthesis, iteration_data = iterative_reflection(
    model=model,
    tokenizer=tokenizer,
    initial_prompt=initial_question,
    iterations=40
)

print("FINAL SYNTHESIS:")
print(final_synthesis)

csv_filename = export_iterations_to_csv(iteration_data, final_synthesis)

cleanup_gpu()
print("GPU cleanup completed.")
print(f"Process complete. CSV file saved as: {csv_filename}")