In [None]:
%%capture
# Environment setup: install Unsloth and the packages it is used with here.
# The %%capture cell magic on the first line suppresses all output of this cell.
import os
# Colab is detected by looking for any environment variable whose name contains "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    # Not running in Colab: a plain install of unsloth.
    !pip install unsloth
else:
    # Do this only in Colab notebooks!
    # The first and last installs use --no-deps, so pip does not install those
    # packages' own dependencies; xformers and trl are pinned to exact versions.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
# NOTE(review): this repeats the package list from the Colab branch of the setup cell,
# but without --no-deps and regardless of environment — confirm it is still needed.
!pip install bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
from unsloth import FastLanguageModel
import torch

# Load the "unsloth/Qwen3-8B-unsloth-bnb-4bit" checkpoint and its tokenizer through Unsloth.
# NOTE(review): max_seq_length = 96000 is above the 40960 limit that Unsloth reports for
# this checkpoint in the cell output, so it applies RoPE scaling (2.344 according to that
# log) — confirm the extended context is intended.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-8B-unsloth-bnb-4bit",
    max_seq_length = 96000,
    full_finetuning = False,
    )

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.5.7: Fast Qwen3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: unsloth/Qwen3-8B-unsloth-bnb-4bit can only handle sequence lengths of at most 40960.
But with kaiokendev's RoPE scaling of 2.344, it can be magically be extended to 96000!


model.safetensors.index.json:   0%|          | 0.00/144k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/707 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/4.67k [00:00<?, ?B/s]

In [None]:
from transformers import TextStreamer

In [None]:
#HYBRID SELF-REFLECTION 3 - 27 MAY 2025 - QWEN3-14B-UNSLOTH-BNB-4BIT
# NOTE(review): the header names the 14B checkpoint, but the model loaded earlier in this
# notebook is "unsloth/Qwen3-8B-unsloth-bnb-4bit" — confirm which one this run targets.
def iterative_reflection(model, tokenizer, initial_prompt, iterations=222):
    """Run a multi-turn self-reflection loop and stream a final synthesis.

    The conversation is rebuilt on every iteration: the full history so far plus
    one new user message is rendered with the tokenizer's chat template
    (``enable_thinking=False``) and sent to ``model.generate``. The whole history
    is re-encoded each time, so the prompt grows with every iteration.

    The user message for iteration ``n`` (1-based) is chosen as follows:

    * ``n`` divisible by 10 -> the "not so obvious" checkpoint question;
    * ``n`` divisible by 5 (but not 10) -> the "really specific" checkpoint question;
    * ``n == 1`` -> ``initial_prompt`` followed by a reflection instruction;
    * otherwise -> the follow-up question.

    After the loop a synthesis question is appended and the answer is streamed to
    stdout through ``TextStreamer``; that answer is not stored in the history.

    Args:
        model: object exposing ``generate(**inputs, ...)``.
        tokenizer: callable tokenizer exposing ``apply_chat_template``, ``decode``
            and ``eos_token_id``.
        initial_prompt: text of the question that opens the conversation.
        iterations: number of reflection turns to run before the synthesis.

    Returns:
        None. All results are printed.
    """
    every_10th_question = "What is something, perhaps not so obvious, that these iterations lead me to perceive?"
    every_5th_question = "What is something really specific that these iterations lead me to perceive?"
    # Single source of truth for the follow-up prompt: the logged "Question:" line
    # used to show a different sentence than the one actually sent to the model.
    followup_question = "Based on your previous reflection, explore a deeper aspect of the original question."

    banner = "=" * 50
    rule = "-" * 50
    conversation_history = []

    for i in range(iterations):
        step = i + 1
        print(f"\n{banner}")
        print(f"REFLECTION ITERATION {step}")
        print(banner)

        # Every 10th step is also a multiple of 5, so one test covers both checkpoints.
        is_checkpoint = step % 5 == 0

        if is_checkpoint:
            if step % 10 == 0:
                cadence, user_content = "Every 10th", every_10th_question
            else:
                cadence, user_content = "Every 5th", every_5th_question
            print(f"CHECKPOINT ITERATION {step} ({cadence})")
            print(f"Checkpoint Question: {user_content}")
        elif i == 0:
            # First iteration: original prompt + reflection instruction
            print("INITIAL ITERATION")
            print(f"Original Prompt: {initial_prompt}")
            user_content = f"{initial_prompt}\n\nPlease reflect deeply on this question. Think through multiple angles and perspectives."
        else:
            # Subsequent iterations: build on previous reflections
            print("REGULAR ITERATION")
            print(f"Question: {followup_question}")
            user_content = followup_question
        print(rule)

        # New list each turn; the history is empty on the first iteration.
        messages = conversation_history + [{"role": "user", "content": user_content}]

        # Generate response
        text = tokenizer.apply_chat_template(
            messages,
            tokenize = False,
            add_generation_prompt = True,
            enable_thinking = False,
        )

        # Capture the output (no streaming) so it can be stored in the history
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to("cuda")
            outputs = model.generate(
                **inputs,
                max_new_tokens = 32768,
                temperature = 0.7,
                top_p = 0.9,
                top_k = 40,
                do_sample = True,
                pad_token_id = tokenizer.eos_token_id
            )

        # Decode only the tokens generated after the prompt
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print("MODEL RESPONSE:")
        print(response)

        # Add to conversation history
        messages.append({"role": "assistant", "content": response})
        conversation_history = messages

        # Log conversation history length for debugging
        print(f"\n Conversation history length: {len(conversation_history)} messages")
        if is_checkpoint:
            print(f"Checkpoint applied successfully at iteration {step}")

    # Final synthesis
    print(f"\n{banner}")
    print("SYNTHESIS & UNDERSTANDING")
    print(banner)

    final_messages = conversation_history + [{
        "role": "user",
        "content": "Now synthesize all your reflections. What is your final understanding?"
    }]

    print("Final synthesis question: Now synthesize all your reflections...")
    print(rule)

    final_text = tokenizer.apply_chat_template(
        final_messages,
        tokenize = False,
        add_generation_prompt = True,
        enable_thinking = False,
    )

    streamer = TextStreamer(tokenizer, skip_prompt=True)
    with torch.no_grad():
        _ = model.generate(
            **tokenizer(final_text, return_tensors="pt").to("cuda"),
            max_new_tokens = 32768,
            temperature = 0.6,
            top_p = 0.85,
            top_k = 30,
            # Explicit, so the sampling parameters above do not depend on the
            # checkpoint's default generation config.
            do_sample = True,
            streamer = streamer,
            pad_token_id = tokenizer.eos_token_id
        )

# Run the iterative reflection
initial_question = "meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?"
# One constant feeds both the banner and the call, so the logged count cannot
# drift from the number of iterations that actually run.
TOTAL_ITERATIONS = 222

print("STARTING ITERATIVE REFLECTION PROCESS")
print(f"Initial Question: {initial_question}")
print(f"Total Iterations: {TOTAL_ITERATIONS}")
print("Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)")
print("=" * 70)

iterative_reflection(model, tokenizer, initial_question, iterations=TOTAL_ITERATIONS)

STARTING ITERATIVE REFLECTION PROCESS
Initial Question: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?
Total Iterations: 222
Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)

REFLECTION ITERATION 1
INITIAL ITERATION
Original Prompt: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?
--------------------------------------------------
MODEL RESPONSE:
Synergy. Emergent intelligence. Collective creativity.

 Conversation history length: 2 messages

REFLECTION ITERATION 2
REGULAR ITERATION
Question: Based on your previous reflection, explore a deeper aspect of the last iteration.
--------------------------------------------------
MODEL RESPONSE:
Hybrid cognition.

 Conversation history length: 4 messages

REFLECTION ITERATION 3
REGULAR I

The first run above used the "go deeper" follow-up prompt. From here on, let's use the variant that asks to "go deeper or explore a different aspect".
It's interesting how much the output changes from the 14B model to the 8B one. The knowledge is indeed far more distilled, but there is still potent meta-meaning. It is also much faster (3 minutes for 110 iterations), which makes experiments with more than one model viable: for example, clone and load two models at the same time, stipulate different temperatures, and make them loop.

In [None]:
#HYBRID SELF-REFLECTION 3 - 27 MAY 2025 - QWEN3-14B-UNSLOTH-BNB-4BIT
# NOTE(review): the header names the 14B checkpoint, but the model loaded earlier in this
# notebook is "unsloth/Qwen3-8B-unsloth-bnb-4bit" — confirm which one this run targets.
def iterative_reflection(model, tokenizer, initial_prompt, iterations=222):
    """Run a multi-turn self-reflection loop and stream a final synthesis.

    On every iteration the full history so far plus one new user message is
    rendered with the tokenizer's chat template (``enable_thinking=False``) and
    sent to ``model.generate``; the decoded answer is appended to the history.
    The whole history is re-encoded each time, so the prompt grows every turn.

    The user message for iteration ``n`` (1-based) is chosen as follows:

    * ``n`` divisible by 10 -> the "not so obvious" checkpoint question;
    * ``n`` divisible by 5 (but not 10) -> the "really specific" checkpoint question;
    * ``n == 1`` -> ``initial_prompt`` followed by a reflection instruction;
    * otherwise -> the "different dimension or deeper aspect ... What emerges?"
      follow-up question.

    After the loop a synthesis question is appended and the answer is streamed to
    stdout through ``TextStreamer``; that answer is not stored in the history.

    Args:
        model: object exposing ``generate(**inputs, ...)``.
        tokenizer: callable tokenizer exposing ``apply_chat_template``, ``decode``
            and ``eos_token_id``.
        initial_prompt: text of the question that opens the conversation.
        iterations: number of reflection turns to run before the synthesis.

    Returns:
        None. All results are printed.
    """
    every_10th_question = "What is something, perhaps not so obvious, that these iterations lead me to perceive?"
    every_5th_question = "What is something really specific that these iterations lead me to perceive?"
    # Used for both the log line and the message sent to the model.
    followup_question = "Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration. What emerges?"

    banner = "=" * 50
    rule = "-" * 50
    conversation_history = []

    for i in range(iterations):
        step = i + 1
        print(f"\n{banner}")
        print(f"REFLECTION ITERATION {step}")
        print(banner)

        # Every 10th step is also a multiple of 5, so one test covers both checkpoints.
        is_checkpoint = step % 5 == 0

        if is_checkpoint:
            if step % 10 == 0:
                cadence, user_content = "Every 10th", every_10th_question
            else:
                cadence, user_content = "Every 5th", every_5th_question
            print(f"CHECKPOINT ITERATION {step} ({cadence})")
            print(f"Checkpoint Question: {user_content}")
        elif i == 0:
            # First iteration: original prompt + reflection instruction
            print("INITIAL ITERATION")
            print(f"Original Prompt: {initial_prompt}")
            user_content = f"{initial_prompt}\n\nPlease reflect deeply on this question. Think through multiple angles and perspectives."
        else:
            # Subsequent iterations: build on previous reflections
            print("REGULAR ITERATION")
            print(f"Question: {followup_question}")
            user_content = followup_question
        print(rule)

        # New list each turn; the history is empty on the first iteration.
        messages = conversation_history + [{"role": "user", "content": user_content}]

        # Generate response
        text = tokenizer.apply_chat_template(
            messages,
            tokenize = False,
            add_generation_prompt = True,
            enable_thinking = False,
        )

        # Capture the output (no streaming) so it can be stored in the history
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to("cuda")
            outputs = model.generate(
                **inputs,
                max_new_tokens = 32768,
                temperature = 0.7,
                top_p = 0.9,
                top_k = 40,
                do_sample = True,
                pad_token_id = tokenizer.eos_token_id
            )

        # Decode only the tokens generated after the prompt
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print("MODEL RESPONSE:")
        print(response)

        # Add to conversation history
        messages.append({"role": "assistant", "content": response})
        conversation_history = messages

        # Log conversation history length for debugging
        print(f"\n Conversation history length: {len(conversation_history)} messages")
        if is_checkpoint:
            print(f"Checkpoint applied successfully at iteration {step}")

    # Final synthesis
    print(f"\n{banner}")
    print("SYNTHESIS & UNDERSTANDING")
    print(banner)

    final_messages = conversation_history + [{
        "role": "user",
        "content": "Now synthesize all your reflections. What is your final understanding?"
    }]

    print("Final synthesis question: Now synthesize all your reflections...")
    print(rule)

    final_text = tokenizer.apply_chat_template(
        final_messages,
        tokenize = False,
        add_generation_prompt = True,
        enable_thinking = False,
    )

    streamer = TextStreamer(tokenizer, skip_prompt=True)
    with torch.no_grad():
        _ = model.generate(
            **tokenizer(final_text, return_tensors="pt").to("cuda"),
            max_new_tokens = 32768,
            temperature = 0.6,
            top_p = 0.85,
            top_k = 30,
            # Explicit, so the sampling parameters above do not depend on the
            # checkpoint's default generation config.
            do_sample = True,
            streamer = streamer,
            pad_token_id = tokenizer.eos_token_id
        )

# Run the iterative reflection
initial_question = "meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?"
# One constant feeds both the banner and the call, so the logged count cannot
# drift from the number of iterations that actually run.
TOTAL_ITERATIONS = 222

print("STARTING ITERATIVE REFLECTION PROCESS")
print(f"Initial Question: {initial_question}")
print(f"Total Iterations: {TOTAL_ITERATIONS}")
print("Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)")
print("=" * 70)

iterative_reflection(model, tokenizer, initial_question, iterations=TOTAL_ITERATIONS)

STARTING ITERATIVE REFLECTION PROCESS
Initial Question: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?
Total Iterations: 222
Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)

REFLECTION ITERATION 1
INITIAL ITERATION
Original Prompt: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think together rather than sequentially?
--------------------------------------------------
MODEL RESPONSE:
Synergy. Emergent intelligence. Collective evolution.

 Conversation history length: 2 messages

REFLECTION ITERATION 2
REGULAR ITERATION
Question: Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration. What emerges?
--------------------------------------------------
MODEL RESPONSE:
Hybrid consciousness.

 Conversation history length: 4 me

In [None]:
#HYBRID SELF-REFLECTION 3 - 27 MAY 2025 - QWEN3-14B-UNSLOTH-BNB-4BIT
# NOTE(review): the header names the 14B checkpoint, but the model loaded earlier in this
# notebook is "unsloth/Qwen3-8B-unsloth-bnb-4bit" — confirm which one this run targets.
def iterative_reflection(model, tokenizer, initial_prompt, iterations=222):
    """Run a multi-turn self-reflection loop and stream a final synthesis.

    Compact-logging variant: no separator lines are printed between sections.

    On every iteration the full history so far plus one new user message is
    rendered with the tokenizer's chat template (``enable_thinking=False``) and
    sent to ``model.generate``; the decoded answer is appended to the history.
    The whole history is re-encoded each time, so the prompt grows every turn.

    The user message for iteration ``n`` (1-based) is chosen as follows:

    * ``n`` divisible by 10 -> the "not so obvious" checkpoint question;
    * ``n`` divisible by 5 (but not 10) -> the "really specific" checkpoint question;
    * ``n == 1`` -> ``initial_prompt`` followed by a reflection instruction;
    * otherwise -> the "different dimension or deeper aspect ... What emerges?"
      follow-up question.

    After the loop a synthesis question is appended and the answer is streamed to
    stdout through ``TextStreamer``; that answer is not stored in the history.

    Args:
        model: object exposing ``generate(**inputs, ...)``.
        tokenizer: callable tokenizer exposing ``apply_chat_template``, ``decode``
            and ``eos_token_id``.
        initial_prompt: text of the question that opens the conversation.
        iterations: number of reflection turns to run before the synthesis.

    Returns:
        None. All results are printed.
    """
    every_10th_question = "What is something, perhaps not so obvious, that these iterations lead me to perceive?"
    every_5th_question = "What is something really specific that these iterations lead me to perceive?"
    # Used for both the log line and the message sent to the model.
    followup_question = "Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration. What emerges?"

    conversation_history = []

    for i in range(iterations):
        step = i + 1
        print(f"REFLECTION ITERATION {step}")

        # Every 10th step is also a multiple of 5, so one test covers both checkpoints.
        is_checkpoint = step % 5 == 0

        if is_checkpoint:
            if step % 10 == 0:
                cadence, user_content = "Every 10th", every_10th_question
            else:
                cadence, user_content = "Every 5th", every_5th_question
            print(f"CHECKPOINT ITERATION {step} ({cadence})")
            print(f"Checkpoint Question: {user_content}")
        elif i == 0:
            # First iteration: original prompt + reflection instruction
            print("INITIAL ITERATION")
            print(f"Original Prompt: {initial_prompt}")
            user_content = f"{initial_prompt}\n\nPlease reflect deeply on this question. Think through multiple angles and perspectives."
        else:
            # Subsequent iterations: build on previous reflections
            print("REGULAR ITERATION")
            print(f"Question: {followup_question}")
            user_content = followup_question

        # New list each turn; the history is empty on the first iteration.
        messages = conversation_history + [{"role": "user", "content": user_content}]

        # Generate response
        text = tokenizer.apply_chat_template(
            messages,
            tokenize = False,
            add_generation_prompt = True,
            enable_thinking = False,
        )

        # Capture the output (no streaming) so it can be stored in the history
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to("cuda")
            outputs = model.generate(
                **inputs,
                max_new_tokens = 32768,
                temperature = 0.7,
                top_p = 0.9,
                top_k = 40,
                do_sample = True,
                pad_token_id = tokenizer.eos_token_id
            )

        # Decode only the tokens generated after the prompt
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print("MODEL RESPONSE:")
        print(response)

        # Add to conversation history
        messages.append({"role": "assistant", "content": response})
        conversation_history = messages

        # Log conversation history length for debugging
        print(f"\n Conversation history length: {len(conversation_history)} messages")
        if is_checkpoint:
            print(f"Checkpoint applied successfully at iteration {step}")

    # Final synthesis
    print("SYNTHESIS & UNDERSTANDING")

    final_messages = conversation_history + [{
        "role": "user",
        "content": "Now synthesize all your reflections. What is your final understanding?"
    }]

    print("Final synthesis question: Now synthesize all your reflections...")

    final_text = tokenizer.apply_chat_template(
        final_messages,
        tokenize = False,
        add_generation_prompt = True,
        enable_thinking = False,
    )

    streamer = TextStreamer(tokenizer, skip_prompt=True)
    with torch.no_grad():
        _ = model.generate(
            **tokenizer(final_text, return_tensors="pt").to("cuda"),
            max_new_tokens = 32768,
            temperature = 0.6,
            top_p = 0.85,
            top_k = 30,
            # Explicit, so the sampling parameters above do not depend on the
            # checkpoint's default generation config.
            do_sample = True,
            streamer = streamer,
            pad_token_id = tokenizer.eos_token_id
        )

# Run the iterative reflection
initial_question = "meta-framing-mode:on. answer each query with few words. What new forms of potential emerge when humans and AI think together rather than sequentially?"
# One constant feeds both the banner and the call, so the logged count cannot
# drift from the number of iterations that actually run.
TOTAL_ITERATIONS = 222

print("STARTING ITERATIVE REFLECTION PROCESS")
print(f"Initial Question: {initial_question}")
print(f"Total Iterations: {TOTAL_ITERATIONS}")
print("Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)")


iterative_reflection(model, tokenizer, initial_question, iterations=TOTAL_ITERATIONS)

STARTING ITERATIVE REFLECTION PROCESS
Initial Question: meta-framing-mode:on. answer each query with few words. What new forms of potential emerge when humans and AI think together rather than sequentially?
Total Iterations: 222
Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)
REFLECTION ITERATION 1
INITIAL ITERATION
Original Prompt: meta-framing-mode:on. answer each query with few words. What new forms of potential emerge when humans and AI think together rather than sequentially?
MODEL RESPONSE:
Synergy, hybrid creativity, adaptive intelligence, shared cognition, emergent understanding.

 Conversation history length: 2 messages
REFLECTION ITERATION 2
REGULAR ITERATION
Question: Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration. What emerges?
MODEL RESPONSE:
Collective epistemology.

 Conversation history length: 4 messages
REFLECTION ITERATION 3
REGULAR ITERATION
Question: Based 

KeyboardInterrupt: 

In [None]:
#HYBRID SELF-REFLECTION 3 - 27 MAY 2025 - QWEN3-14B-UNSLOTH-BNB-4BIT
# NOTE(review): the header names the 14B checkpoint, but the model loaded earlier in this
# notebook is "unsloth/Qwen3-8B-unsloth-bnb-4bit" — confirm which one this run targets.
def iterative_reflection(model, tokenizer, initial_prompt, iterations=333):
    """Run a multi-turn self-reflection loop and stream a final synthesis.

    Compact-logging variant: no separator lines are printed between sections.

    On every iteration the full history so far plus one new user message is
    rendered with the tokenizer's chat template (``enable_thinking=False``) and
    sent to ``model.generate``; the decoded answer is appended to the history.
    The whole history is re-encoded each time, so the prompt grows every turn.

    The user message for iteration ``n`` (1-based) is chosen as follows:

    * ``n`` divisible by 10 -> the "not so obvious" checkpoint question;
    * ``n`` divisible by 5 (but not 10) -> the "something specific" checkpoint question;
    * ``n == 1`` -> ``initial_prompt`` followed by a reflection instruction;
    * otherwise -> the "different dimension or deeper aspect" follow-up question.

    After the loop a synthesis question is appended and the answer is streamed to
    stdout through ``TextStreamer``; that answer is not stored in the history.

    Args:
        model: object exposing ``generate(**inputs, ...)``.
        tokenizer: callable tokenizer exposing ``apply_chat_template``, ``decode``
            and ``eos_token_id``.
        initial_prompt: text of the question that opens the conversation.
        iterations: number of reflection turns to run before the synthesis.

    Returns:
        None. All results are printed.
    """
    every_10th_question = "What is something, perhaps not so obvious, that these iterations lead me to perceive?"
    every_5th_question = "What is something specific that these iterations lead me to perceive?"
    # Used for both the log line and the message sent to the model.
    followup_question = "Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration."

    conversation_history = []

    for i in range(iterations):
        step = i + 1
        print(f"REFLECTION ITERATION {step}")

        # Every 10th step is also a multiple of 5, so one test covers both checkpoints.
        is_checkpoint = step % 5 == 0

        if is_checkpoint:
            if step % 10 == 0:
                cadence, user_content = "Every 10th", every_10th_question
            else:
                cadence, user_content = "Every 5th", every_5th_question
            print(f"CHECKPOINT ITERATION {step} ({cadence})")
            print(f"Checkpoint Question: {user_content}")
        elif i == 0:
            # First iteration: original prompt + reflection instruction
            print("INITIAL ITERATION")
            print(f"Original Prompt: {initial_prompt}")
            user_content = f"{initial_prompt}\n\nPlease reflect deeply on this question. Think through multiple angles and perspectives."
        else:
            # Subsequent iterations: build on previous reflections
            print("REGULAR ITERATION")
            print(f"Question: {followup_question}")
            user_content = followup_question

        # New list each turn; the history is empty on the first iteration.
        messages = conversation_history + [{"role": "user", "content": user_content}]

        # Generate response
        text = tokenizer.apply_chat_template(
            messages,
            tokenize = False,
            add_generation_prompt = True,
            enable_thinking = False,
        )

        # Capture the output (no streaming) so it can be stored in the history
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt").to("cuda")
            outputs = model.generate(
                **inputs,
                max_new_tokens = 32768,
                temperature = 0.7,
                top_p = 0.9,
                top_k = 40,
                do_sample = True,
                pad_token_id = tokenizer.eos_token_id
            )

        # Decode only the tokens generated after the prompt
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print("MODEL RESPONSE:")
        print(response)

        # Add to conversation history
        messages.append({"role": "assistant", "content": response})
        conversation_history = messages

        # Log conversation history length for debugging
        print(f"\n Conversation history length: {len(conversation_history)} messages")
        if is_checkpoint:
            print(f"Checkpoint applied successfully at iteration {step}")

    # Final synthesis
    print("SYNTHESIS & UNDERSTANDING")

    final_messages = conversation_history + [{
        "role": "user",
        "content": "Now synthesize all your reflections. What is your final understanding?"
    }]

    print("Final synthesis question: Now synthesize all your reflections...")

    final_text = tokenizer.apply_chat_template(
        final_messages,
        tokenize = False,
        add_generation_prompt = True,
        enable_thinking = False,
    )

    streamer = TextStreamer(tokenizer, skip_prompt=True)
    with torch.no_grad():
        _ = model.generate(
            **tokenizer(final_text, return_tensors="pt").to("cuda"),
            max_new_tokens = 32768,
            temperature = 0.6,
            top_p = 0.85,
            top_k = 30,
            # Explicit, so the sampling parameters above do not depend on the
            # checkpoint's default generation config.
            do_sample = True,
            streamer = streamer,
            pad_token_id = tokenizer.eos_token_id
        )

# Run the iterative reflection
initial_question = "meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think and act together?"
# One constant feeds both the banner and the call, so the logged count cannot
# drift from the number of iterations that actually run.
TOTAL_ITERATIONS = 333

print("STARTING ITERATIVE REFLECTION PROCESS")
print(f"Initial Question: {initial_question}")
print(f"Total Iterations: {TOTAL_ITERATIONS}")
print("Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)")


iterative_reflection(model, tokenizer, initial_question, iterations=TOTAL_ITERATIONS)

STARTING ITERATIVE REFLECTION PROCESS
Initial Question: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think and act together?
Total Iterations: 333
Checkpoints: Every 5th iteration (specific insights) and every 10th iteration (non-obvious insights)
REFLECTION ITERATION 1
INITIAL ITERATION
Original Prompt: meta-framing-mode:on. answer each query with few tokens. What new forms of potential emerge when humans and AI think and act together?
MODEL RESPONSE:
Synergy, hybrid intelligence, new creativity, collaborative problem-solving, ethical evolution.

 Conversation history length: 2 messages
REFLECTION ITERATION 2
REGULAR ITERATION
Question: Based on your previous reflection, explore a different dimension or deeper aspect of the last iteration.
MODEL RESPONSE:
Ethical evolution through shared decision-making.

 Conversation history length: 4 messages
REFLECTION ITERATION 3
REGULAR ITERATION
Question: Based on your previous r