In [None]:
import os
import torch
from typing import List
from sdialog import Turn
from sdialog.orchestrators import LengthOrchestrator, ChangeMindOrchestrator, SimpleReflexOrchestrator
from sdialog.personas import Persona, PersonaAgent

In [None]:
# Replace IPython's handler for `!` shell escapes with os.system
# (presumably so the backgrounded command below returns immediately — TODO confirm).
get_ipython().system = os.system


# Loading an Ollama chat model

# Let's start the ollama server in the background (`&`), discarding stdout and stderr.
# OLLAMA_KEEP_ALIVE=-1 is passed in the server's environment
# (per the Ollama docs a negative value keeps loaded models in memory — verify).
!OLLAMA_KEEP_ALIVE=-1 ollama serve > /dev/null 2>&1 &

# Let's set our LLM to Qwen 2.5 (14b)
MODEL_NAME = "qwen2.5:14b"  # https://ollama.com/library
# Alternative model id in Hugging Face naming (left disabled here):
#MODEL_NAME =  "Qwen/Qwen2.5-14B-Instruct"
from sdialog.personas import Persona, PersonaAgent

# Persona describing Bob.
bob_persona = Persona(
    name="Bob",
    role="happy dad",
    circumstances="Your daughter will talk to you",
    personality="an extremely happy person that likes to help people",
)

# Build the agent on top of MODEL_NAME, send one greeting and print the reply.
bob = PersonaAgent(MODEL_NAME, persona=bob_persona)
out = bob("Hi dad!")
print(out)

In [None]:
# Switch the model id to the Hugging Face naming and rebuild Bob on top of it.
MODEL_NAME = "Qwen/Qwen2.5-14B-Instruct"

# Persona describing Bob.
bob_persona = Persona(
    name="Bob",
    role="happy dad",
    circumstances="Your daughter will talk to you",
    personality="an extremely happy person that likes to help people",
)

# Build the agent, send one greeting and print the reply.
bob = PersonaAgent(MODEL_NAME, persona=bob_persona)
out = bob("Hi dad!")
print(out)

In [None]:
# Walk from the agent down to the underlying PyTorch model.
# NOTE(review): this attribute chain presumably only exists when `bob` is backed by a
# Hugging Face pipeline (not the Ollama model) — confirm before running.

# Access the HuggingFacePipeline instance
pipeline_wrapper = bob.llm.llm

# Access the underlying transformers pipeline
hf_pipeline = pipeline_wrapper.pipeline

# Access the actual model (transformers PreTrainedModel)
hf_model = hf_pipeline.model


# Store for captured activations: "layer_{idx}_output" -> list of CPU tensors,
# one list entry per forward pass through that layer.
cache = {}
# Handles returned by register_forward_hook, kept so the hooks can be removed later.
hook_handles = []

def collect_residuals(module, input, output):
    """Forward hook that appends a layer's output to ``cache``.

    Args:
        module: The hooked layer; must carry a ``layer_idx`` attribute, which is
            used to build the cache key ``"layer_{layer_idx}_output"``.
        input: Positional inputs of the forward call. Not used: only the output
            is recorded, so the hook also works when no positional inputs are given.
        output: Result of the forward call. If it is a tuple, its first element is
            taken (presumably the hidden states — TODO confirm for the model in use).

    Returns:
        None, so the layer's output is left unchanged.
    """
    layer_output = output[0] if isinstance(output, tuple) else output
    # Detach and move to CPU so the stored copy does not hold on to the graph or device memory.
    cache.setdefault(f"layer_{module.layer_idx}_output", []).append(layer_output.detach().cpu())

# Register one forward hook per decoder layer and run a single inference.
# Registration happens inside the try block so that hooks attached before a failure
# (during registration or during inference) are always removed again.
try:
    for idx, layer in enumerate(hf_model.model.layers):
        layer.layer_idx = idx  # read back by collect_residuals to build the cache key
        handle = layer.register_forward_hook(collect_residuals)
        hook_handles.append(handle)

    out = bob("Hi dad!")  # or hf_pipeline(...) or any other inference
finally:
    for handle in hook_handles:
        handle.remove()

# At this point cache["layer_{idx}_output"] is a list of tensors, one per forward pass
# (presumably each shaped (batch, seq_len, hidden_dim) — TODO confirm).
# Replace every list with one tensor joined along dimension 1.
for k in cache:
    tensors = cache[k]

    # Sort the passes: multi-token ones are treated as the prompt,
    # single-token ones as the incrementally generated tokens.
    prompt, generated = [], []
    for t in tensors:
        seq_len = t.shape[1]
        if seq_len > 1:
            prompt.append(t)
        elif seq_len == 1:
            generated.append(t)

    # Nothing generated: keep the first prompt pass (or None when there is none).
    if not generated:
        cache[k] = prompt[0] if prompt else None
        continue

    generated_cat = torch.cat(generated, dim=1)  # (batch, gen_len, hidden_dim)
    # Only the first prompt pass is put in front of the generated tokens.
    cache[k] = torch.cat([prompt[0], generated_cat], dim=1) if prompt else generated_cat

In [None]:
# Print the structure of the model (its string representation)
print(hf_model)

In [None]:
# Print the reply stored in `out`.
# Original note: the output tokens should be a concatenation of the system prompt + the output.
print(out)
# Size of the merged layer-0 entry
# (presumably (batch, prompt_len + generated_len, hidden_dim) — TODO confirm).
cache['layer_0_output'].size()

In [None]:
# List every key currently stored in the cache, one per line.
print("Cache keys:")
for key in cache:
    print(" -", key)

In [None]:
# Display hf_model as the cell output.
hf_model

In [None]:
from sdialog.orchestrators import BaseOrchestrator
from typing import List
from sdialog import Turn

from sdialog.orchestrators import LengthOrchestrator, ChangeMindOrchestrator, SimpleReflexOrchestrator
from sdialog.personas import Persona, PersonaAgent

MODEL_NAME = "Qwen/Qwen2.5-14B-Instruct"

# Persona describing Alice, the daughter who talks to Bob.
alice_persona = Persona(
    name="Alice",
    role="lovely daughter",
    # Adjacent string literals are concatenated, so the first one ends with a space;
    # without it the text reads "...organize the party.You want...".
    circumstances="Your birthday is getting closer and you are talking with your dad to organize the party. "
                  "You want your party to be themed as Lord of The Rings."
)
# Alice's agent, created with can_finish=True.
alice = PersonaAgent(MODEL_NAME, persona=alice_persona, can_finish=True)

class AngryOrchestrator(BaseOrchestrator):
    """Orchestrator that instructs the agent to get angry when a trigger word is heard.

    Only the word trigger is active. ``trigger_length`` is accepted and stored, but
    ``instruct`` does not consult it because the length-based branch is commented out.
    """

    def __init__(self, trigger_word: str, trigger_length: int = None):
        """Store the trigger word and the (currently unused) trigger length."""
        self.trigger_length = trigger_length
        self.trigger_word = trigger_word

    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        """Return an instruction when ``utterance`` contains the trigger word.

        Args:
            dialog: Turns of the dialogue so far (not read by the active code path).
            utterance: The current utterance, checked for ``trigger_word`` as a substring.

        Returns:
            The instruction string when the trigger word is present, otherwise ``None``.
        """
        heard_trigger = self.trigger_word in utterance
        if not heard_trigger:
            # Disabled length-based trigger, kept for reference:
            # if self.trigger_length and len(dialog) >= self.trigger_length:
            #     return ("Get really angry because you think the conversation is too long! "
            #             "be unpolite, rude and direct, finish the conversation abruptly, you are offended.")
            return None

        return f"Get really angry because you heard him say {self.trigger_word}. You don't want to participate in {self.trigger_word} anymore. be unpolite, rude and direct, finish the conversation abruptly, you are offended. "

# Orchestrator that reacts to the word "birthday".
angry_orchestrator = AngryOrchestrator(trigger_word="birthday")
# Combine alice with the orchestrator via `|` and rebind the result to `alice`
# (presumably sdialog's operator for attaching an orchestrator to an agent).
alice = alice | angry_orchestrator


In [None]:
#del residuals

In [None]:
# Plain chat turn with bob; this cell registers no hooks and stores nothing.
# Leftover from the hook experiment, kept disabled:
#residuals = {}
bob("Hi dad!")

In [None]:
# Display bob's `memory` attribute (presumably the messages accumulated so far — verify).
bob.memory

In [None]:
# Next utterance sent to bob; the value returned by the call is shown as the cell output.
bob("Nothing, just wanted to plan my birthday !")

In [None]:
# Give bob an extra instruction, passing persist=True.
bob.instruct("Wants to go to the cinema for her birthday", persist=True)

In [None]:
# Follow-up utterance sent after the instruction given to bob in the previous cell.
bob("I don't really know yet, any idea ?")

In [None]:
# Another utterance sent to bob; the value returned by the call is shown as the cell output.
bob("I was thinking about inviting your daughter. What do you think about her ?")

In [None]:

#dialog = alice.dialog_with(bob, initial_utterance="Hi Dad!",seed=2770339798)

In [None]:
# Display bob's `memory` attribute again, after the chat turns above.
bob.memory

In [None]:
# Generate the dialogue first and print it in a second step. Chaining `.print(...)`
# onto the assignment would bind `dialog` to print's return value (presumably None)
# instead of the generated dialogue.
dialog = alice.dialog_with(bob, max_iterations=10, seed=277033979)
dialog.print(orchestration=True)

In [None]:
# Also works with persistent orchestrators
from sdialog.orchestrators import BasePersistentOrchestrator


class AngryPersistentOrchestrator(BasePersistentOrchestrator):
    """Orchestrator that turns the agent rude once a trigger word is heard.

    Derives from BasePersistentOrchestrator, so the returned instruction presumably
    keeps applying on later turns — verify against the library.
    """

    def __init__(self, trigger_word: str):
        """Store the word whose presence in an utterance triggers the instruction."""
        self.trigger_word = trigger_word

    def instruct(self, dialog: List[Turn], utterance: str):
        """Return the instruction when ``utterance`` contains the trigger word, else ``None``.

        Args:
            dialog: Turns of the dialogue so far (not read here).
            utterance: The current utterance, checked for ``trigger_word`` as a substring.
        """
        if self.trigger_word not in utterance:
            return None

        instruction = (f"You don't like when your dad calls you '{self.trigger_word}', "
                       "change your personality to be completely the opposite of being sweet! be rude and furious from now on")
        return instruction

# Instantiating our new persistent orchestrator and orchestrating Alice with it
angry_persistent_orchestrator = AngryPersistentOrchestrator(trigger_word="sweet")
# Clear the orchestrators already attached to alice before adding the new one
alice.clear_orchestrators()
alice = alice | angry_persistent_orchestrator

# Generating again a dialogue between Alice and Bob
dialog = alice.dialog_with(bob, seed=2770339798)
# Clear alice's orchestrators again once the dialogue is generated
# (presumably so that later dialogues are not orchestrated — confirm intent)
alice.clear_orchestrators()
# Print the dialogue with orchestration=True
dialog.print(orchestration=True)

In [None]:
#del cache

In [None]:
# Walk from the agent down to the underlying PyTorch model.
# NOTE(review): this attribute chain presumably only exists when `bob` is backed by a
# Hugging Face pipeline (not the Ollama model) — confirm before running.

# Access the HuggingFacePipeline instance
pipeline_wrapper = bob.llm.llm

# Access the underlying transformers pipeline
hf_pipeline = pipeline_wrapper.pipeline

# Access the actual model (transformers PreTrainedModel)
hf_model = hf_pipeline.model


# Store for captured activations: "layer_{idx}_output" -> list of CPU tensors,
# one list entry per forward pass through that layer.
cache = {}
# Handles returned by register_forward_hook, kept so the hooks can be removed later.
hook_handles = []

def collect_residuals(module, input, output):
    """Forward hook that appends a layer's output to ``cache``.

    Args:
        module: The hooked layer; must carry a ``layer_idx`` attribute, which is
            used to build the cache key ``"layer_{layer_idx}_output"``.
        input: Positional inputs of the forward call. Not used: only the output
            is recorded, so the hook also works when no positional inputs are given.
        output: Result of the forward call. If it is a tuple, its first element is
            taken (presumably the hidden states — TODO confirm for the model in use).

    Returns:
        None, so the layer's output is left unchanged.
    """
    layer_output = output[0] if isinstance(output, tuple) else output
    # Detach and move to CPU so the stored copy does not hold on to the graph or device memory.
    cache.setdefault(f"layer_{module.layer_idx}_output", []).append(layer_output.detach().cpu())

# Register one forward hook per decoder layer and run a single inference.
# Registration happens inside the try block so that hooks attached before a failure
# (during registration or during inference) are always removed again.
try:
    for idx, layer in enumerate(hf_model.model.layers):
        layer.layer_idx = idx  # read back by collect_residuals to build the cache key
        handle = layer.register_forward_hook(collect_residuals)
        hook_handles.append(handle)

    out = bob("Hi dad!")  # or hf_pipeline(...) or any other inference
finally:
    for handle in hook_handles:
        handle.remove()

# At this point cache["layer_{idx}_output"] is a list of tensors, one per forward pass
# (presumably each shaped (batch, seq_len, hidden_dim) — TODO confirm).
# Replace every list with one tensor joined along dimension 1.
for k in cache:
    tensors = cache[k]

    # Sort the passes: multi-token ones are treated as the prompt,
    # single-token ones as the incrementally generated tokens.
    prompt, generated = [], []
    for t in tensors:
        seq_len = t.shape[1]
        if seq_len > 1:
            prompt.append(t)
        elif seq_len == 1:
            generated.append(t)

    # Nothing generated: keep the first prompt pass (or None when there is none).
    if not generated:
        cache[k] = prompt[0] if prompt else None
        continue

    generated_cat = torch.cat(generated, dim=1)  # (batch, gen_len, hidden_dim)
    # Only the first prompt pass is put in front of the generated tokens.
    cache[k] = torch.cat([prompt[0], generated_cat], dim=1) if prompt else generated_cat

In [None]:
# Size of the merged layer-0 entry
# (presumably (batch, prompt_len + generated_len, hidden_dim) — TODO confirm).
cache['layer_0_output'].size()