<a target="_blank" href="https://colab.research.google.com/github/sonder-art/automl_o24/blob/main/codigo/nlp_chatbots/llm_playground.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# Importamos las librerías necesarias
from transformers import  AutoModel
from huggingface_hub import login
import torch
from dataclasses import dataclass
from copy import deepcopy
import os

import torch
from transformers import pipeline
from dataclasses import dataclass
from copy import deepcopy
from typing import List, Dict
from rich.console import Console
from rich.table import Table
import json

from IPython.display import display, HTML, Markdown
from pygments import highlight
from pygments.lexers import get_lexer_by_name, Python3Lexer
from pygments.formatters import HtmlFormatter

In [2]:
def setup_huggingface(token):
    """Setup HuggingFace authentication and verify login"""
    try:
        # Set token and login
        os.environ["HUGGINGFACE_TOKEN"] = token
        login(token=token)

        # Verify login by attempting to download a private model
        # This will fail if not properly authenticated
        test_model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-bert")
        print("✓ Login successful - You can now access private models and datasets")
        return True
    except Exception as e:
        print(f"✗ Login failed: {str(e)}")
        return False

token = "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"  # Replace with your token from https://huggingface.co/settings/tokens
setup_huggingface(token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/uumami/.cache/huggingface/token
Login successful
✓ Login successful - You can now access private models and datasets


True

In [3]:
console = Console()

In [4]:
@dataclass
class GenerationConfig:
    """Configuration for text generation"""
    max_new_tokens: int = 1000
    temperature: float = 0.01
    top_p: float = .01
    top_k: int = 50
    num_return_sequences: int = 1
    
    def display(self):
        """Display current configuration"""
        table = Table(title="Generation Configuration")
        table.add_column("Parameter", style="cyan")
        table.add_column("Value", style="green")
        for field in self.__dataclass_fields__:
            table.add_row(field, str(getattr(self, field)))
        console.print(table)


In [5]:
class ConversationMemory:
    def __init__(self, max_messages: int = 10):
        self.messages: List[Dict] = []
        self.checkpoints: Dict = {}
        self.max_messages = max_messages
        self.summary = ""
        
        
    @property
    def conversation(self):
        """Access full conversation history"""
        return self.messages

    def add(self, role: str, content: str):
        """Add a message to the conversation"""
        self.messages.append({
            "role": role,
            "content": content,
            #"timestamp": datetime.now().isoformat()
        })

    def save_checkpoint(self, name: str):
        """Save current state as a checkpoint"""
        self.checkpoints[name] = {
            'messages': deepcopy(self.messages),
            'summary': self.summary,
            #'timestamp': datetime.now().isoformat()
        }
        console.print(f"[green]Checkpoint '{name}' saved")

    def load_checkpoint(self, name: str):
        """Load a saved checkpoint"""
        if name in self.checkpoints:
            checkpoint = self.checkpoints[name]
            self.messages = deepcopy(checkpoint['messages'])
            self.summary = checkpoint['summary']
            console.print(f"[green]Checkpoint '{name}' loaded")
        else:
            console.print(f"[red]Checkpoint '{name}' not found")

    def list_checkpoints(self):
        """Display all available checkpoints"""
        print("\nAvailable Checkpoints:")
        print("="*40)
        if not self.checkpoints:
            print("No checkpoints saved yet.")
            return
            
        # Format as a table
        print(f"{'Name':<15} {'Messages':<10}")
        print("-"*40)
        for name, checkpoint in self.checkpoints.items():
            msg_count = len(checkpoint['messages'])
            print(f"{name:<15} {msg_count:<10}")
        print("-"*40)


    def rollback(self, n: int = 1):
        """Remove the last n messages"""
        if n < len(self.messages):
            self.messages = self.messages[:-n]
            console.print(f"[yellow]Rolled back {n} messages")
        else:
            console.print("[red]Cannot rollback more messages than exist")

    def display(self):
        """Display the conversation history"""
        if self.summary:
            print("\nConversation Summary:")
            print("="*80)
            print(self.summary)
            print("="*80)
        
        print("\nConversation History:")
        print("="*80)
        
        for msg in self.messages:
            role = msg.get('role', 'unknown').upper()
            content = msg.get('content', '')
            
            print(f"\n[{role}]")
            print("-"*40)
            if role == "SYSTEM":
                print(content.strip())
            else:
                # Handle code blocks in content
                if '```' in content:
                    parts = content.split('```')
                    for i, part in enumerate(parts):
                        if i % 2 == 1:  # Code block
                            print("\nCode:")
                            print("-"*40)
                            print(part.strip())
                            print("-"*40)
                        else:  # Regular text
                            if part.strip():
                                print(part.strip())
                else:
                    print(content.strip())
            print("-"*80)


In [6]:
class ChatPlayground:
    def __init__(self, model_name: str = "meta-llama/Llama-3.2-1B-Instruct"):
        """Initialize the chat playground with a specific model"""
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.memory = ConversationMemory()
        self.summarizer = ConversationMemory()
        self.summarizer_len = 0
        
    def summarize_conversation(self, custom_prompt: str = None) -> str:
        """
        Summarize the current conversation
        Args:
            custom_prompt: Optional custom system prompt for summarization
        """
        default_prompt = """You are a precise summarizer. Create a concise summary of the conversation that:
        1. Captures main topics and key points
        2. Preserves important details and decisions
        3. Maintains chronological flow
        Provide the summary in a clear, professional tone."""
        messages_to_summarize = self.memory.messages[self.summarizer_len:]
        self.summarizer_len = len(self.memory.messages)
        # Create messages for summarization
        summary_messages = [
            {"role": "system", "content": custom_prompt or default_prompt},
            {"role": "user", "content": "Summarize this previous conversation from an LLM with a human:\n" + json.dumps(messages_to_summarize)}
        ]
        
        try:
            summary = self.pipe(
                summary_messages,
                max_new_tokens=1000,
                temperature=0.01,
                do_sample=True,
            )
            if isinstance(summary, list) and "generated_text" in summary[0]:
                summary = summary[0]["generated_text"][-1]['content']
                self.summarizer.add("memory", summary)
                return summary
            
            return str(summary)
            
        except Exception as e:
            return f"Error generating summary: {str(e)}"


    def chat(self, message: str, config: GenerationConfig = None, display: bool = True, use_summary: bool = False):
        """
        Generate a response to the user message
        Args:
            message: User input message
            config: Generation configuration
            display: Whether to display the response
        """
        if config is None:
            config = GenerationConfig()
        
        if use_summary:
            self.summarize_conversation()
            
        self.memory.add("user", message)
        
        if use_summary:
            llm_input = self.summarizer.messages + [self.memory.messages[-1]]  
        else:
            llm_input = self.memory.messages
        try:
            # Generate response
            outputs = self.pipe(
                llm_input,
                max_new_tokens=config.max_new_tokens,
                temperature=config.temperature,
                top_p=config.top_p,
                top_k=config.top_k,
                num_return_sequences=config.num_return_sequences,
                do_sample=True,
                eos_token_id=self.pipe.tokenizer.eos_token_id,
            )
            # Get the generated response
            if isinstance(outputs, list) and "generated_text" in outputs[0]:
                response = outputs[0]["generated_text"][-1]['content']
            else:
                response = str(outputs)  # Fallback if response format is different
                
            self.memory.add("assistant", response)
            if display:
                self._display_response(response, config)
            
            return response
            
        except Exception as e:
            print(f"\nError during generation: {str(e)}")
            print("\nDebug info:")
            print(f"Messages format: {type(self.memory.messages)}")
            print(f"Messages content: {self.memory.messages}")
            return f"Error generating response: {str(e)}"
        
    def _display_response(self, response: str, config: GenerationConfig):
        """Display model response with rich formatting"""

        try:
            new_response = response.split(json.dumps(self.memory.messages))[-1].strip()
        except:
            new_response = response

        parts = new_response.split('```')

        if parts[0].strip():
            display(Markdown(parts[0].strip()))

        for i in range(1, len(parts), 2):
            code = parts[i].strip()
            try:
                lang, code = code.split('\n', 1) if '\n' in code else ('python', code)
                lexer = get_lexer_by_name(lang.strip().lower(), stripall=True)
            except:
                lexer = Python3Lexer()
                
            formatter = HtmlFormatter(style='monokai')
            display(HTML(f"""
                <style>{formatter.get_style_defs('.highlight')}</style>
                <div class="highlight">
                    {highlight(code.strip(), lexer, formatter)}
                </div>
            """))

            if i + 1 < len(parts) and parts[i + 1].strip():
                display(Markdown(parts[i + 1].strip()))

        config_html = """
        <div style="margin:20px 0">
            <h3>Generation Parameters</h3>
            <table>
        """
        for field in config.__dataclass_fields__:
            config_html += f"""<tr><td><code>{field}</code></td><td>{getattr(config, field)}</td></tr>"""
        config_html += "</table></div>"
        display(HTML(config_html))

    def set_system_prompt(self, content: str):
        """Set or update the system prompt"""
        
        if self.memory.messages and self.memory.messages[0]["role"] == "system":
            self.memory.messages[0] = {"role": "system", "content": content}
        else:
            self.memory.messages.insert(0, {"role": "system", "content": content})
        
        ## Summarizer Memory
        if self.summarizer.messages and self.summarizer.messages[0]["role"] == "system":
            self.summarizer.messages[0] = {"role": "system", "content": content}
        else:
            self.summarizer.messages.insert(0, {"role": "system", "content": content})


    def view_history(self):
        """Display conversation history"""
        self.memory.display()


In [7]:
playground = ChatPlayground()

In [8]:
configs = {
        "creative": GenerationConfig(temperature=1.0, top_p=0.9),
        "focused": GenerationConfig(temperature=0.3, top_p=0.8),
        "balanced": GenerationConfig(temperature=0.7, top_p=0.9),
        "deterministic": GenerationConfig(temperature=0.01, top_p=0.01),
        
    }

In [9]:
playground.set_system_prompt("You are a Python expert. Provide clear, documented code.")

In [10]:
a = playground.chat("Write a function to find prime numbers in python", configs["deterministic"])

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


**Finding Prime Numbers in Python**
=====================================

Here's a well-documented and efficient function to find prime numbers in Python:

**Explanation:**

1.  We create a boolean array, `prime`, of size `n+1` and initialize all values as `True`.
2.  We set `prime[0]` and `prime[1]` to `False`, as 0 and 1 are not prime numbers.
3.  We iterate from 2 to the square root of `n` (inclusive) and mark as composite all the multiples of `p`.
4.  We create a list, `primes`, and iterate from 2 to `n` (inclusive). If `p` is a prime number, we add it to the list.
5.  Finally, we return the list of prime numbers.

**Time Complexity:** O(n log log n)

**Space Complexity:** O(n)

**Note:** This function has a time complexity of O(n log log n) due to the nested loop structure. However, for larger values of `n`, you can use more efficient algorithms like the Sieve of Eratosthenes, which has a time complexity of O(n log log n) as well.

0,1
max_new_tokens,1000.0
temperature,0.01
top_p,0.01
top_k,50.0
num_return_sequences,1.0


In [11]:
playground.memory.messages

[{'role': 'system',
  'content': 'You are a Python expert. Provide clear, documented code.'},
 {'role': 'user',
  'content': 'Write a function to find prime numbers in python'},
 {'role': 'assistant',

In [12]:
playground.memory.rollback(2)

In [13]:
playground.memory.messages

[{'role': 'system',
  'content': 'You are a Python expert. Provide clear, documented code.'}]

In [14]:
playground.chat("Write a function to find prime numbers in C", configs["deterministic"])

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


**Finding Prime Numbers in C**
================================

Here's a simple function to find prime numbers in C. This function uses the Sieve of Eratosthenes algorithm, which is an efficient method for finding all primes smaller than a given number.

**Explanation**
---------------

1.  We create a boolean array `prime` of size `n+1` and initialize all elements to `true`. This array will be used to mark the prime numbers.
2.  We start from 2 and mark all its multiples as composite (i.e., not prime) by setting `prime[i]` to `false`.
3.  We iterate from 2 to the square root of `n` and mark all the multiples of `p` as composite.
4.  Finally, we print all the prime numbers by iterating from 2 to `n` and checking the value of `prime[i]`.

**Example Use Case**
--------------------

When you run the program, it will ask you to enter a number. Let's say you enter `100`. The program will print all the prime numbers up to 100, which are 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, and 97.

**Time Complexity**
-------------------

The time complexity of this algorithm is O(n log log n) due to the Sieve of Eratosthenes algorithm. The space complexity is O(n), as we need to store the boolean array of size `n+1`.

0,1
max_new_tokens,1000.0
temperature,0.01
top_p,0.01
top_k,50.0
num_return_sequences,1.0




In [15]:
playground.memory.messages

[{'role': 'system',
  'content': 'You are a Python expert. Provide clear, documented code.'},
 {'role': 'user', 'content': 'Write a function to find prime numbers in C'},
 {'role': 'assistant',

La logica de hacer `rollback` no esta integrada con `summarize_conversation`, de tal manera que si hacemos `rollback` esto no va a afectar a la memoria sumarizada. En la realidad habria que reconsruir la memoria sumarizada para poder mantener un orden coherente las cosas, pues esta puede contener conversaciones que borramos con el `rollback` pero se quedaron resumidas ahi.

In [16]:
playground.summarize_conversation()

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




In [17]:
playground.chat("What have i asked you to do previously?", configs["deterministic"], use_summary=True)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You have asked me to:

1. Provide clear, documented code.
2. Give a summary of the code.
3. Explain the key points of the code.
4. Provide an example use case for the code.
5. Discuss the time and space complexity of the code.

Please let me know if there's anything else I can help you with.

0,1
max_new_tokens,1000.0
temperature,0.01
top_p,0.01
top_k,50.0
num_return_sequences,1.0


"You have asked me to:\n\n1. Provide clear, documented code.\n2. Give a summary of the code.\n3. Explain the key points of the code.\n4. Provide an example use case for the code.\n5. Discuss the time and space complexity of the code.\n\nPlease let me know if there's anything else I can help you with."

In [18]:
playground.chat("What have i asked you to do previously?", configs["deterministic"], use_summary=False)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You have asked me to:

1. Write a function to find prime numbers in C.
2. Provide clear, documented code for the function.
3. Explain the Sieve of Eratosthenes algorithm used in the function.
4. Provide an example use case for the function.
5. Discuss the time and space complexity of the function.

0,1
max_new_tokens,1000.0
temperature,0.01
top_p,0.01
top_k,50.0
num_return_sequences,1.0


'You have asked me to:\n\n1. Write a function to find prime numbers in C.\n2. Provide clear, documented code for the function.\n3. Explain the Sieve of Eratosthenes algorithm used in the function.\n4. Provide an example use case for the function.\n5. Discuss the time and space complexity of the function.'

## Ejercicio

Prueba diferentes prompts y diferentes parametros (top_p, temperature, max_tokens) para ver como se comportan los LLMS.

# Ejercicio

Como construyes un chatbot, que system_prompt necesitas?