<a href="https://colab.research.google.com/github/kushc2004/LLM/blob/main/catalyst.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers



In [1]:
# agents/agent.py

from typing import Dict, Optional, List, Tuple
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

class Agent:
    """Agent backed by a Hugging Face causal language model.

    Constructing an instance loads both the tokenizer and the model weights
    for ``model_name`` through ``AutoTokenizer.from_pretrained`` and
    ``AutoModelForCausalLM.from_pretrained``.

    Subclasses are expected to override :meth:`generate_reply`; the base
    implementation only returns a "not implemented" placeholder.
    """

    def __init__(self, name: str, description: str, model_name: str = "llama-2-7b", **kwargs):
        """Store the agent metadata and load the tokenizer and model.

        Args:
            name: Identifier of the agent.
            description: Free-text description of the agent.
            model_name: Model identifier passed to ``from_pretrained``.
            **kwargs: Extra options, kept verbatim on ``self.kwargs``; this
                class does not read them itself.
        """
        self.name = name
        self.description = description
        # Used as the system message whenever ``llm_call`` is given a bare prompt.
        self.system_prompt = "You're a helpful assistant."
        self.kwargs = kwargs
        self.model_name = model_name

        # Load the model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name)

    @staticmethod
    def _check_messages(messages: List) -> None:
        """Raise ``AssertionError`` unless *messages* is a list of role/content dicts."""
        if not isinstance(messages, list):
            raise AssertionError("`messages` must be a list of dicts.")
        for message in messages:
            if not isinstance(message, dict):
                raise AssertionError("Every message must be a dict.")
            if "role" not in message:
                raise AssertionError("Every message needs a 'role' key.")
            if "content" not in message:
                raise AssertionError("Every message needs a 'content' key.")

    def llm_call(
        self,
        prompt: Optional[str] = None,
        messages: Optional[List] = None,
        seed: int = 10,
        max_new_tokens: int = 200,
    ) -> str:
        """Sample a completion from the model and return only the new text.

        Exactly one of ``prompt`` or ``messages`` must be given. A bare
        ``prompt`` is wrapped into a system message (``self.system_prompt``)
        followed by a user message. The messages are flattened into
        ``"role: content"`` lines joined by newlines before tokenization.

        Args:
            prompt: User prompt text.
            messages: Conversation as a list of dicts, each with ``"role"``
                and ``"content"`` keys.
            seed: Seed applied with ``torch.manual_seed`` right before
                sampling so repeated calls with the same seed are
                reproducible. Note that this reseeds torch's global RNG.
            max_new_tokens: Upper bound on the number of generated tokens,
                not counting the prompt.

        Returns:
            The decoded completion, without the echoed prompt.

        Raises:
            AssertionError: If both or neither of ``prompt`` / ``messages``
                are supplied, or ``messages`` is malformed. Raised
                explicitly (not via ``assert``) so the checks survive
                ``python -O``.
        """
        # Ensure exactly one of prompt or messages is provided
        if (prompt is None) == (messages is None):
            raise AssertionError("Provide exactly one of `prompt` or `messages`.")

        if messages is None:
            messages = [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": prompt},
            ]
        else:
            self._check_messages(messages)

        # Concatenate messages into a single prompt
        full_prompt = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

        # Tokenize the input
        inputs = self.tokenizer(full_prompt, return_tensors="pt")
        prompt_length = inputs["input_ids"].shape[-1]

        # Sampling is stochastic; seeding here is what makes `seed` effective.
        torch.manual_seed(seed)

        # Generate response. `max_new_tokens` is used instead of `max_length`
        # because the latter also counts the prompt tokens, leaving little or
        # no budget for the answer when the prompt is long.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                top_p=0.95,
                top_k=50,
                temperature=0.7,
            )

        # A causal LM returns the prompt tokens followed by the continuation;
        # drop the prompt so the caller receives only the model's reply.
        completion_ids = outputs[0][prompt_length:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True)

    def generate_reply(
        self,
        task: str,
        state: Dict,
        sender: "Agent",
    ) -> Tuple[str, Dict]:
        """Return a placeholder reply together with the unchanged ``state``.

        Args:
            task: Task description (unused by this base implementation).
            state: State dict, returned as-is.
            sender: Agent that sent the task (unused by this base
                implementation).

        Returns:
            A ``(reply, state)`` tuple whose reply signals that the method
            has not been implemented.
        """
        return (
            "This is a reply from the agent. REPLY NOT IMPLEMENTED! Terminate the whole process!",
            state,
        )

In [None]:
# Demo cell: build an agent and send it a single prompt.
# Constructing the agent loads the tokenizer and model for the given model name.
agent = Agent(name="TestAgent", description="A test agent", model_name="NousResearch/Llama-2-7b-chat-hf")

# Call the model with a prompt (wrapped with the agent's system prompt inside llm_call)
response = agent.llm_call(prompt="How is the weather today in Mumbai?")
print(response)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]