In [None]:
import os

# Langfuse credentials are read from the environment so that real keys never
# have to be typed into (and committed with) the notebook. When the variables
# are unset, both values fall back to the empty-string placeholder.
private = os.environ.get("LANGFUSE_SECRET_KEY", "")
public = os.environ.get("LANGFUSE_PUBLIC_KEY", "")

# Base URL of the Langfuse instance handed to the client.
host = 'https://cloud.langfuse.com'

In [None]:
from langfuse import Langfuse

# Langfuse client built from the key pair and host defined earlier in the notebook.
langfuse = Langfuse(secret_key=private, public_key=public, host=host)

In [None]:
import base64
import os
from getpass import getpass

# The OTLP headers below use HTTP Basic auth, i.e. base64("<public key>:<secret key>").
LANGFUSE_PUBLIC_KEY = public
LANGFUSE_SECRET_KEY = private
LANGFUSE_AUTH = base64.b64encode(
    f"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}".encode()
).decode()

os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://cloud.langfuse.com/api/public/otel" # EU data region
# os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://us.cloud.langfuse.com/api/public/otel" # US data region
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"

# Your Hugging Face token. Nothing else in the notebook assigns `HF_token`, so
# it is resolved here: reuse HF_TOKEN when it is already exported, otherwise
# prompt for it without echoing the value into the notebook.
HF_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
os.environ["HF_TOKEN"] = HF_token

In [12]:
from opentelemetry.sdk.trace import TracerProvider

from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Spans are handed to a simple span processor wrapping an OTLP/HTTP exporter.
# The exporter is built without arguments — presumably it picks up the
# OTEL_EXPORTER_OTLP_* variables exported earlier; confirm against the exporter docs.
span_processor = SimpleSpanProcessor(OTLPSpanExporter())
trace_provider = TracerProvider()
trace_provider.add_span_processor(span_processor)

# Route the smolagents instrumentation through this provider.
SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)

In [5]:
from smolagents import (
    CodeAgent,
    ToolCallingAgent,
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    HfApiModel,
)

# Hosted model shared by both agents below.
model = HfApiModel(model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")

# Tool-calling agent equipped with the web-search and page-visit tools.
search_agent = ToolCallingAgent(
    name="search_agent",
    description="This is an agent that can do web search.",
    model=model,
    tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
)

# Code agent with no tools of its own; it manages `search_agent`.
manager_agent = CodeAgent(model=model, tools=[], managed_agents=[search_agent])

# Kept as the last expression of the cell so the run result is displayed.
manager_agent.run("How can I use custom model instead of HfApiModel")

AgentGenerationError: Error in generating model output:
402 Client Error: Payment Required for url: https://router.huggingface.co/hf-inference/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1/chat/completions (Request ID: Root=1-67ea5d62-523798c83442198b6df5e6fe;6633549a-fa4f-4ff9-ae5b-93671441e3e7)

You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.

In [8]:
# Inspect which attributes and methods the current `model` object exposes.
dir(model)

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_prepare_completion_kwargs',
 'client',
 'custom_role_conversions',
 'flatten_messages_as_text',
 'from_dict',
 'get_token_counts',
 'kwargs',
 'last_input_token_count',
 'last_output_token_count',
 'model_id',
 'postprocess_message',
 'provider',
 'to_dict',
 'tool_arguments_key',
 'tool_name_key']

# with our model

In [1]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import pipeline
import torch

# 4-bit NF4 quantization settings.
# NOTE(review): this config is built but NOT passed to `from_pretrained`
# below, so the model is not quantized. To use it, pass
# `quantization_config=bnb_config` and revisit the bfloat16 cast further down,
# which may not apply to a quantized model — confirm before enabling.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

peft_model_id = "langdai/gemma-2-2B-it_think_funcion_call"
# device = "auto"
# The adapter config names the base checkpoint to load.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base weights directly in bfloat16 instead of loading at the default
# precision and down-casting afterwards, and fall back to CPU when no CUDA
# device is available rather than failing on the hard-coded "cuda:0".
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
    device_map="cuda:0" if torch.cuda.is_available() else "cpu",
)

# The tokenizer comes from the adapter repo; the embedding matrix is resized
# to its vocabulary size before the adapter is attached.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
base_model.resize_token_embeddings(len(tokenizer))

# Attach the adapter, then cast the whole wrapped model (adapter weights
# included) to bfloat16 and switch to inference mode.
peft_model = PeftModel.from_pretrained(base_model, peft_model_id)
peft_model.to(torch.bfloat16)
peft_model.eval()


generator = pipeline("text-generation", model=peft_model, tokenizer=tokenizer)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2Fo

In [10]:
# Plain-string prompt, no generation arguments passed.
# NOTE(review): the recorded result only echoes the prompt back — presumably
# because a raw string is not wrapped in the chat format; confirm.
res = generator("how are you?")

In [11]:
# Display the result of the plain-string generation call.
res

[{'generated_text': 'how are you?'}]

In [12]:
# Same question, this time sent as a chat message list; keep only the newly
# generated text of the first returned item.
output = generator(
    [{"role": "user", "content": 'how are you?'}],
    max_new_tokens=128,
    return_full_text=False,
)[0]

In [13]:
# Display the chat-style generation result.
output

{'generated_text': "I am an AI model, so I don't have feelings or experiences like humans do. However, I am here to assist you with any questions or tasks you may have. How can I help you today?"}

In [2]:
# Hand-written function-calling prompt. The string already contains the turn
# markers (<bos>, <start_of_turn>, <end_of_turn>, <eos>), the tool list
# (convert_currency, calculate_distance), the expected <tool_call> format, the
# user request, and an opening <think> tag at the start of the model turn.
prompt="""<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'convert_currency', 'description': 'Convert from one currency to another', 'parameters': {'type': 'object', 'properties': {'amount': {'type': 'number', 'description': 'The amount to convert'}, 'from_currency': {'type': 'string', 'description': 'The currency to convert from'}, 'to_currency': {'type': 'string', 'description': 'The currency to convert to'}}, 'required': ['amount', 'from_currency', 'to_currency']}}}, {'type': 'function', 'function': {'name': 'calculate_distance', 'description': 'Calculate the distance between two locations', 'parameters': {'type': 'object', 'properties': {'start_location': {'type': 'string', 'description': 'The starting location'}, 'end_location': {'type': 'string', 'description': 'The ending location'}}, 'required': ['start_location', 'end_location']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{tool_call}
</tool_call>Also, before making a call to a function take the time to plan the function to take. Make that thinking process between <think>{your thoughts}</think>

Hi, I need to convert 500 USD to Euros. Can you help me with that?<end_of_turn><eos>
<start_of_turn>model
<think>"""

In [3]:
# NOTE(review): `prompt` already carries its own turn markers, yet it is sent
# here as the content of a single user chat message — confirm this is intended.
output = generator(
    [{"role": "user", "content": prompt}],
    max_new_tokens=128,
    return_full_text=False,
)[0]

In [4]:
# Display the generation result for the function-calling prompt.
output

{'generated_text': "Okay, so the user is asking to convert 500 USD to Euros. I need to figure out how to respond using the available functions. Let me look at the tools provided. There's a function called convert_currency which does exactly that—it converts one currency to another. The parameters needed are amount, from_currency, and to_currency. \n\nThe user provided the amount as 500, the source currency as USD, and the target as EUR. So, I should plug these values into the function. I don't need to make any assumptions about the values since the user has given"}

# merged

In [5]:
from peft import AutoPeftModelForCausalLM

# NOTE(review): this was described as a local path with a pointer to a post
# scriptum, but the value has the form of a Hub repo id and no post scriptum
# is visible in this part of the notebook — confirm which is meant.
model_id = "langdai/gemma-2-2B-it_think_funcion_call"
# Load the PEFT model for this id and print its (wrapper) type.
peft_model = AutoPeftModelForCausalLM.from_pretrained(model_id)
print(type(peft_model))

# Merge the adapter into the underlying model and drop the PEFT wrapper.
merged_model = peft_model.merge_and_unload()
# The adapters are merged now and it is transformers class again
print(type(merged_model))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

<class 'peft.peft_model.PeftModelForCausalLM'>


```python
from transformers import AutoModelForCausalLM

# Load original tied model
model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it", tie_word_embeddings=False)

# Set the randomly initialized lm_head to the previously tied embeddings
model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()

# Save the untied model
untied_model_dir = "dir/for/untied/model"
model.save_pretrained(untied_model_dir)
model.config.save_pretrained(untied_model_dir)

# Now use the original model but in untied format
model = AutoModelForCausalLM.from_pretrained(untied_model_dir)
```



<class 'transformers.models.gemma2.modeling_gemma2.Gemma2ForCausalLM'>


In [26]:
# Display the module structure of the merged model.
merged_model

Gemma2ForCausalLM(
  (model): Gemma2Model(
    (embed_tokens): Embedding(256008, 2304, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma2DecoderLayer(
        (self_attn): Gemma2Attention(
          (q_proj): Linear(in_features=2304, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2304, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2304, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2304, bias=False)
        )
        (mlp): Gemma2MLP(
          (gate_proj): Linear(in_features=2304, out_features=9216, bias=False)
          (up_proj): Linear(in_features=2304, out_features=9216, bias=False)
          (down_proj): Linear(in_features=9216, out_features=2304, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
        (post_attention_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
        (pre_feedforward_layernorm): Gemm

In [6]:
# Reuses the `prompt` string defined earlier in the notebook rather than
# keeping a second, identical copy of the function-calling prompt here.

# Text-generation pipeline around the merged model, sharing the tokenizer
# that was loaded with the adapter.
generator_merge = pipeline("text-generation", model=merged_model, tokenizer=tokenizer)

Device set to use cuda:0


In [8]:
# Three-turn conversation for the merged model: the function-calling prompt,
# an assistant follow-up question, and the user's confirmation.
output = generator_merge(
    [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Do you mind if I call the function to convert 500 USD to Euros?"},
        {"role": "user", "content": "please call the function"},
    ],
    max_new_tokens=128,
    return_full_text=False,
)[0]

print(output)

{'generated_text': ' involving function calls to assist with user queries. Here are the available tools: inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform inform'}


# smolagents with our model

In [None]:
# def generate(query):
#     output = generator([{"role": "user", "content": query}], max_new_tokens=128, return_full_text=False)[0]
#     return output['generated_text']


# import torch

# class LocalHfModel:
#     def __init__(self):
#         self.model = peft_model
#         self.tokenizer = tokenizer
    
#     def generate(self, prompt, max_length=100):
#         inputs = self.tokenizer(prompt, return_tensors="pt")
#         with torch.no_grad():
#             outputs = self.model.generate(**inputs, max_length=max_length)
#         return self.tokenizer.decode(outputs[0], skip_special_tokens=True)



from smolagents import TransformersModel

# Rebind `model` to a smolagents TransformersModel pointed at the fine-tuned
# repo id, with automatic device placement and up to 4096 new tokens.
model = TransformersModel(
    device_map="auto",
    max_new_tokens=4096,
    model_id='langdai/gemma-2-2B-it_think_funcion_call',
)

In [None]:
from smolagents import (
    CodeAgent,
    ToolCallingAgent,
    DuckDuckGoSearchTool,
    VisitWebpageTool
)

# Both agents run on the `model` object created in the previous cell (the
# TransformersModel). `model` is intentionally not rebound to `generate`:
# that helper exists only as commented-out code, so referencing the name
# raises NameError and would also discard the TransformersModel instance.

# Tool-calling agent equipped with the web-search and page-visit tools.
search_agent = ToolCallingAgent(
    tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
    model=model,
    name="search_agent",
    description="This is an agent that can do web search.",
)

# Code agent with no tools of its own; it manages `search_agent`.
manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[search_agent],
)

# Kept as the last expression of the cell so the run result is displayed.
manager_agent.run(
    "How are you?"
)