# LLMs

In [3]:
from dotenv import find_dotenv, load_dotenv

# Locate the .env file first, then load its variables into the process
# environment; the call's return value is shown as the cell output.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

### Text Completion models - DEPRECATED - we will just use ChatModels

In [None]:
# `OpenAI` is the text-completion model class,
# `ChatOpenAI` is its chat-model counterpart.
import os
from langchain_openai import OpenAI
from pydantic import SecretStr

# Connection settings come from environment variables; any variable that is
# not set falls back to an empty string.
completion_settings = {
    "base_url": os.getenv("API_URL", ""),
    "api_key": SecretStr(os.getenv("API_KEY", "")),
    "model": os.getenv("MODEL", ""),
    "temperature": 0,
}
llm = OpenAI(**completion_settings)
llm.invoke("Tell me a joke")

'<|channel|>analysis<|message|>The user says "Tell me a joke". We need to comply with policy. It\'s a request for content that is allowed. There\'s no disallowed content. So we can provide a joke. Just give a short, clean joke. Ensure it\'s not hateful or harassing. It\'s fine.\n\nWe should respond with a joke. No policy issues.<|end|><|start|>assistant<|channel|>final<|message|>Why don’t scientists trust atoms?  \n\nBecause they make up everything!'

In [8]:
# Single prompt, passed by keyword; the returned text is the cell output.
llm.invoke(input="Tell me a joke")

'<|channel|>analysis<|message|>The user says "Tell me a joke". We need to comply with policy. It\'s a request for content that is allowed. There\'s no disallowed content. So we can provide a joke. Just give a short, clean joke. Ensure it\'s not hateful or harassing. It\'s fine.\n\nWe should respond with a joke. No policy issues.<|end|><|start|>assistant<|channel|>final<|message|>Why don’t scientists trust atoms?  \n\nBecause they make up everything!'

In [4]:
# Single prompt, passed positionally; the returned text is the cell output.
llm.invoke("Tell me a joke")

'\n\nWhy did the tomato turn red? Because it saw the salad dressing!'

Batch Processing - Make multiple requests at once

In [9]:
# Each entry is an independent prompt; `generate` sends them in one call.
joke_prompts = [
    "Tell me a joke about cows",
    "Tell me a joke about parrots",
]
result = llm.generate(joke_prompts)
print(result)

generations=[[Generation(text='<|channel|>analysis<|message|>The user wants two jokes: one about cows and one about parrots. They want "tell me a joke about cows" and "tell me a joke about parrots". So we need to provide a joke for each. The user didn\'t specify any constraints beyond that. We should produce short, humorous jokes.\n\nWe must ensure the content is appropriate. Jokes about animals are fine. Provide two separate jokes. Possibly format them as bullet points or separate lines. Keep it concise and clear. No disallowed content. It\'s straightforward.\n\nWe can do something like:\n\n- Cow joke: "Why did the cow go to space? Because it wanted to see the Milky Way!"\n\n- Parrot joke: "What does a parrot say when it\'s tired? \'Polly-gone!\'"\n\nBut we need to be careful with jokes that might be offensive. These are fine.\n\nWe can also do:\n\nCow: "Why did the cow cross the road? To get to the udder side."\n\nParrot: "Why don\'t parrots like to play cards? Because they always ta

In [10]:
# Metadata returned alongside the generations; in the run captured below it
# holds the token usage and the model name.
result.llm_output

{'token_usage': {'total_tokens': 333,
  'prompt_tokens': 83,
  'completion_tokens': 250},
 'model_name': 'unsloth/gpt-oss-20b'}

### Creating a conversation with SystemMessage, HumanMessage and AIMessage

In [11]:
# Chat-model client. This rebinds `llm`, so the calls that follow use the chat
# model instead of the text-completion client.
#
# `os` and `SecretStr` are imported here as well, so the cell runs on a fresh
# kernel even when the deprecated text-completion cell is skipped or removed.
import os

from langchain_openai import ChatOpenAI
from pydantic import SecretStr

# Connection settings come from environment variables; any variable that is
# not set falls back to an empty string.
llm = ChatOpenAI(
    base_url=os.getenv("API_URL", ""),
    api_key=SecretStr(os.getenv("API_KEY", "")),
    model=os.getenv("MODEL", ""),
    temperature=0,
)

In [12]:
# A chat model also accepts a plain string; printing the returned message
# shows its content together with the response and usage metadata.
cow_joke_prompt = "Tell me a joke about cows"
result = llm.invoke(cow_joke_prompt)
print(result)

content='Sure, here’s one for you:\n\n**Why don’t cows ever get lost?**\n\nBecause they always follow the *moo*ving directions! 🐄😄' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 76, 'prompt_tokens': 73, 'total_tokens': 149, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'unsloth/gpt-oss-20b', 'system_fingerprint': 'unsloth/gpt-oss-20b', 'id': 'chatcmpl-fddg23cxnmdattu509dmr', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None} id='run--5f00b388-d011-402b-8c90-996cb7e47849-0' usage_metadata={'input_tokens': 73, 'output_tokens': 76, 'total_tokens': 149, 'input_token_details': {}, 'output_token_details': {}}


In [13]:
# The message classes are defined in `langchain_core.messages`;
# `langchain.schema` is only a legacy re-export of them, so import from the
# canonical location.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# Conversation history: a system instruction, one earlier human/AI exchange,
# and the new human question the model should answer.
messages = [
    SystemMessage(content="You are a helpful assistant specialized in providing information about BellaVista Italian Restaurant."),
    HumanMessage(content="What's on the menu?"),
    AIMessage(content="BellaVista offers a variety of Italian dishes including pasta, pizza, and seafood."),
    HumanMessage(content="Do you have vegan options?"),
]

In [14]:
# Send the whole conversation; the reply message is shown as the cell output.
llm_result = llm.invoke(messages)
llm_result

AIMessage(content='Yes! BellaVista has several plant‑based options you can enjoy:\n\n| Dish | Description |\n|------|-------------|\n| **Vegan Margherita Pizza** | Classic tomato sauce, fresh basil, and a dairy‑free cheese blend (or no cheese at all). |\n| **Pasta Primavera** | Whole‑grain spaghetti tossed with seasonal veggies, garlic, olive oil, and a sprinkle of nutritional yeast for a cheesy flavor. |\n| **Vegan Mushroom Risotto** | Creamy arborio rice cooked in vegetable broth with sautéed mushrooms, onions, and fresh herbs. |\n| **Caesar Salad (vegan)** | Crisp romaine, homemade croutons, lemon‑olive oil dressing, and a drizzle of tahini for the “cheese” flavor. |\n| **Bruschetta Trio** | Classic tomato & basil, roasted red pepper, or sautéed spinach with garlic – all served on toasted ciabatta. |\n\nIf you have any dietary restrictions or want to double‑check ingredients (e.g., hidden dairy in sauces), just let the server know—they’re happy to accommodate and can suggest additio

Batch Processing for Chat Models - Make multiple requests at once

In [15]:
# The same question is sent in two independent conversations, each with its
# own system instruction (one per target language).
question_text = "Do you have vegan options?"

german_conversation = [
    SystemMessage(content="You are a helpful assistant that translates English to German"),
    HumanMessage(content=question_text),
]
spanish_conversation = [
    SystemMessage(content="You are a helpful assistant that translates the English to Spanish."),
    HumanMessage(content=question_text),
]

batch_messages = [german_conversation, spanish_conversation]
batch_result = llm.generate(batch_messages)
batch_result

LLMResult(generations=[[ChatGeneration(text='"Gibt es vegane Optionen?"', generation_info={'finish_reason': 'stop', 'logprobs': None}, message=AIMessage(content='"Gibt es vegane Optionen?"', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1407, 'prompt_tokens': 90, 'total_tokens': 1497, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'unsloth/gpt-oss-20b', 'system_fingerprint': 'unsloth/gpt-oss-20b', 'id': 'chatcmpl-os47mwor1x7rqbvcadmx6', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--6109f5cc-3bb0-44cb-8858-8cea8e1c9af8-0', usage_metadata={'input_tokens': 90, 'output_tokens': 1407, 'total_tokens': 1497, 'input_token_details': {}, 'output_token_details': {}}))], [ChatGeneration(text='¿Tienes opciones veganas?', generation_info={'finish_reason': 'stop', 'logprobs': None}, message=AIMessage(content='¿Tienes opciones veganas?', additional_kwargs={'refusal': None}, response_metadata={

Extracting the output ourselves (we will later take a look at Output-Parsers!)


In [16]:
# `generations` holds one list of candidates per conversation; keep the text
# of the first candidate from each.
translations = [
    candidates[0].text
    for candidates in batch_result.generations
]
translations

['"Gibt es vegane Optionen?"', '¿Tienes opciones veganas?']