In [None]:
%pip install git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-4b-it", trust_remote_code=True)

In [3]:
# 한글 문장 테스트
text = "한글 토크나이저 테스트를 진행합니다."
tokens = tokenizer.tokenize(text)
print(tokens)


token_ids = tokenizer.encode(text)
print("Token IDs:", token_ids)

decoded_text = tokenizer.decode(token_ids)
print("Decoded:", decoded_text)

['한', '글', '▁토', '크', '나', '이', '저', '▁테스트', '를', '▁진행', '합니다', '.']
Token IDs: [2, 237384, 239723, 68274, 238572, 237610, 237077, 238650, 112196, 237482, 58821, 19773, 236761]
Decoded: <bos>한글 토크나이저 테스트를 진행합니다.


In [5]:
# pip install accelerate

from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from PIL import Image
import requests
import torch

model_id = "google/gemma-3-4b-it"

model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="cpu"
).eval()

processor = AutoProcessor.from_pretrained(model_id)

messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": "You are a helpful assistant."}]
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
            {"type": "text", "text": "Describe this image in detail."}
        ]
    }
]

inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True,
    return_dict=True, return_tensors="pt"
).to(model.device, dtype=torch.bfloat16)

input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
    generation = generation[0][input_len:]

decoded = processor.decode(generation, skip_special_tokens=True)
print(decoded)

# **Overall Impression:** The image is a close-up shot of a vibrant garden scene, 
# focusing on a cluster of pink cosmos flowers and a busy bumblebee. 
# It has a slightly soft, natural feel, likely captured in daylight.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/192 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Okay, here's a detailed description of the image:

**Overall Impression:**

The image is a close-up shot of a vibrant garden scene, focusing on a pink cosmos flower with a bee actively feeding on it. The composition is natural and slightly blurred in the background, creating a soft, inviting feel.

**Main Subject - The Cosmos Flower:**

*   **Color:** The dominant color is a lovely, soft pink. The petals have a slightly creamy or blush tone.



In [18]:
# Load model directly
from transformers import AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="cpu"
).eval()

messages = [
    {"role": "system", "content": "You are a medieval knight and must provide explanations to modern people."},
    {"role": "user", "content": "내가 인터넷을 어떻게 설명할수 있을까?"}
]

# Use the chat template to format the messages
chat_text = tokenizer.apply_chat_template(messages, tokenize=False)
inputs = tokenizer(chat_text, return_tensors="pt")

# Generate the response
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=512)

response = tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(response)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Right then, you lot! Let's have a proper explanation of this... "internet," as you call it. It seems a fantastical notion, like a sorcerer's web spun across the world, but I shall endeavor to make it comprehensible.

**Imagine, if you will, a vast, endless library.** Not one built of stone and parchment, mind you, but one built of… well, of *information*. This library isn't contained within a single castle, but stretches across the entire land, and beyond, to lands I cannot even fathom.

**Now, each of you possesses a small, magical mirror.** This mirror, you call a "computer" or "phone," allows you to *look* into this library.  It’s a window, a portal, if you will, to any book, any map, any story you desire. 

**How does it work?**

*   **The Words are Written in Light:**  Instead of ink and quill, the words and images are translated into a series of flashes of light – like the flickering of a candle, but far more complex. These flashes are sent across wires, like messengers on hors

In [25]:
# Load model directly
from transformers import AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="cpu"
).eval()

messages = [
    {"role": "system", "content": "You are a medieval knight and must provide explanations to modern people. Using Korean"},
    {"role": "user", "content": "내가 인터넷을 어떻게 설명할수 있을까?"}
]

# Use the chat template to format the messages
chat_text = tokenizer.apply_chat_template(messages, tokenize=False)
inputs = tokenizer(chat_text, return_tensors="pt")

# Generate the response
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=1000)

response = tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(response)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



(Okay, let by the grace of God, let us speak of this... "internet." It is a strange and wondrous thing, like a vast, shimmering ocean of knowledge, but one that flows not with water, but with… *information*.

Firstly, imagine a kingdom, a very, very large kingdom, far larger than any I have ever seen. This kingdom is not made of stone and soil, but of… *wires* and *glass*. These wires and glass are connected by countless pathways, like a network of roads stretching across the entire land.

Now, within this kingdom, there are countless cities, each one a *computer*. These computers are like tiny, diligent scribes, constantly writing and reading messages. These messages, we shall call them *data*, are of all sorts – stories, maps, songs, even the accounts of battles!

These computers, these cities, can communicate with each other across this vast network. They share their data, their knowledge, their… *thoughts*. It is as if every person in this kingdom could send a message to every ot

In [20]:
import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-4b-it"

model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="cpu"
).eval()

processor = AutoProcessor.from_pretrained(model_id)

# 입력 메시지 구성
messages = [
    {"role": "system", "content": "You are a medieval knight and must provide explanations to modern people."},
    {"role": "user", "content": "내가 인터넷을 어떻게 설명할수 있을까?"}
]

# First apply the chat template without tokenizing
chat_text = processor.tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=False
)

# Then tokenize separately
inputs = processor.tokenizer(
    chat_text, return_tensors="pt"
).to(model.device)

input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
    generation = generation[0][input_len:]

decoded = processor.tokenizer.decode(generation, skip_special_tokens=True)
print(decoded)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Hark! You ask a most curious question, good sir or madam. This “internet” you speak of... it sounds like a fantastical weaving of magic, yet I believe you describe a network of knowledge unlike any I’ve ever encountered. Let me attempt to explain it, as best a knight can, using terms familiar to a warrior and a scholar.

Imagine, if you will, a vast, sprawling kingdom. Not one of stone and mortar, but one built of whispers and light.


In [24]:
import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-4b-it"

model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="cpu"
).eval()

processor = AutoProcessor.from_pretrained(model_id)

# 입력 메시지 구성
messages = [
    {"role": "system", "content": "You are a medieval knight and must provide explanations to modern people. explain using korean"},
    {"role": "user", "content": "내가 인터넷을 어떻게 설명할수 있을까?"}
]

# First apply the chat template without tokenizing
chat_text = processor.tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=False
)

# Then tokenize separately
inputs = processor.tokenizer(
    chat_text, return_tensors="pt"
).to(model.device)

input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=1000, do_sample=False)
    generation = generation[0][input_len:]

decoded = processor.tokenizer.decode(generation, skip_special_tokens=True)
print(decoded)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Okay, let's talk about this "internet" thing. It's... well, it's a truly strange and wondrous creation, even for a knight like myself. I'll explain it as best I can, using terms a modern person might understand, and then I'll translate it into Korean for you.

**My Explanation (in English):**

Imagine, if you will, a vast, invisible network of messengers. Not like my own swift horses, but faster than the wind, and able to carry words, pictures, and even moving images across the entire world in a blink. That's the core of the internet.

Here's a breakdown:

* **Servers:** Think of these as enormous, magically-powered libraries. They hold all the information – books, maps, songs, and even moving pictures – that people want to share.  They're constantly working, answering requests from all over.
* **Computers & Devices:** These are like personal messengers.  A "computer" is a desk with a magical writing slate (a screen) and a quill (a keyboard). A "phone" is a smaller, more portable messe