In [17]:
from transformers import  LlamaConfig, LlamaModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("kakaocorp/kanana-1.5-2.1b-base", trust_remote_code=True)

# <<<< 약간의 편집 <<<<
chat_template = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B", trust_remote_code=True).chat_template
tokenizer.chat_template = chat_template
tokenizer.add_special_tokens({
    "additional_special_tokens": [
        "<|im_start|>", "<|im_end|>", "<|finetune_pad|>", "<tools>", "</tools>", "<tool_call>", "</tool_call>", "<tool_response>", "</tool_response>", "<think>", "</think>"
    ]
})
tokenizer.eos_token = "<|im_end|>"
tokenizer.bos_token = "<|im_start|>"
tokenizer.pad_token = "<finetune_pad>"
# >>>> 약간의 편집 >>>>

config = LlamaConfig(
    hidden_size=576,        # 히든 크기
    num_attention_heads=9,   # 어텐션 헤드 수
    num_hidden_layers=30,    # 레이어 수
    intermediate_size=1536,  # FFN 차원 (보통 hidden_size * (2 ~ 4)
    num_key_value_heads=3,
    vocab_size=len(tokenizer),
    max_position_embeddings=2048,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    bos_token_id=tokenizer.bos_token_id,
)

print(f"Config: {config}")

# 랜덤 파라미터로 초기화
model = LlamaModel(config)

# 모델 파라미터 수 확인
model_size = sum(t.numel() for t in model.parameters())
print(f"Model size: {model_size/1000**3:.1f}B parameters")
print(f"Model size: {model_size/1000**2:.1f}M parameters")

model.save_pretrained("./tiny-random")
tokenizer.save_pretrained("./tiny-random")

Config: LlamaConfig {
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128256,
  "eos_token_id": 128257,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 576,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 9,
  "num_hidden_layers": 30,
  "num_key_value_heads": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "transformers_version": "4.52.4",
  "use_cache": true,
  "vocab_size": 128267
}

Model size: 0.2B parameters
Model size: 180.1M parameters


('./tiny-random/tokenizer_config.json',
 './tiny-random/special_tokens_map.json',
 './tiny-random/chat_template.jinja',
 './tiny-random/tokenizer.json')

In [18]:
import transformers
import torch

model_id = "./tiny-random"

pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto",
    max_new_tokens=100
)

reps = pipeline("""
한글은 대한민국의""")[0]["generated_text"]
print(reps)

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at ./tiny-random and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0



한글은 대한민국의 universalंरotions.Members성ंर fel glVertex justo	th độcンタはず whims depreciation虽然 depreciation whims이며ulneryor độcloslosンタ이며 whims độc took.signals userEmail.cs았다 Awareруж ball.gif Aware'].$(media took showing.gif Aware.gifrozenруж.atan Representatives	Entity_equ Podcast(media.signals ($_ getFile took showing whims took � проек worsening.atan("../../ showing싱 took contains PodcastΟΡ showing Seeds universal getFile.atan）。

 universal nobンタ）。

ンタ getFile DAYS _(ΟΡ_equ(media.atan Seeds Podcast حرفه проекンタΟΡ았다 검색(media justo took
