In [3]:
from transformers import  LlamaConfig, LlamaModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./tknz/tiny-ko-tokenizer", trust_remote_code=True)

tokenizer.save_pretrained("./model/tiny-random")

print(len(tokenizer))

32000


In [4]:
config = LlamaConfig(
    hidden_size=576,        # 히든 크기
    num_attention_heads=9,   # 어텐션 헤드 수
    num_hidden_layers=30,    # 레이어 수
    intermediate_size=1536,  # FFN 차원 (보통 hidden_size * (2 ~ 4)
    num_key_value_heads=3,
    vocab_size=len(tokenizer),
    max_position_embeddings=4096,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    # bos_token_id=tokenizer.bos_token_id,
)

print(f"Config: {config}")

# 랜덤 파라미터로 초기화
model = LlamaModel(config)

# 모델 파라미터 수 확인
model_size = sum(t.numel() for t in model.parameters())
print(f"Model size: {model_size/1000**3:.1f}B parameters")
print(f"Model size: {model_size/1000**2:.1f}M parameters")

model.save_pretrained("./tiny-random")

Config: LlamaConfig {
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 3,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 576,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 9,
  "num_hidden_layers": 30,
  "num_key_value_heads": 3,
  "pad_token_id": 1,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "transformers_version": "4.52.4",
  "use_cache": true,
  "vocab_size": 32000
}

Model size: 0.1B parameters
Model size: 124.6M parameters


In [5]:
import transformers
import torch

model_id = "./tiny-random"

pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto",
    max_new_tokens=100
)

reps = pipeline("""
한글은 대한민국의""")[0]["generated_text"]
print(reps)

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at ./tiny-random and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


TypeError: not a string