In [1]:
#====================================================================================================
# EleutherAI/gpt-neo text generation example
# => https://huggingface.co/EleutherAI
#
# Difference between gpt-2 and gpt-neo when using the huggingface library
# => use GPTNeoForCausalLM instead of GPT2LMHeadModel
#
# gpt-neo-1.3B model spec
# => model_size: 5G, tokenizer: byte-level BPE (GPT2Tokenizer), vocab_num: 50,257
#
# gpt-neo-125M model spec
# => model_size: 527M, tokenizer: byte-level BPE (GPT2Tokenizer), vocab_num: 50,257
#====================================================================================================
import torch
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

# Checkpoint to load: a local directory, or one of the hub ids listed below.
model_path = '../model/gpt-2/gpt-neo-125M/'
#model_path='EleutherAI/gpt-neo-1.3B'
#model_path='EleutherAI/gpt-neo-125M'
#model_path='EleutherAI/gpt-neo-2.7B'

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the later model.to(device) / input_ids.to(device) calls still work.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Load the tokenizer first, then the causal-LM weights, both from model_path.
tokenizer = GPT2Tokenizer.from_pretrained(model_path)

# Module.to() returns the module itself, so the load and the device move can be
# chained; `model` ends up bound to the same object as before.
model = GPTNeoForCausalLM.from_pretrained(model_path).to(device)

# Trailing expression: the notebook renders the module architecture as output.
model

GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0, inplace=False)
    (h): ModuleList(
      (0): GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0, inplace=False)
            (resid_dropout): Dropout(p=0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=False)
            (q_proj): Linear(in_features=768, out_features=768, bias=False)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear(in_features=3072, o

In [None]:
# Optional one-off export of the loaded tokenizer and model to a local directory.
# Guarded by an explicit flag rather than hidden in a triple-quoted string, which
# would be echoed as cell output when the cell is run.
SAVE_MODEL = False  # set to True to write the tokenizer and model to OUT_PATH
OUT_PATH = '../model/gpt-2/gpt-neo-125M'

if SAVE_MODEL:
    import os

    os.makedirs(OUT_PATH, exist_ok=True)
    tokenizer.save_pretrained(OUT_PATH)
    model.save_pretrained(OUT_PATH)

In [3]:
# Show the parameter count of the loaded model
# (the recorded run reports 125,198,592).
model.num_parameters()

125198592

In [9]:
text = '좋은 날씨'

# Calling the tokenizer (instead of .encode) returns the attention mask together
# with the input ids, so the mask can be passed to generate() explicitly.
encoded = tokenizer(text, return_tensors='pt')
input_ids = encoded['input_ids']
print(input_ids)

# Sampling is stochastic (do_sample=True), so the generated text differs per run.
# tokenizer.pad_token_id is not set for this tokenizer: the recorded run shows
# generate() warning and falling back to eos_token_id (50256). Passing
# eos_token_id as the pad id directly gives the same padding without the warning.
gen_ids = model.generate(input_ids.to(device),
                        attention_mask=encoded['attention_mask'].to(device),
                        do_sample=True,
                        temperature=0.9,
                        max_length=100,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        bos_token_id=tokenizer.bos_token_id,
                        )
print(gen_ids.shape)
print(gen_ids[0])

# skip_special_tokens=True keeps special tokens (such as the end-of-text
# marker) out of the decoded string.
generated = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
print(generated)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[  168,    95,   233, 35975,   222, 31619,   224,   254,   168,   242,
           101]])
torch.Size([1, 100])
tensor([  168,    95,   233, 35975,   222, 31619,   224,   254,   168,   242,
          101,   167,   238,   250, 23821,   100,   239,   168,   243,   226,
        47991,   250, 31619,   235,   108, 35975,   112,   169,   226,   108,
        31619,   100,   238,   168,   252,   227, 46695,   230, 46695,    97,
           13,   198,   198,   166,   108,   247, 35975,   222, 31619,   235,
          108, 35975,   112,   169,   226,   108, 23821,   224,   112,   168,
          243,   226, 23821,   230,   246,   168,   252,   227, 46695,   230,
        46695,    97,    13, 23821,   251,   112,   168,   225,   223, 23821,
          100,   239,   168,   243,   226,   168,   250,   120,   167,    94,
          250, 31619,   234,   222,   168,   254,   223, 35975,   116, 23821],
       device='cuda:0')
좋은 날씨된 집아한 데이터 말입니다.

같은 데이터 살아 수입니다. 이상 집아으로 대적인 �
