In [None]:
# 출처 : https://huggingface.co/google/paligemma-3b-pt-448
import torch
import time
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

# Start the wall-clock timer; it covers both the model and the processor load.
start_time = time.time()

# Hub checkpoint to download, target device, and the precision to load it in.
model_id = "google/paligemma-3b-mix-448"
device = "cuda:0"
dtype = torch.bfloat16

# Keyword arguments for the model load, gathered in one place.
load_options = dict(torch_dtype=dtype, device_map=device, revision="bfloat16")
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, **load_options)
# eval() switches the module to inference mode and returns the module itself.
model = model.eval()

processor = AutoProcessor.from_pretrained(model_id)

# Report the elapsed time in seconds, rounded to two decimals.
end_time = time.time()
formatted_elapsed_time = f"{end_time - start_time:.2f}"
print('*time:' + formatted_elapsed_time)

In [None]:
# Write the model weights and the processor files into one local folder
# (here: ./model/paligemma-3b-mix-448).
folder = "./model/paligemma-3b-mix-448"
model.save_pretrained(save_directory=folder, safe_serialization=False)
processor.save_pretrained(save_directory=folder)

In [1]:
import torch
import time
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

# Start the wall-clock timer; it covers both the model and the processor load.
start_time = time.time()

# Load the model and the processor from local files instead of the Hub.
folder = "./model/paligemma-3b-mix-448"
device = "cuda:0"
dtype = torch.bfloat16

# NOTE: no `revision` argument here. `revision` selects a git branch/tag/commit
# of a Hub repository and does not apply when loading from a local directory,
# so passing it only suggested a choice that is never made.
model = PaliGemmaForConditionalGeneration.from_pretrained(
    folder,
    torch_dtype=dtype,
    device_map=device,
).eval()
processor = AutoProcessor.from_pretrained(folder)

# Compute and print the elapsed time in seconds (two decimals).
end_time = time.time()
formatted_elapsed_time = "{:.2f}".format(end_time - start_time)
print(f'*time:{formatted_elapsed_time}')

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

*time:6.52


In [5]:
from PIL import Image
import requests
import time

# Start the wall-clock timer; it covers image loading, preprocessing and generation.
start_time = time.time()

# Alternative input: fetch a sample image over HTTP instead of reading a local file.
#url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
#image = Image.open(requests.get(url, stream=True).raw)

# Load the local image. The context manager closes the file handle, and
# convert("RGB") returns a fully loaded three-channel copy, so palette,
# grayscale or alpha-channel files are handled the same way as plain RGB ones.
with Image.open("./data/trap1.jpg") as image_file:
    image = image_file.convert("RGB")

# Plain description prompt (no target language is requested).
prompt = "Describe the image"
model_inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
# Number of prompt tokens; used below to slice the prompt off the generated sequence.
input_len = model_inputs["input_ids"].shape[-1]

# Greedy decoding (do_sample=False) already yields a fixed output for a given
# input. `temperature` is a sampling-only option and has no effect here, so it
# is intentionally not passed.
generation_args = {
    "max_new_tokens": 500,
    "do_sample": False,
}

with torch.inference_mode():
    generation = model.generate(**model_inputs, **generation_args)
    # Keep only the newly generated tokens of the first (and only) sequence.
    generation = generation[0][input_len:]
    decoded = processor.decode(generation, skip_special_tokens=True)
    print(decoded)

# Compute and print the elapsed time in seconds (two decimals).
end_time = time.time()
formatted_elapsed_time = "{:.2f}".format(end_time - start_time)
print(f'*time:{formatted_elapsed_time}')


In this image we can see buildings, trees, plants, water, hills and sky with clouds.
*time:0.60


In [2]:
# IPython help query: the trailing "?" displays the signature and docstring of
# model.generate. Development aid only; it is not valid in a plain Python script.
model.generate?

[0;31mSignature:[0m
[0mmodel[0m[0;34m.[0m[0mgenerate[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0minputs[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mtorch[0m[0;34m.[0m[0mTensor[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgeneration_config[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgeneration[0m[0;34m.[0m[0mconfiguration_utils[0m[0;34m.[0m[0mGenerationConfig[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlogits_processor[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgeneration[0m[0;34m.[0m[0mlogits_process[0m[0;34m.[0m[0mLogitsProcessorList[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstopping_criteria[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgeneration[0m[0;34m.[0m[0mstopping_criteria[0m[0;34m.[0m[0mStoppingCriteria

In [6]:
# 번역 => 영문을 한국어로 번역
#!pip install googletrans==4.0.0-rc1
from googletrans import Translator
def translate_google(text:str, source_lang:str, target_lang:str) -> str:
    """Translate ``text`` with googletrans and return the translated string.

    The elapsed request time and the raw result object are printed as a
    side effect.

    Args:
        text: Text to translate.
        source_lang: Language code of ``text`` (e.g. ``'en'``).
        target_lang: Language code to translate into (e.g. ``'ko'``).

    Returns:
        The translated text with leading/trailing double quotes stripped.
        Empty, whitespace-only or ``None`` input yields ``""`` and no
        request is made.
    """
    # Nothing to translate: return early instead of sending an empty request.
    if not text or not text.strip():
        return ""

    # Local import so the helper does not rely on `time` having been imported
    # by an earlier notebook cell.
    import time

    translator = Translator()

    start_time = time.time()
    res = translator.translate(text, src=source_lang, dest=target_lang)

    # Compute and print the elapsed request time in seconds (two decimals).
    end_time = time.time()
    formatted_elapsed_time = "{:.2f}".format(end_time - start_time)
    print(f'time:{formatted_elapsed_time}')

    print(res)
    print()
    # Strip double quotes that may wrap the translated text.
    response = res.text.strip('"')
    return response

# Translate the decoded caption from English ('en') to Korean ('ko') and show it.
text = translate_google(decoded, 'en', 'ko')
print(text)

time:0.27
Translated(src=en, dest=ko, text=이 이미지에서 우리는 구름이있는 건물, 나무, 식물, 물, 언덕 및 하늘을 볼 수 있습니다., pronunciation=i imijieseo ulineun guleum-iissneun geonmul, namu, sigmul, mul, eondeog mich haneul-eul bol su issseubnida., extra_data="{'confiden...")

이 이미지에서 우리는 구름이있는 건물, 나무, 식물, 물, 언덕 및 하늘을 볼 수 있습니다.
