# L2: Image captioning app 🖼️📝

HF API 키와 관련 Python 라이브러리를 로드합니다.

In [1]:
import os
import io
import IPython.display
from PIL import Image
import base64 
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load it; the return value is bound to `_` because it is not used.
_ = load_dotenv(find_dotenv())
# API key sent as the Bearer token by get_completion(); a missing HF_API_KEY
# environment variable raises KeyError on this line.
hf_api_key = os.environ['HF_API_KEY']

In [2]:
# Helper functions
import requests, json

#Image-to-text endpoint

def get_completion(inputs, parameters=None, ENDPOINT_URL="https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base", timeout=60):
    """POST an inference request to a Hugging Face endpoint and return the parsed JSON.

    Args:
        inputs: Value sent under the "inputs" key of the JSON body. This notebook
            passes either an image URL or a base64-encoded image string.
        parameters: Optional value sent under the "parameters" key; the key is
            omitted entirely when this is None.
        ENDPOINT_URL: URL that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The response body decoded as UTF-8 and parsed from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
            The message includes the response body so the server's own explanation
            is not lost.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }
    data = {"inputs": inputs}
    if parameters is not None:
        data["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error payload back to
    # callers that expect a list of predictions.
    if not response.ok:
        raise requests.exceptions.HTTPError(
            f"{response.status_code} response from {ENDPOINT_URL}: {response.text}",
            response=response,
        )
    return json.loads(response.content.decode("utf-8"))

## 이미지 캡션 앱 만들기 

여기서는 14M 파라미터 캡션 모델인 `Salesforce/blip-image-captioning-base`를 [허깅페이스 추론 엔드포인트](https://huggingface.co/inference-endpoints)를 통해 사용합니다.

API를 사용하지 않고 로컬에서 실행하는 경우에도 코드는 아래와 매우 유사합니다. 자세한 내용은 [파이프라인](https://huggingface.co/docs/transformers/main_classes/pipelines) 문서 페이지에서 확인할 수 있습니다.

```py
from transformers import pipeline

get_completion = pipeline("image-to-text",model="Salesforce/blip-image-captioning-base")

def summarize(input):
    output = get_completion(input)
    return output[0]['generated_text']
    
```

무료 이미지는 다음 사이트에서 구할 수 있습니다: https://free-images.com/

In [3]:
# Sample picture used to smoke-test the captioning endpoint.
image_url = "https://free-images.com/sm/9596/dog_animal_greyhound_983023.jpg"

# Render the picture inline by URL.
IPython.display.display(IPython.display.Image(url=image_url))

# Last expression of the cell: the endpoint's response is shown as the cell output.
get_completion(image_url)

[{'generated_text': 'a dog wearing a santa hat and a red scarf'}]

## Captioning with `gr.Interface()`

In [6]:
import gradio as gr 

def image_to_base64_str(pil_image):
    """Encode an image as a base64 text string of its PNG serialisation.

    Args:
        pil_image: Object exposing ``save(file_obj, format=...)``; the Gradio
            input in this notebook is configured with ``type="pil"``.

    Returns:
        str: The bytes written by ``save(..., format='PNG')``, base64-encoded
        and decoded to text as UTF-8.
    """
    buffer = io.BytesIO()
    pil_image.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode('utf-8')

def captioner(image):
    """Return a caption for ``image``; used as the Gradio callback.

    Args:
        image: The uploaded picture (the Gradio input uses ``type="pil"``), or
            None when no image was provided.

    Returns:
        str: The ``'generated_text'`` field of the first element of the
        endpoint's response.

    Raises:
        gr.Error: If no image was provided, or the endpoint's response is an
            error object or is empty. Raising ``gr.Error`` gives the user a
            readable message instead of an AttributeError/KeyError/IndexError.
    """
    if image is None:
        raise gr.Error("Please upload an image before requesting a caption.")

    base64_image = image_to_base64_str(image)
    result = get_completion(base64_image)

    # NOTE(review): the Inference API appears to report failures (for example a
    # model that is still loading) as a JSON object with an "error" key rather
    # than a list of predictions -- confirm against the API docs.
    if isinstance(result, dict) and "error" in result:
        raise gr.Error(f"Captioning request failed: {result['error']}")
    if not result:
        raise gr.Error("The captioning model returned no result.")
    return result[0]['generated_text']

# Close any Gradio servers left over from earlier runs of this cell.
gr.close_all()

# Single-image-in, text-out UI wired to captioner().
demo = gr.Interface(
    fn=captioner,
    inputs=[gr.Image(label="Upload image", type="pil")],
    outputs=[gr.Textbox(label="Caption")],
    title="Image Captioning with BLIP",
    description="Caption any image using the BLIP model",
    allow_flagging="never",
    # Example images, given as relative paths.
    examples=[
        "christmas_dog.jpeg",
        "bird_flight.jpeg",
        "cow.jpeg",
        "astronaut.png",
        "man.png",
    ],
)

# share=True also requests a public share URL in addition to the local one.
demo.launch(share=True)

Closing server running on port: 7861
Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://03f9407a893414ec54.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [7]:
# Shut down the running Gradio server(s) now that the demo is finished.
gr.close_all()

Closing server running on port: 7861
Closing server running on port: 7861
