# L5: Chat with any LLM! 💬

HF API 키와 관련 Python 라이브러리를 로드합니다.

In [4]:
# !pip install text_generation

In [5]:
import os
import io
import IPython.display
from PIL import Image
import base64
import requests
requests.adapters.DEFAULT_TIMEOUT = 60

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) 
hf_api_key = os.environ['HF_API_KEY']

In [12]:
# Helper function

import requests, json
from text_generation import Client

# FalconLM-instruct endpoint on the text_generation library
client = Client("https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct", headers={"Authorization": f"Bearer {hf_api_key}"}, timeout=120)

## 모든 LLM과 채팅할 수 있는 앱을 구축하세요!

In [13]:
hf_api_key

'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'

Here we'll be using an [Inference Endpoint](https://huggingface.co/inference-endpoints) for `falcon-40b-instruct` , one of best ranking open source LLM on the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). 

To run it locally, one can use the [Transformers library](https://huggingface.co/docs/transformers/index) or the [text-generation-inference](https://github.com/huggingface/text-generation-inference) 

In [16]:
prompt = "Has math been invented or discovered?"
client.generate(prompt, max_new_tokens=256).generated_text

'\nMath has been discovered, not invented. It is a system of rules and formulas that are used to describe the natural world and its behavior.'

In [18]:
# Back to Lesson 2
import gradio as gr
def generate(input, slider):
    output = client.generate(input, max_new_tokens=slider).generated_text
    return output

demo = gr.Interface(
    fn=generate, 
    inputs=[
        gr.Textbox(label="Prompt"), 
        gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1)
    ],
    outputs=[gr.Textbox(label="Completion")]
)
gr.close_all()
demo.launch()

Closing server running on port: 7870
Closing server running on port: 7870
Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.




## `gr.Chatbot()` to the rescue!

In [22]:
import random

def respond(message, chat_history):
    # NO LLM here, just response with a random pre-made message
    bot_message = random.choice(["Tell me more about it", 
                                 "Cool, but I'm not interested", 
                                 "Hmmmm, ok then"])
    chat_history.append((message, bot_message))
    return "", chat_history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear")

    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
gr.close_all()
demo.launch()

Closing server running on port: 7870
Closing server running on port: 7870
Closing server running on port: 7870
Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.




In [26]:
def format_chat_prompt(message, chat_history):
    prompt = ""
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    print(prompt)
    return prompt

def respond(message, chat_history):
    formatted_prompt = format_chat_prompt(message, chat_history)
    bot_message = client.generate(formatted_prompt,
                                 max_new_tokens=1024,
                                 stop_sequences=["\nUser:", "<|endoftext\>"]).generated_text
    chat_history.append((message, bot_message))
    return "", chat_history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear")

    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

gr.close_all()
demo.launch()

Closing server running on port: 7870
Closing server running on port: 7870
Closing server running on port: 7870
Running on local URL:  http://127.0.0.1:7874

To create a public link, set `share=True` in `launch()`.





User: hi
Assistant:

User: hi
Assistant: hi! how can i help you?
User:
User: how?
Assistant:


### 기타 고급 기능 추가

In [30]:
def format_chat_prompt(message, chat_history, instruction):
    prompt = f"System:{instruction}"
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAddistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt

def respond(message, chat_history, instruction, temperature=0.7):
    prompt = format_chat_prompt(message, chat_history, instruction)
    chat_history = chat_history + [[message, ""]]
    stream = client.generate_stream(
        prompt,
        max_new_tokens=1024,
        stop_sequences=["\nUser:", "<|endoftext|>"],
        temperature=temperature
    )
    acc_text = ""

    # streaming the tokens
    for idx, response in enumerate(stream):
        text_token = response.token.text

        if response.details:
            return

        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]

        acc_text += text_token
        last_turn = list(chat_history.pop(-1))
        last_turn[-1] += acc_text
        chat_history = chat_history + [last_turn]
        yield "", chat_history
        acc_text = ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)
    msg = gr.Textbox(label="Prompt")
    with gr.Accordion(label="Advanced options", open=False):
        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)

    btn = gr.Button("Submit")
    clear =gr.ClearButton(components=[msg, chatbot], value="Clear")

    btn.click(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot])

gr.close_all()
demo.queue().launch()


Closing server running on port: 7870
Closing server running on port: 7870
Closing server running on port: 7870
Running on local URL:  http://127.0.0.1:7877

To create a public link, set `share=True` in `launch()`.


