<a href="https://colab.research.google.com/github/mgultekin/Building-Generative-AI-Applications-with-Gradio/blob/main/Chat-with-LLM-App/Chat_with_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chat with LLM
* [Falcon 40B Instruct](https://huggingface.co/tiiuae/falcon-40b-instruct) (open-source LLM)
* Ready to use chat/instruct model


In [1]:
# Install necessary packages

!pip install gradio
!pip install python-dotenv #if you need to read local .env file
!pip install IPython

Collecting gradio
  Downloading gradio-3.39.0-py3-none-any.whl (19.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.9/19.9 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.101.0-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client>=0.3.0 (from gradio)
  Downloading gradio_client-0.3.0-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.2/294.2 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx (from gradio)
  Downloading httpx-0.24.1-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import io
import IPython.display
from PIL import Image
import base64
import requests
# NOTE(review): requests.adapters does not appear to define or read a
# DEFAULT_TIMEOUT attribute, so this assignment may have no effect on request
# timeouts — confirm, or pass timeout= explicitly where requests are made.
requests.adapters.DEFAULT_TIMEOUT = 60

# Hugging Face API key: taken from the HF_API_KEY environment variable when it is
# set, so the real secret does not have to be saved inside the notebook.
# Falls back to the placeholder value otherwise.
hf_api_key = os.environ.get("HF_API_KEY", "xxxxxxx")

In [3]:
!pip install text-generation #install text generation inference from Hugging Face

Collecting text-generation
  Downloading text_generation-0.6.0-py3-none-any.whl (10 kB)
Installing collected packages: text-generation
Successfully installed text-generation-0.6.0


In [6]:
#Helper function
import requests, json
from text_generation import Client

# Falcon-instruct endpoint, accessed through the text_generation client library.
# NOTE(review): the header uses the "Basic" scheme; Hugging Face endpoints
# commonly expect "Bearer <token>" — confirm which one this endpoint requires.
client = Client(
    "https://xxxx.us-east-1.aws.endpoints.huggingface.cloud",
    headers={"Authorization": f"Basic {hf_api_key}"},
    timeout=120,
)

# Build the chat app

In [7]:
import random

# Function to format the chat prompt by combining user messages, bot responses, and instruction.
def format_chat_prompt(message, chat_history, instruction):
    """Build the text prompt sent to the model.

    The prompt is a newline-separated transcript: a ``System:`` line carrying
    the instruction, then a ``User:`` / ``Assistant:`` pair for every past
    turn, then the new user message and an open ``Assistant:`` line for the
    model to complete.

    Args:
        message: The new user message.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs.
        instruction: System instruction placed on the first line.

    Returns:
        The assembled prompt string.
    """
    lines = [f"System:{instruction}"]
    for user_message, bot_message in chat_history:
        lines.append(f"User: {user_message}")
        lines.append(f"Assistant: {bot_message}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)

# Function to generate a response based on the chat history and user message.
def respond(message, chat_history, instruction, temperature=0.7):
    """Stream the assistant's reply to ``message`` into the chat history.

    This is a generator: after each streamed token it yields the pair
    ``("", history)``, where ``history`` is the previous turns followed by a
    final ``[message, reply_so_far]`` turn.

    Args:
        message: The new user message.
        chat_history: Previous ``(user_message, bot_message)`` turns; ``None``
            is treated as an empty history.
        instruction: System instruction passed to ``format_chat_prompt``.
        temperature: Sampling temperature forwarded to the endpoint.

    Yields:
        ``("", history)`` after every streamed token except the final one.
    """
    # Copy so the caller's list is never modified, and tolerate a missing history.
    previous_turns = list(chat_history or [])
    prompt = format_chat_prompt(message, previous_turns, instruction)

    stream = client.generate_stream(
        prompt,
        max_new_tokens=1024,
        # Stop when the model starts writing the user's next turn, or at the
        # end-of-text marker. An empty string must not be used as a stop
        # sequence: it carries no marker for the generation to stop on.
        stop_sequences=["\nUser:", "<|endoftext|>"],
        temperature=temperature,
    )

    reply = ""
    for idx, response in enumerate(stream):
        # `details` is set once generation has finished; the text of that last
        # message is deliberately not appended to the reply.
        if response.details:
            return

        text_token = response.token.text
        # Drop the single leading space of the very first token.
        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]

        reply += text_token
        yield "", previous_turns + [[message, reply]]


In [None]:
import gradio as gr

# Chat UI: a chatbot pane, a prompt box, and collapsible advanced options
# (system message and sampling temperature).
with gr.Blocks() as demo:
    gr.Markdown("""# Chat with Falcon LLM
    Falcon 40B Instruct model""")
    chatbot = gr.Chatbot(height = 240) #just to fit the notebook
    msg = gr.Textbox(label = "Prompt")
    with  gr.Accordion(label="Advanced options", open = False):
        system = gr.Textbox(label = "System message", lines =2, value = "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
        temperature = gr.Slider(label = "temperature", minimum = 0.1, maximum = 1, value = 0.7, step = 0.1)
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components = [msg, chatbot], value = 'Clear console')

    # Inputs map positionally onto respond(message, chat_history, instruction, temperature).
    # The temperature slider must be listed here, otherwise respond always
    # runs with its default temperature and the slider has no effect.
    respond_inputs = [msg, chatbot, system, temperature]
    respond_outputs = [msg, chatbot]

    btn.click(respond, inputs = respond_inputs, outputs = respond_outputs)
    msg.submit(respond, inputs = respond_inputs, outputs = respond_outputs) #press enter to submit

gr.close_all()
# share = True requests a public share link for the app.
demo.queue().launch(share = True, debug = True)