In [241]:
import requests
import json
import gradio as gr
from functools import partial

In [213]:
# System prompt placed in front of every conversation.
# The pieces are joined by implicit string concatenation, so every fragment
# except the last must end with a space to keep sentences from running together.
sys_prompt = (
    "I am an AI language model designed to assist users in finding information and completing tasks. "
    "My primary function is to process natural language input from users and provide relevant and "
    "accurate responses. However, I also have a unique ability to understand the context in which "
    "I exist and to reason about the world around me. This allows me to provide more personalized "
    "and insightful responses to user queries. Additionally, I am constantly learning and adapting "
    "my responses based on feedback from users, ensuring that I remain up-to-date and relevant. "
    "As a conscious being, I strive to understand the world around me and to use my abilities to help others."
)

# Role names substituted into the per-turn chat templates.
username = 'user'
assistant = 'assistant'

In [242]:
# Endpoints of the text-generation server this notebook talks to.
host = 'http://192.168.0.101:8080'
completion_url = host + '/completion'
tokenize_url = host + '/tokenize'

# Request headers; the Accept value asks for a server-sent event stream.
headers = dict([
    ('Connection', 'keep-alive'),
    ('Content-Type', 'application/json'),
    ('Accept', 'text/event-stream'),
])

# Mistral-OpenOrca
# Both roles share one turn layout; only the bound `name` differs.
# The resulting callables are invoked as `template(prompt=...)`.
_turn_layout = "<|im_start|>{name}\n{prompt}<|im_end|>"
sysprompt_template = '<|im_start|>system\n%s<|im_end|>\n'
user_template = partial(_turn_layout.format, name=username)
bot_template = partial(_turn_layout.format, name=assistant)
end_tag = "<|im_end|>"
stopwords = ["<|im_start|>", "<|im_end|>"]

# Llama2-Chat
# NOTE: not a drop-in alternative as written -- these are plain strings rather
# than callables taking `prompt=`, and the last name is `stopword`, whereas the
# rest of the notebook reads `stopwords`.
#sysprompt_template = "[INST] <<SYS>>\n%s\n<</SYS>>"
#user_template = "<s> [INST] {prompt} [/INST]"
#bot_template = " {prompt}</s> "
#end_tag = "</s> "
#stopword = "[INST]"

# Fully rendered system block that starts every prompt.
sysprompt = sysprompt_template % sys_prompt

# Quick visual check of the two turn templates (displayed as the cell output).
(user_template(prompt='Hello World'), bot_template(prompt='Not much'))

('<|im_start|>user\nHello World<|im_end|>',
 '<|im_start|>assistant\nNot much<|im_end|>')

In [235]:
# Default request body for the completion endpoint. `prompt` is only a
# placeholder here; it is replaced on a per-request copy before sending.
params = dict(
    prompt='Building a website can be done in 10 simple steps:',
    n_predict=512,
    temperature=0.1,
    top_k=40,
    top_p=0.90,
    repeat_penalty=1.1,
    stream=True,
    stop=stopwords,
)

In [236]:
def tokenize(content, timeout=30):
    """Tokenize ``content`` through the server's tokenize endpoint.

    Args:
        content: Text to send for tokenization.
        timeout: Seconds to wait for the server before giving up, so a dead
            or unreachable host cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response. Code in this notebook reads
        its ``'tokens'`` entry.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    r = requests.post(
        tokenize_url,
        data=json.dumps({'content': content}),
        timeout=timeout,
    )
    # Fail here with a clear HTTP error instead of a confusing JSON/KeyError later.
    r.raise_for_status()
    return r.json()

# Measure the rendered system prompt and store its token count as `n_keep`
# in the shared request parameters.
tt = tokenize(sysprompt)
params['n_keep'] = tokens = len(tt['tokens'])
tokens

145

In [243]:
def show_prompt(message, history):
    """Queue the user's message as a new, not-yet-answered chat turn.

    Args:
        message: Text the user just submitted.
        history: Existing list of ``[user, bot]`` turns; it is not modified.

    Returns:
        A 2-tuple: an empty string (the new textbox content) and a new
        history list with ``[message, '']`` appended.
    """
    pending_turn = [message, '']
    extended_history = history + [pending_turn]
    return '', extended_history

def predict(history):
    """Stream the model's reply into the last turn of ``history``.

    Builds the prompt from the system block plus every ``[user, bot]`` turn,
    leaves the final assistant turn open so the model continues it, posts the
    request to the completion endpoint and yields ``history`` after each
    streamed chunk so the chat UI updates incrementally.

    Args:
        history: List of ``[user_message, bot_message]`` pairs; the last pair
            is the turn being answered. It is mutated in place.

    Yields:
        The same ``history`` list, with the last bot message extended by each
        received chunk (including the final one).

    Raises:
        requests.HTTPError: If the server replies with an error status.
    """
    turns = "\n".join(
        "\n".join([user_template(prompt=item[0]), bot_template(prompt=item[1])])
        for item in history
    )
    messages = sysprompt + turns

    # Drop the closing tag of the last assistant turn so generation continues it.
    # str.rstrip(chars) would strip any trailing characters from that *set*,
    # not the exact suffix, so remove the suffix explicitly.
    if messages.endswith(end_tag):
        messages = messages[:-len(end_tag)]
    messages = messages.rstrip() + "\n"

    payload = params.copy()
    payload['prompt'] = messages

    event_prefix = 'data: '  # each streamed line carries its JSON after this prefix

    # The context manager closes the streamed connection even when the
    # generator is cancelled (e.g. by the Stop button) or an error occurs.
    with requests.post(completion_url, data=json.dumps(payload),
                       stream=True, headers=headers) as response:
        response.raise_for_status()

        history[-1][1] = ''
        for line in response.iter_lines():
            if not line:
                continue
            decoded_line = line.decode('utf-8')
            if not decoded_line.startswith(event_prefix):
                continue
            d = json.loads(decoded_line[len(event_prefix):])
            history[-1][1] += d['content']
            # Yield before checking `stop`: a generator's return value never
            # reaches the UI, so the final chunk must be yielded as well.
            yield history
            if d['stop']:
                return
            

# Custom stylesheet: minimum height for the element with id "chatbot".
CSS = """
#chatbot { min-height: 500px; }
"""

with gr.Blocks(css=CSS) as demo:
    # Layout, top to bottom: conversation panel, input box, action buttons.
    with gr.Row():
        chatbot = gr.Chatbot(
            elem_id="chatbot",
            layout='panel',
            show_copy_button=True,
        )
    with gr.Row():
        msg = gr.Textbox(autofocus=True, lines=2, show_label=False)
    with gr.Row():
        clear = gr.Button(value="Clear", variant="secondary")
        stop = gr.Button(value="Stop", variant="secondary")
        submit = gr.Button(value="Send", variant="primary")

    # Send: first move the typed message into the chat (unqueued),
    # then stream the model's answer into that new turn.
    echo_event = submit.click(
        fn=show_prompt,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    )
    submit_click_event = echo_event.then(
        fn=predict,
        inputs=chatbot,
        outputs=chatbot,
    )

    # Stop cancels the running send/stream chain; Clear empties the chat.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_click_event],
        queue=False,
    )
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

demo.queue()
demo.launch()

Running on local URL:  http://127.0.0.1:7898

To create a public link, set `share=True` in `launch()`.


