# Gradio UI for LLM 

This project builds a user interface (UI) for an LLM project using Gradio.

In [1]:
import os
import requests
import json
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display 
from openai import OpenAI

In [4]:
# importing gradio 
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Constants for talking to the llama model served on localhost
# Base URL handed to the OpenAI client (base_url=...) in the functions below
llama_url = 'http://localhost:11434/v1'
# NOTE(review): not referenced in the visible cells — presumably the native chat endpoint; confirm or remove
llama_api_url = "http://localhost:11434/api/chat"
# NOTE(review): not referenced in the visible cells — presumably meant for raw HTTP calls to llama_api_url; confirm
Headers = {"Content-Type": "application/json"}
# Model name passed as `model=` when creating chat completions
Model = "llama3.1"

In [8]:
# A generic system message.
# The chat functions read this global at call time, and later cells reassign it,
# so the value in effect depends on which cell ran most recently.
system_message = "You are a helpful assistant"

In [9]:
# initialize the ollama local model

def message_llama(prompt):
    """Send one prompt to the local llama model and return the complete reply.

    The request is a two-turn conversation: the notebook-level
    ``system_message`` as the system turn, followed by ``prompt`` as the
    user turn.

    Args:
        prompt: Text of the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    client = OpenAI(base_url=llama_url, api_key="ollama")
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(model=Model, messages=conversation)
    first_choice = response.choices[0]
    return first_choice.message.content


In [11]:
# Smoke test: send a single prompt through message_llama and display the reply
message_llama("What is today's date?")

"I'm a large language model, I don't have real-time access to the current date. However, I can suggest some options for you to find out the current date:\n\n1. Check your computer or mobile device's clock.\n2. Search online using a search engine like Google.\n3. Look at a physical calendar.\n\nAlternatively, if you'd like me to provide the current date as of our conversation started (which was likely a few seconds ago), I can tell you that we last updated my training data in December 2023. However, this is not the actual current date."

# Create a UI using Gradio

In [13]:
#create a test function to shout (uppercase)
def shout(text):
    """Return ``text`` converted to upper case (toy function for the first UI)."""
    uppercased = text.upper()
    return uppercased

In [18]:
# Minimal Gradio demo: fn is the callback, inputs/outputs are single text boxes.
# `flagging_mode` is used instead of the deprecated `allow_flagging` argument,
# matching the other Interface cells in this notebook.
gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never").launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




In [22]:
# From here on, ask the model to answer in Markdown
system_message = "You are a helpful assistant that responds in markdown"

# Gradio UI around the blocking llama call: multi-line text in, multi-line text out
gr.Interface(
    fn=message_llama,
    inputs=[gr.Textbox(label="Your Message:", lines=6)],
    outputs=[gr.Textbox(label="Chat Output:", lines=10)],
    flagging_mode="never",
).launch()

* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




In [25]:
# Create a streaming function for the LLM response
# Yield -> generators

def stream_llama(prompt):
    """Stream the local llama model's reply to ``prompt`` as it is generated.

    This is a generator. After every chunk received from the model it yields
    the reply text accumulated so far, so a consumer that re-renders on each
    yielded value shows the answer growing instead of waiting for the whole
    completion. The last value yielded is always the complete reply.

    Args:
        prompt: Text of the user message.

    Yields:
        The reply text accumulated up to and including the latest chunk.
    """
    ollama = OpenAI(base_url=llama_url, api_key="ollama")

    message = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    stream = ollama.chat.completions.create(
        model=Model,
        messages=message,
        stream=True,
    )
    result = ""
    for chunk in stream:
        # A chunk's delta content may be None; treat that as no new text.
        result += chunk.choices[0].delta.content or ""
        # Yield inside the loop: emitting the running text per chunk is what
        # makes the output stream. A single yield after the loop would only
        # deliver the finished reply.
        yield result
    if not result:
        # No text was produced; still yield once so the caller always
        # receives a value.
        yield result


## Generators (the `yield` keyword)

### Why do we need generators?

- **Memory efficient**: handle large or infinite data without loading everything into memory.
- **No list overhead**: yield items one by one, avoiding the creation of a full list.
- **Lazy evaluation**: compute values only when they are needed, improving performance.
- **Support for infinite sequences**: ideal for generating unbounded data such as the Fibonacci series.
- **Pipeline processing**: chain generators to process data efficiently in stages.

In Gradio, passing a generator function as `fn` lets the UI update its output each time the function yields a new value, which is how a response can be streamed.


In [29]:
# Ask the model to answer in Markdown so the Markdown output component can render it
system_message = "You are a helpful assistant that responds in markdown format"

# Gradio UI around the streaming llama generator: multi-line text in, Markdown out
gr.Interface(
    fn=stream_llama,
    inputs=[gr.Textbox(label="Your Message:", lines=6)],
    outputs=[gr.Markdown(label="Chat Output:")],
    flagging_mode="never",
).launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.


