In [1]:
# A simple Gradio chat UI that streams replies from OpenAI or a local Ollama model

In [2]:
# imports

import os
import requests
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from IPython.display import display, Markdown

In [3]:
# Load environment variables from a .env file

load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")

# Sanity-check the key. Whitespace is tested before the prefix so that a key
# with a stray leading space/tab is reported as a whitespace problem instead
# of being misdiagnosed as having the wrong prefix.
if not openai_key:
    print("An API key was not found")
elif openai_key.strip() != openai_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not openai_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start with sk-proj-")
else:
    print("An API key was found and looks good")

An API key was found and looks good


In [4]:
# Check if Ollama is running by making a proper request
try:
    # The timeout keeps this cell from hanging indefinitely if the port
    # accepts the connection but never sends a response.
    response = requests.get("http://localhost:11434", timeout=5)
    print(f"Status Code: {response.status_code}")
    print(f"Response: {response.text}")
except requests.exceptions.ConnectionError:
    print("Cannot connect to Ollama. Make sure it's running on localhost:11434")
except Exception as e:
    # Anything else (including a read timeout) is reported, not raised, so the
    # notebook keeps running.
    print(f"An error occurred: {e}")

Status Code: 200
Response: Ollama is running


In [5]:
# Ollama parameters
# Root of the /v1 API on the local Ollama server (port 11434).
OLLAMA_BASE_URL  = "http://localhost:11434/v1"

# Reuse the OpenAI client class, pointed at the Ollama base URL.
# api_key="ollama" looks like a placeholder — presumably the local server
# does not validate it; TODO confirm.
ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key="ollama")

# Model name passed to the Ollama client by message_ollama.
# NOTE: the variable is spelled `olama_model` (one "l"); message_ollama reads
# it under this exact name, so rename both together if correcting it.
olama_model = "llama3.2"

In [6]:
# Create the OpenAI client used by message_gpt.
# No key is passed explicitly — presumably the client picks up OPENAI_API_KEY
# from the environment populated by load_dotenv(); TODO confirm.

openai = OpenAI()

In [7]:
# System prompt sent as the first message by both message_gpt and message_ollama.
system_message = "You are a helpful assistant that responds in markdown without code block."
# Model name passed to the OpenAI client by message_gpt.
openai_model = "gpt-4o-mini"

In [8]:
def message_gpt(prompt):
    """Stream a reply from the OpenAI model, yielding the text accumulated so far.

    Args:
        prompt: The user's message; it is sent after the module-level
            ``system_message``.

    Yields:
        str: The whole response received up to the current chunk (a growing
        string, not the individual deltas).
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    stream = openai.chat.completions.create(
        model=openai_model,
        messages=messages,
        stream=True,
    )
    result = ""
    for chunk in stream:
        # A streamed chunk can arrive with an empty `choices` list (e.g. a
        # trailing usage-only chunk); indexing [0] would raise IndexError.
        if not chunk.choices:
            continue
        # delta.content may be None on some chunks, hence the `or ""`.
        result += chunk.choices[0].delta.content or ""
        yield result

    

In [9]:
def message_ollama(prompt):
    """Stream a reply from the local Ollama model, yielding the text accumulated so far.

    Args:
        prompt: The user's message; it is sent after the module-level
            ``system_message``.

    Yields:
        str: The whole response received up to the current chunk (a growing
        string, not the individual deltas).
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    stream = ollama.chat.completions.create(
        model=olama_model,
        messages=messages,
        stream=True,
    )
    result = ""
    for chunk in stream:
        # A streamed chunk can arrive with an empty `choices` list; indexing
        # [0] on it would raise IndexError, so skip such chunks.
        if not chunk.choices:
            continue
        # delta.content may be None on some chunks, hence the `or ""`.
        result += chunk.choices[0].delta.content or ""
        yield result

    

In [10]:
def stream_model(prompt, model):
    """Route the prompt to the backend matching ``model`` and relay its stream.

    Args:
        prompt: The user's message text.
        model: Either "llama3.2" (handled by message_ollama) or
            "gpt-4o-mini" (handled by message_gpt).

    Yields:
        Whatever the selected backend generator yields.

    Raises:
        ValueError: If ``model`` is neither of the two supported names. Because
            this is a generator, the error surfaces on first iteration.
    """
    # Guard clause: reject unknown names before touching any backend.
    if model not in ("llama3.2", "gpt-4o-mini"):
        raise ValueError("Invalid model")
    backend = message_ollama if model == "llama3.2" else message_gpt
    yield from backend(prompt)

In [11]:
# Gradio interface: a prompt box and a model picker feed stream_model, and the
# streamed reply is rendered as Markdown.
message_input = gr.Textbox(
    label="Your message",
    info="Enter your message here",
    lines=7,
)
model_selector = gr.Dropdown(
    choices=["llama3.2", "gpt-4o-mini"],
    label="Select model",
    value="llama3.2",
)
message_output = gr.Markdown(label="Response")

view = gr.Interface(
    fn=stream_model,
    inputs=[message_input, model_selector],
    outputs=[message_output],
    title="Dotshow Chatbot",
    flagging_mode="never",
    # Optional sample prompts, currently disabled:
    # examples=["Explain the transformer model",
    #           "Explain the concept of attention",
    #           ],
)

# share=True publishes a temporary public URL in addition to the local one.
view.launch(inbrowser=True, share=True)

* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://a5c6d5a040c1e8ed3f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


