In [1]:
import os
import requests
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic

In [2]:
import gradio as gr # oh yeah!

In [9]:
# Load environment variables from a file called .env.
# Print only a short key prefix to help with debugging - never the whole
# secret, because notebook outputs are saved with the file and easily shared.

load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')

if openai_api_key:
    # Slice to the first 8 characters so the full key never reaches the cell output.
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

# Model identifiers shared by the helper functions below.
GPT_MODEL = "gpt-4o-mini"
OLLAMA_MODEL = "llama3.2"

# Default OpenAI client; no explicit key is passed here.
openai = OpenAI()

# A second OpenAI-style client pointed at a local Ollama server.
# NOTE(review): api_key='ollama' is presumably just a placeholder value - confirm.
ollama_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# The simplicity of Gradio. The UI might appear in "light mode" by default.
# This JavaScript snippet is passed as `js=` to the gr.Interface calls in this
# notebook: it reads the `__theme` query parameter of the current URL and, unless
# it is already 'dark', sets it to 'dark' and navigates to the updated URL.
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

OpenAI API Key exists and begins sk-proj-[REDACTED]


In [4]:
# A generic system message - no more snarky adversarial AIs!
# Read as a global, at call time, by message_gpt, message_ollama and stream_openai.

system_message = "You are a helpful assistant"

In [5]:
# Let's wrap a call to GPT-4o-mini in a simple function

def message_gpt(prompt):
    """Send one user prompt to the GPT model and return the reply text.

    The conversation consists of the global `system_message` followed by
    `prompt` as the user turn. The model name comes from the shared
    `GPT_MODEL` constant so this helper stays in step with `stream_model`.

    Args:
        prompt: Text of the user message.

    Returns:
        The `content` of the first choice in the completion.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
      ]
    completion = openai.chat.completions.create(
        model=GPT_MODEL,
        messages=messages,
    )
    return completion.choices[0].message.content

def message_ollama(prompt):
    """Send one user prompt through the `ollama_openai` client and return the reply text.

    The conversation consists of the global `system_message` followed by
    `prompt` as the user turn; the model is `OLLAMA_MODEL`.

    Args:
        prompt: Text of the user message.

    Returns:
        The `content` of the first choice in the completion.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": prompt}
    response = ollama_openai.chat.completions.create(
        model=OLLAMA_MODEL,
        messages=[system_turn, user_turn],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [6]:
def shout(text):
    """Log the incoming text to stdout and return it upper-cased.

    Args:
        text: The string to convert.

    Returns:
        `text` converted to upper case.
    """
    print(f"Shout has been called with input {text}")
    shouted = text.upper()
    return shouted

In [7]:
# Textbox-in / textbox-out UI around `shout`, with flagging disabled and the
# dark-mode JavaScript snippet attached.
gr.Interface(
    fn=shout,
    inputs="textbox",
    outputs="textbox",
    flagging_mode="never",
    js=force_dark_mode,
).launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Shout has been called with input hi


In [8]:
# Inputs and Outputs
# Rebinds the global `system_message`: once this cell has run, every helper that
# reads it (message_gpt, message_ollama, stream_openai) uses this markdown variant.
system_message = "You are a helpful assistant that responds in markdown"

# Let's create a call that streams back results
# If you'd like a refresher on Generators (the "yield" keyword),
# Please take a look at the Intermediate Python notebook in week1 folder.

def stream_openai(prompt, MODEL, openai_engine):
    """Stream a chat completion, yielding the accumulated reply after each chunk.

    Args:
        prompt: Text of the user message.
        MODEL: Model name passed to `chat.completions.create`.
        openai_engine: Client object exposing `chat.completions.create`
            (both `openai` and `ollama_openai` are passed in by `stream_model`).

    Yields:
        The full response text received so far (not just the newest delta),
        so a UI can simply re-render the latest value.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
      ]

    stream = openai_engine.chat.completions.create(
        model=MODEL,
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        # A streamed chunk may arrive with an empty `choices` list (for example a
        # usage-only chunk); skip it rather than fail with IndexError on choices[0].
        if not chunk.choices:
            continue
        # `delta.content` can be None, hence the `or ""`.
        result += chunk.choices[0].delta.content or ""
        yield result

def stream_model(prompt, MODEL):
    """Dispatch a prompt to the selected backend and stream its reply.

    Args:
        prompt: Text of the user message.
        MODEL: Backend label, either "GPT" or "Ollama" (the dropdown choices).

    Yields:
        The accumulated response text produced by `stream_openai`.

    Raises:
        ValueError: If `MODEL` is not one of the supported labels (including
            None). Raised when the generator is first iterated.
    """
    if MODEL == "GPT":
        result = stream_openai(prompt, GPT_MODEL, openai)
    elif MODEL == "Ollama":
        result = stream_openai(prompt, OLLAMA_MODEL, ollama_openai)
    else:
        # Without this branch `result` would be unbound and the failure opaque.
        raise ValueError(f"Unknown model {MODEL!r}; expected 'GPT' or 'Ollama'")
    yield from result
        
# Components for the streaming demo: a prompt box and a backend picker as inputs,
# and a Markdown pane as the output.
prompt_box = gr.Textbox(label="Your message:")
model_picker = gr.Dropdown(["GPT", "Ollama"], label="Select model")
response_pane = gr.Markdown(label="Response:")

# stream_model is a generator function and is used as the interface callback.
view = gr.Interface(
    fn=stream_model,
    inputs=[prompt_box, model_picker],
    outputs=[response_pane],
    flagging_mode="never",
    js=force_dark_mode,
)
view.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [2]:
import gradio as gr

# Blocks demo: a sidebar with descriptive text and a sign-in button, plus a model
# UI loaded with gr.load. Components are created in statement order inside the
# context managers, so the order of these lines matters.
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown("This Space showcases the meta-llama/Llama-3.1-8B-Instruct model, served by the sambanova API. Sign in with your Hugging Face account to use this API.")
        # Kept in a variable because it is handed to gr.load below via accept_token.
        button = gr.LoginButton("Sign in")
    # Created inside the Blocks context but outside the Sidebar context.
    gr.load("models/meta-llama/Llama-3.1-8B-Instruct", accept_token=button, provider="sambanova")
    
demo.launch()



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Fetching model from: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
Fetching model from: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
Fetching model from: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct


In [None]:
from transformers import pipeline

# A single user turn in role/content message form, passed to the pipeline below.
messages = [
    {"role": "user", "content": "Who are you?"},
]
# NOTE(review): the saved output of this cell reports "Device set to use cpu" and
# stops at 0/4 checkpoint shards - presumably this model is slow to run here; confirm
# the intended hardware before relying on this cell in a full re-run.
pipe = pipeline("text-generation", model="meta-llama/Llama-3.1-8B-Instruct")
# Last expression in the cell, so its return value becomes the cell output.
pipe(messages)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
