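"""Streamlit app that runs the same prompt against two local Ollama models
side by side, streaming both responses concurrently with asyncio.

Assumes an Ollama server is listening on http://localhost:11434 with the
phi3:latest and llama3:latest models already pulled. Start the app with:
streamlit run app.py
"""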
import asyncio
import time

import ollama
import streamlit as st
from openai import AsyncOpenAI
from token_count import TokenCount
title = "Running LLMs in parallel with Ollama"
st.set_page_config(page_title=title, layout="wide")
st.title(title)
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ignore-me")
models = [
m['name']
for m in ollama.list()["models"]
if m["details"]["family"] in ["llama", "gemma"]
]

with st.sidebar:
    prompt = st.text_area("Prompt")
    # Default the two dropdowns to phi3 and llama3; models.index raises
    # ValueError if either model is missing from the filtered list
    model_1_index = models.index("phi3:latest")
    model_1 = st.selectbox("Model 1", options=models, index=model_1_index)
    model_2_index = models.index("llama3:latest")
    model_2 = st.selectbox("Model 2", options=models, index=model_2_index)
    generate = st.button("Generate", type="primary")

# One column per model: a heading, a metadata slot, and a body slot,
# created up front so streamed output can be written into them later
col1, col2 = st.columns(2)
col1.write(f"# :blue[{model_1}]")
col2.write(f"# :red[{model_2}]")
meta_1 = col1.empty()
meta_2 = col2.empty()
body_1 = col1.empty()
body_2 = col2.empty()


async def run_prompt(placeholder, meta, prompt, model):
    # token-count approximates token usage with the GPT-3.5 tokenizer,
    # which is close enough for a rough tokens/sec figure
    tc = TokenCount(model_name="gpt-3.5-turbo")
    start = time.time()
    stream = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    # Re-render the accumulated text as each chunk arrives so the
    # column updates live while the model is still generating
    streamed_text = ""
    async for chunk in stream:
        chunk_content = chunk.choices[0].delta.content
        if chunk_content is not None:
            streamed_text = streamed_text + chunk_content
            placeholder.write(streamed_text)
    end = time.time()
    time_taken = end - start
    tokens = tc.num_tokens_from_string(streamed_text)
    meta.info(f"""**Duration: :green[{time_taken:.2f} secs]**
**Eval count: :green[{tokens} tokens]**
**Eval rate: :green[{tokens / time_taken:.2f} tokens/s]**
""")


async def main():
    # Fire both requests concurrently so the two models stream in parallel
    await asyncio.gather(
        run_prompt(body_1, meta_1, prompt=prompt, model=model_1),
        run_prompt(body_2, meta_2, prompt=prompt, model=model_2),
    )


if generate:
    if prompt == "":
        st.warning("Please enter a prompt")
    else:
        asyncio.run(main())