In [1]:
import os

from tabulate import tabulate
from IPython.display import display, Markdown, HTML

from utils.schema import KnowledgeGraph
from utils.prompts import SYSTEM_PROMPT, PROMPT
from utils.kg import create_graph
from utils.llm_utils import llm, unload

Start the llama-swap server with the configuration in `utils/config.yaml`:

```bash
./llama-swap/build/llama-swap-linux-amd64 --config utils/config.yaml --listen localhost:8888
```

In [2]:
# Model names to benchmark; each one is passed as the first argument to `llm(...)`.
# The trailing comments list two parameter counts per entry -- presumably the
# main model plus its smaller draft model (TODO confirm against utils/config.yaml).
MODELS_TO_TEST = [
    "mistral-small-sd",    # 24B + 0.5B
    "gemma3-sd",           # 27B + 1B
    "qwen3-sd",            # 32B + 0.6B
]

# Host passed to `llm(...)` and `unload(...)`.
# Defaults to the LAN address this notebook was written against; set the
# LLM_HOST environment variable (e.g. LLM_HOST=localhost) to target another
# server without editing this cell. An empty value falls back to the default.
HOST = os.environ.get("LLM_HOST") or "192.168.0.252"

In [3]:
# Sample document the knowledge graph is extracted from.
# The text is kept as real Unicode (en dashes, typographic apostrophe, "ß",
# emoji): mis-decoded bytes in this literal would be sent to the models
# verbatim.
input_text = r"""
AI Styria by AI Austria – Machine Learning Graz Meetup: Summer Edition 2025

After an extended break, the Machine Learning Graz Meetup is back - more dynamic and exciting than ever! AI Austria and Wirecube have teamed up to reignite Styria’s ML community. Whether you're active in research, industry, or simply curious about ML, Data Science, and AI - this meetup is perfect for you!

When & Where:
📅 Tuesday, July 8, 2025, Start 5:00 PM
📍 Wirecube Graz – Waagner-Biro-Straße 124/4th floor, 8020 Graz

Program:

- 6:15 PM – Welcome & Introduction
- Andreas Windisch (Joanneum Research): "Research Meets Industry – Hands-on ML Use Case"
- Roman Kern, David Fleischhacker (Know-Center): "Go Trustworthy, Go Local"
- Florian Becker (CEO Wirecube): "Predictive Maintenance – ML in Operational Use"
- Thomas Kloiber (VP AI, Leftshift One): "Deep Dive into RAG Agents – Technology & Application"
- 8:00 PM – Open Q&A Session & Food, Drinks & Networking 🍕🍻🤝

Join us, save the date, and actively engage in shaping the future of Graz's Machine Learning community at the AI Austria Meetup Summer Edition 2025!
"""

# Fill the template's `text` placeholder with the document above.
prompt = PROMPT.format(text=input_text)

In [4]:
# Run every configured model on the same prompt, collect the metrics returned
# by `llm(...)`, and write one rendered graph file per model.
OUTPUT_DIR = "output"
# The graph files are written into this directory; create it so the cell also
# works on a fresh checkout where it does not exist yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# One entry per model, in MODELS_TO_TEST order. Re-created on every run of this
# cell so re-executing it does not accumulate duplicate rows.
all_metrics = []

for model_name in MODELS_TO_TEST:
    display(Markdown(f"**{model_name}:**"))

    # `unload` is called before and after each run -- presumably so every
    # model starts from, and leaves behind, a server with nothing loaded.
    unload(host=HOST)
    try:
        response, metrics = llm(model_name, system=SYSTEM_PROMPT, prompt=prompt, schema=KnowledgeGraph, host=HOST)
    finally:
        # Also runs when generation raises, so a failed run still triggers the
        # post-run unload before the error propagates.
        unload(host=HOST)
    all_metrics.append(metrics)

    kg_name = f"{OUTPUT_DIR}/{model_name}.html"
    knowledge_graph = create_graph(response)
    knowledge_graph.show(kg_name)

    display(Markdown("---"))

**mistral-small-sd:**

output/mistral-small-sd.html


---

**gemma3-sd:**

output/gemma3-sd.html


---

**qwen3-sd:**

output/qwen3-sd.html


---

In [5]:
# Column titles for the benchmark summary, one per table column.
# An embedded "\n" wraps the title onto a second header line.
summary_headers = [
    "Model",
    "Token Generation\n(tokens/s)",
    "Prompt Processing\n(tokens/s)",
    "Tokens Generated",
    "Tokens Prompt",
    "Draft Accept Ratio",
]

# Render the collected per-model metrics as a plain-text grid and print it.
summary_table = tabulate(all_metrics, headers=summary_headers, tablefmt="grid")
print(summary_table)

+------------------+--------------------+---------------------+--------------------+-----------------+----------------------+
| Model            |   Token Generation |   Prompt Processing |   Tokens Generated |   Tokens Prompt |   Draft Accept Ratio |
|                  |         (tokens/s) |          (tokens/s) |                    |                 |                      |
| mistral-small-sd |             160.76 |             4235.98 |               1714 |            1156 |                 0.83 |
+------------------+--------------------+---------------------+--------------------+-----------------+----------------------+
| gemma3-sd        |             108.98 |             3384.8  |               1919 |            1193 |                 0.73 |
+------------------+--------------------+---------------------+--------------------+-----------------+----------------------+
| qwen3-sd         |             102.65 |             3081.13 |                893 |            1136 |                