### Complete Testrun of Logos Service
This notebook provides a complete test run of Logos. It brings its own models, classifies them under real conditions, and sends requests to the k >= 1 most suitable models.

In [1]:
import time
from pprint import pprint

from logos.classification.classification_manager import ClassificationManager
from logos.scheduling.scheduling_fcfs import FCFSScheduler
from logos.scheduling.scheduling_manager import SchedulingManager

In [2]:
# Policy handed to classifier.classify() together with the prompt.
# NOTE(review): the scale and meaning of the threshold_* values are defined by
# the classifier, not by this notebook — confirm before tuning them.
policy = dict(
    id=0,
    name="lax_all",
    entity_id=0,
    description="Somehow all LLMs that come into mind",
    threshold_privacy="CLOUD_NOT_IN_EU_BY_US_PROVIDER",
    threshold_latency=0,
    threshold_accuracy=0,
    threshold_cost=-32,
    threshold_quality=0,
    priority=255,
    topic="",
)

In [3]:
# Load the model definitions this test run works with.
from test_model_data import prepare_model_data
models = prepare_model_data()
# The classifier is built over those models and is used later to classify the prompt.
classifier = ClassificationManager(models)
# Scheduling manager backed by FCFSScheduler (presumably first-come-first-served — confirm).
sm = SchedulingManager(FCFSScheduler())
# run() is called before any request is added; the matching sm.stop() is near the end of the notebook.
# NOTE(review): the cell returns right after run(), so it presumably works in the background — confirm.
sm.run()

2025-07-07 11:11:57,669 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Model Handler Started: 15.95ms


In [4]:
# Sample prompt: it is classified against the policy and later sent to every model in the result list.
prompt = "What's the definition of classical mechanics in physics?"

In [5]:
# Classify the prompt under `policy`; the returned value is handed to the scheduler via sm.add_request().
results = classifier.classify(prompt, policy)

Latency weight for model 0 is: 0.5
Latency weight for model 1 is: 0.9608342772032357
Latency weight for model 2 is: 0.008162571153159891
Latency weight for model 3 is: 0.8320183851339245
Latency weight for model 4 is: 0.039165722796764356
Latency weight for model 5 is: 0.0016588010801744217
Latency weight for model 6 is: 0.9918374288468401
Latency weight for model 7 is: 0.9983411989198255
Latency weight for model 8 is: 0.16798161486607552
Accuracy weight for model 0 is: 0.9983411989198255
Accuracy weight for model 1 is: 0.039165722796764356
Accuracy weight for model 2 is: 0.8320183851339245
Accuracy weight for model 3 is: 0.16798161486607552
Accuracy weight for model 4 is: 0.9918374288468401
Accuracy weight for model 5 is: 0.9608342772032357
Accuracy weight for model 6 is: 0.008162571153159891
Accuracy weight for model 7 is: 0.0016588010801744217
Accuracy weight for model 8 is: 0.5
Quality weight for model 0 is: 0.9983411989198255
Quality weight for model 1 is: 0.039165722796764356
Qua

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Laura weight for model 0 is: 0.15380439162254333
Laura weight for model 1 is: 0.1193922758102417
Laura weight for model 2 is: 0.04920206218957901
Laura weight for model 3 is: 0.12884920835494995
Laura weight for model 4 is: 0.0660228282213211
Laura weight for model 5 is: 0.16006161272525787
Laura weight for model 6 is: -0.001814933493733406
Laura weight for model 7 is: 0.03188987076282501
Laura weight for model 8 is: 0.22442996501922607


In [6]:
# Hand the classification results to the scheduler; an empty dict is passed as the first argument.
tid = sm.add_request(dict(), results)

# Wait for this task to be executed.
# Sleep between polls instead of spinning on `pass`: a tight loop pins a CPU
# core for the whole wait. The deadline turns a task that never finishes into
# a clear error instead of a cell that hangs forever.
POLL_INTERVAL_S = 0.01
WAIT_TIMEOUT_S = 60.0
deadline = time.monotonic() + WAIT_TIMEOUT_S
while not sm.is_finished(tid):
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Scheduler did not finish task {tid} within {WAIT_TIMEOUT_S} s"
        )
    time.sleep(POLL_INTERVAL_S)

# NOTE(review): get_result() is called without the task id — confirm it returns the result of `tid`.
out = sm.get_result()

In [7]:
# Show the scheduler's result list. The first element of each tuple is used as a model id
# and the second is stored next to the response in the request loop.
# NOTE(review): the list appears ordered by descending second element (presumably a score) — confirm.
print(out.models)

[(0, 2.8042911810847375, 255, 256), (5, 2.2434505809371617, 255, 256), (4, 2.154886236933087, 255, 256), (2, 1.7706034658001668, 255, 256), (8, 1.6168415449045277, 255, 256), (3, 1.4256800315759754, 255, 256), (1, 1.2779502744172477, 255, 256), (7, 1.0654385426058244, 255, 256), (6, 1.004532704165693, 255, 256)]


In [8]:
# Read the API keys from key.txt, one per line, in this order:
# Azure key used for model id 0, Azure key used for model id 1, Open WebUI key.
with open("key.txt", "r") as f:
    # strip() removes surrounding whitespace including the line terminator;
    # any remaining tab characters inside the key are dropped as well.
    key_lines = [line.strip().replace("\t", "") for line in f]

# Fail early with a readable message instead of an IndexError on a short file
# or an authentication failure much later caused by a blank key.
if len(key_lines) < 3 or not all(key_lines[:3]):
    raise ValueError(
        "key.txt must contain three non-empty lines: "
        "Azure key 0, Azure key 1, Open WebUI key"
    )

azure_key_0, azure_key_1, openwebui_key = key_lines[:3]

In [9]:
from test_model_data import get_from_id, test_send_to_azure, create_html, test_send_to_webui
import time

# Collected results keyed by model name. Each value is a tuple of
# (placement-prefixed answer text, second element of the scheduler tuple,
#  measured latency in seconds, the model's "classification_weight" entry).
data = dict()

for index, tpl in enumerate(out.models):
    model_id = tpl[0]
    model = get_from_id(models, model_id)

    # perf_counter() is monotonic, so the measured latency cannot be distorted
    # by wall-clock adjustments while the request is in flight.
    started = time.perf_counter()
    if model_id in (0, 1):
        # Model ids 0 and 1 go to Azure, each with its own key.
        azure_key = azure_key_0 if model_id == 0 else azure_key_1
        response = test_send_to_azure(model, prompt, azure_key)
        backend = "Azure"
    else:
        response = test_send_to_webui(model, prompt, openwebui_key)
        backend = "Openwebui"
    latency = time.perf_counter() - started
    print(f"Model {model['name']} sent to {backend}")

    try:
        answer = response["choices"][0]["message"]["content"]
        weight = get_from_id(classifier.filtered, model_id)["classification_weight"]
        data[model['name']] = (f"Place {index}: " + answer, tpl[1], latency, weight)
    except (KeyError, IndexError, TypeError):
        # The response (or the filtered-model lookup) did not have the expected
        # shape: show the raw response and continue with the next model.
        # Only shape errors are caught, so KeyboardInterrupt still stops the loop.
        pprint(response)

Model azure-gpt-4-omni sent to Azure
Model qwen3:30b-a3b sent to Openwebui
Model llama3.3:latest sent to Openwebui
Model deepseek-r1:70b sent to Openwebui
Model qwen3:32b sent to Openwebui
Model gemma3:27b sent to Openwebui
Model o3-mini sent to Azure
Model gemma3:4b sent to Openwebui
Model tinyllama:latest sent to Openwebui


In [10]:
# Export the collected per-model results together with the prompt.
# create_html is imported in the request-loop cell, which must have been run first.
create_html(data, prompt)

HTML successfully exported


In [13]:
# Inspect one model entry. This indexes by list position, which is not necessarily the model's 'id'.
models[3]

{'id': 3,
 'name': 'gemma3:27b',
 'endpoint': 'https://gpu.aet.cit.tum.de/api/chat/completions',
 'api_id': 1,
 'weight_privacy': 'LOCAL',
 'tags': '#conversational-AI #natural-language-understanding #explanation-generation #general-purpose #chatbot',
 'parallel': 256,
 'description': 'Strong in conversational AI, natural language understanding, and generating detailed explanations',
 'classification_weight': <logos.classification.classification_balancer.Balancer at 0x1bfbe5291f0>,
 'weight_cost': 16,
 'weight_accuracy': -8,
 'weight_quality': -8,
 'weight_latency': 8}

In [None]:
# Stop the scheduling manager that was started with sm.run() in the setup cell.
sm.stop()

In [None]:
from test_model_data import test_send_to_webui

In [None]:
# Ad-hoc request against the Open WebUI endpoint.
# NOTE(review): elsewhere in this notebook test_send_to_webui receives a model
# entry as its first argument; here an empty string is passed — confirm that
# the helper accepts it.
response = test_send_to_webui("", prompt, openwebui_key)
print("Model xxx sent to Webui:")
# The former try/except executed the same pprint(response) in both arms, so it
# handled nothing; a single call has the same effect.
pprint(response)

In [None]:
import requests


def chat_with_model(token, model="deepseek-r1:70b", content="Tell me a short joke",
                    url='https://gpu.aet.cit.tum.de/api/chat/completions', timeout=300):
    """Send a single user message to a chat-completions endpoint and return the JSON reply.

    Args:
        token: Bearer token placed in the Authorization header.
        model: Model name sent in the request body.
        content: Text of the single user message.
        url: Endpoint that receives the POST request.
        timeout: Seconds passed to requests.post as ``timeout``; without it the
            call can block indefinitely if the server never answers.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error payload from the server is returned as-is.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            the timeout expires.
        ValueError: if the response body is not valid JSON.
    """
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": content
            }
        ]
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    return response.json()

# Call the endpoint directly with the Open WebUI key and pretty-print the JSON reply.
pprint(chat_with_model(openwebui_key))