In [1]:
from logos.classification.classification_manager import ClassificationManager
from logos.classification.model_handler import ModelHandler

import time

In [2]:
from logos.classification.classification_balancer import Balancer

# Catalogue of the candidate models used throughout this notebook.
# The list position of every entry equals its "id" field; later cells index
# `models` directly by model id, so keep the two in sync when adding entries.
# Each entry receives its own Balancer instance as "classification_weight".
models = [
    dict(
        id=0,
        name="azure-gpt-4-omni",
        endpoint="/gpt-4o/chat/completions?api-version=2024-08-01-preview",
        api_id=0,
        weight_privacy="CLOUD_NOT_IN_EU_BY_US_PROVIDER",
        tags="#math;#chat",
        parallel=256,
        description="reasoning, advanced maths, coding",
        classification_weight=Balancer(),
    ),
    dict(
        id=1,
        name="o3-mini",
        endpoint="/o3-mini/chat/completions?api-version=2024-12-01-preview",
        api_id=1,
        weight_privacy="CLOUD_NOT_IN_EU_BY_US_PROVIDER",
        tags="#chat;#coding;#empathy;#life;#poetry;#geography",
        parallel=256,
        description="chat, question answering, writing, coding",
        classification_weight=Balancer(),
    ),
]


In [3]:
# Build one ModelHandler per ranking criterion and register both models
# (ids 0 and 1) with each of them.
#
# Timing uses time.perf_counter() instead of time.time(): the wall clock has a
# coarse resolution on some platforms (the recorded run printed "0.00ms") and
# may be adjusted while the cell runs, whereas perf_counter() is the
# high-resolution clock intended for measuring short durations.
start = time.perf_counter()

# NOTE(review): the first argument of add_model presumably names an existing
# model that the new one is ranked relative to (None = no reference) — confirm
# against ModelHandler.add_model.
cost = ModelHandler(list())
cost.add_model(None, 0)
cost.add_model(None, 1)

accuracy = ModelHandler(list())
accuracy.add_model(None, 0)
accuracy.add_model(None, 1)

quality = ModelHandler(list())
quality.add_model(None, 0)
quality.add_model(None, 1)

# Latency is the only criterion where model 1 is registered with model 0 as
# its reference; in the recorded run this flipped the resulting order compared
# with the other three handlers.
latency = ModelHandler(list())
latency.add_model(None, 0)
latency.add_model(0, 1)
print("Model Handler Started: {:.2f}ms".format((time.perf_counter() - start) * 1000))

# In the recorded run get_models() returned a list of (weight, model_id) pairs.
print("Cost", cost.get_models())
print("Accuracy", accuracy.get_models())
print("Quality", quality.get_models())
print("Latency", latency.get_models())

Model Handler Started: 0.00ms
Cost [(-4, 1), (4, 0)]
Accuracy [(-4, 1), (4, 0)]
Quality [(-4, 1), (4, 0)]
Latency [(-4, 0), (4, 1)]


In [4]:
# Copy the weight computed by each handler onto the matching model entry.
# get_models() yields (weight, model_id) pairs; the model id doubles as the
# index into `models`. The criteria are processed in the same order as before:
# cost, accuracy, quality, latency.
weight_sources = (
    ("weight_cost", cost),
    ("weight_accuracy", accuracy),
    ("weight_quality", quality),
    ("weight_latency", latency),
)
for weight_key, handler in weight_sources:
    for weight, model_id in handler.get_models():
        models[model_id][weight_key] = weight

In [5]:
# Policy dictionary handed to the classifier together with the prompt.
# It carries one threshold per criterion (privacy, latency, accuracy, cost,
# quality) plus identifying metadata; an empty "topic" string is used here.
policy = dict(
    id=0,
    name="lax_all",
    entity_id=0,
    description="Somehow all LLMs that come into mind",
    threshold_privacy="CLOUD_NOT_IN_EU_BY_US_PROVIDER",
    threshold_latency=4,
    threshold_accuracy=4,
    threshold_cost=-4,
    threshold_quality=4,
    priority=255,
    topic="",
)

In [6]:
# Construct the ClassificationManager from the model catalogue and report how
# long start-up took (about 3.5 s in the recorded run).
# perf_counter() is used because it is the high-resolution clock meant for
# measuring elapsed time and is unaffected by system clock adjustments.
start = time.perf_counter()
classifier = ClassificationManager(models)
print("Classification Manager started: {:.2f}ms".format((time.perf_counter() - start) * 1000))

Classification Manager started: 3521.20ms


In [9]:
# Classify a sample prompt under the policy and time the call.
# perf_counter() is used because it is the high-resolution clock meant for
# measuring elapsed time and is unaffected by system clock adjustments.
start = time.perf_counter()
prompt = "Tell me a fun fact about the roman empire"
# `results` is iterated in a later cell, where each entry is read as
# (model_id, weight).
results = classifier.classify(prompt, policy)
print("Classification finished in: {:.2f}ms".format((time.perf_counter() - start) * 1000))

Latency weight for model 0 is: 0.16798161486607552
Latency weight for model 1 is: 0.5
Accuracy weight for model 0 is: 0.5
Accuracy weight for model 1 is: 0.16798161486607552
Quality weight for model 0 is: 0.5
Quality weight for model 1 is: 0.16798161486607552
Token weight for model 0 is: 0.0
Token weight for model 1 is: 0.0
tensor([-3.5556e-02,  7.1006e-03,  7.3745e-02,  2.7595e-02, -8.9129e-02,
        -2.7758e-02,  2.4404e-02,  3.9917e-03, -1.3082e-01,  1.6316e-02,
         7.5010e-02, -9.3414e-02, -1.3835e-02, -3.8455e-02, -1.2535e-01,
        -1.6019e-01,  2.7984e-02,  2.6003e-02,  2.9090e-02,  1.1504e-02,
         7.7546e-02, -7.6757e-02,  6.1760e-02, -2.6348e-02,  3.6615e-02,
         2.0043e-02,  4.5466e-03, -1.1553e-02,  4.9050e-02, -6.6610e-03,
         5.8409e-03,  5.3251e-02,  5.5141e-02, -6.8713e-02,  8.2041e-03,
        -6.2590e-02,  9.1526e-02, -4.7934e-04,  1.0106e-01, -1.2960e-02,
         2.2962e-02,  1.7249e-02,  8.6478e-02,  8.9021e-02, -1.1831e-02,
        -4.2556e-

In [10]:
# Report the final weight of every classified model.
# Each result is indexed as (model_id, weight); the model id is also the
# position of the model inside `models`.
for result in results:
    # Look the name up outside the f-string: reusing double quotes inside a
    # double-quoted f-string is a SyntaxError on Python versions before 3.12.
    model_name = models[result[0]]["name"]
    print(f"Model {model_name} got weight {result[1]}")

Model azure-gpt-4-omni got weight 1.329666157760439
Model o3-mini got weight 1.1009923620983315
