In [1]:
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from pathlib import Path
import torch

W0212 07:48:40.691000 1968 site-packages/torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
DATA_PATH = "../data/projects.jsonl"
EMBED_MODEL = "intfloat/multilingual-e5-base"  
# Sehr gut für Deutsch + Englisch

TOP_K = 5   # Wie viele Treffer pro Frage?

def load_data(path):
    documents = []
    metadatas = []

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            entry = json.loads(line)

            text = entry["text"]
            metadata = entry["metadata"]

            documents.append(text)
            metadatas.append(metadata)

    return documents, metadatas

def chunk_text(text, chunk_size=400, overlap=50):
    words = text.split()
    chunks = []

    for i in range(0, len(words), chunk_size - overlap):
        chunk = words[i:i + chunk_size]
        chunks.append(" ".join(chunk))

    return chunks

def build_index(documents, metadatas):
    model = SentenceTransformer(EMBED_MODEL)

    all_chunks = []
    all_metadata = []

    for doc, meta in zip(documents, metadatas):
        chunks = chunk_text(doc)

        for chunk in chunks:
            combined_text = (
                f"Projekt: {meta['projekt']}\n"
                f"Kategorie: {meta['kategorie']}\n"
                f"Datum: {meta['datum']}\n\n"
                f"{chunk}"
            )
            all_chunks.append(combined_text)
            all_metadata.append(meta)

    embeddings = model.encode(all_chunks, convert_to_numpy=True)

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    return model, index, all_chunks, all_metadata


def search(query, model, index, chunks, metadata, top_k=TOP_K):
    query_embedding = model.encode([query], convert_to_numpy=True)

    distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
        results.append({
            "text": chunks[idx],
            "metadata": metadata[idx]
        })

    return results

In [3]:
print("Lade Daten...")
documents, metadatas = load_data(DATA_PATH)

print("aue Index...")
model, index, chunks, metadata = build_index(documents, metadatas)


Lade Daten...
aue Index...


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

[1mXLMRobertaModel LOAD REPORT[0m from: intfloat/multilingual-e5-base
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [4]:
def ask_question(query, top_k=5):
    results = search(query, model, index, chunks, metadata, top_k=top_k)

    print("\n Top Treffer:\n")
    
    for i, r in enumerate(results):
        print(f"--- Treffer {i+1} ---")
        print(f"Projekt: {r['metadata']['projekt']}")
        print(f"Kategorie: {r['metadata']['kategorie']}")
        print(f"Datum: {r['metadata']['datum']}")
        print(f"\nText:\n{r['text']}\n")
        print("="*70)

    return results

In [5]:
ask_question("Um was gehts bei Projekt Neptune?")




 Top Treffer:

--- Treffer 1 ---
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-03

Text:
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-03

Entscheidung: Basis-Image aktualisieren und CI-Pipeline standardisieren.

--- Treffer 2 ---
Projekt: Neptune
Kategorie: Softwareentwicklung
Datum: 2025-02-16

Text:
Projekt: Neptune
Kategorie: Softwareentwicklung
Datum: 2025-02-16

Entwicklerperspektive: Integrationstests mussten an neue Node-Version angepasst werden.

--- Treffer 3 ---
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-10

Text:
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-10

Monitoring mit Prometheus erweitert. Neue Alerts für Memory-Leaks wurden hinzugefügt.

--- Treffer 4 ---
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-10

Text:
Projekt: Neptune
Kategorie: DevOps
Datum: 2025-02-10

Monitoring-Ausbau priorisiert, um Memory-Probleme frühzeitig zu erkennen.

--- Treffer 5 ---
Projekt: Neptune
Kategorie: Support
Datum: 2025-02-16

Text:
Projekt: Neptune
Kategorie

[{'text': 'Projekt: Neptune\nKategorie: DevOps\nDatum: 2025-02-03\n\nEntscheidung: Basis-Image aktualisieren und CI-Pipeline standardisieren.',
  'metadata': {'projekt': 'Neptune',
   'kategorie': 'DevOps',
   'datum': '2025-02-03'}},
 {'text': 'Projekt: Neptune\nKategorie: Softwareentwicklung\nDatum: 2025-02-16\n\nEntwicklerperspektive: Integrationstests mussten an neue Node-Version angepasst werden.',
  'metadata': {'projekt': 'Neptune',
   'kategorie': 'Softwareentwicklung',
   'datum': '2025-02-16'}},
 {'text': 'Projekt: Neptune\nKategorie: DevOps\nDatum: 2025-02-10\n\nMonitoring mit Prometheus erweitert. Neue Alerts für Memory-Leaks wurden hinzugefügt.',
  'metadata': {'projekt': 'Neptune',
   'kategorie': 'DevOps',
   'datum': '2025-02-10'}},
 {'text': 'Projekt: Neptune\nKategorie: DevOps\nDatum: 2025-02-10\n\nMonitoring-Ausbau priorisiert, um Memory-Probleme frühzeitig zu erkennen.',
  'metadata': {'projekt': 'Neptune',
   'kategorie': 'DevOps',
   'datum': '2025-02-10'}},
 {'te

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "Qwen/Qwen2.5-3B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

llm = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    trust_remote_code=True
)

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
llm = llm.to(device)
llm.eval()

Loading weights:   0%|          | 0/434 [00:00<?, ?it/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-35): 36 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear(in_features=2048, out_features=256, bias=True)
          (v_proj): Linear(in_features=2048, out_features=256, bias=True)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=2048, out_features=11008, bias=False)
          (up_proj): Linear(in_features=2048, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((2048,), eps=1e-06)
    (ro

In [None]:
import torch

def ask_question(query, top_k=5, max_new_tokens=300):

    # Retrieval
    results = search(query, model, index, chunks, metadata, top_k=top_k)

    if len(results) == 0:
        print("Keine relevanten Dokumente gefunden.")
        return

    # Kontext strukturieren (Qwen mag klare Struktur)
    context_blocks = []
    for i, r in enumerate(results):
        block = (
            f"DOKUMENT [{i+1}]\n"
            f"Projekt: {r['metadata']['projekt']}\n"
            f"Kategorie: {r['metadata']['kategorie']}\n"
            f"Datum: {r['metadata']['datum']}\n"
            f"Inhalt:\n{r['text']}\n"
        )
        context_blocks.append(block)

    full_context = "\n\n".join(context_blocks)

    # Chat-Template nutzen (WICHTIG für Qwen)
    messages = [
        {
            "role": "system",
            "content": (
                "Du bist ein interner Unternehmensassistent. "
                "Beantworte die Frage **vollständig aus dem Kontext**. "
                "Formuliere eine klare, präzise Antwort. "
                "Nutze die Informationen in den Dokumenten. "
                "Wenn Informationen fehlen, sage: 'Die Information ist im Kontext nicht enthalten.'"
            )
        },
        {
            "role": "user",
            "content": f"Hier ist der Kontext:\n\n{full_context}\n\nFrage: {query}\n\nIMPORTANT: Respond in the same language as the question."
        }
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenisieren
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048
    )

    # ALLES explizit aufs gleiche Device wie das Modell
    inputs = {k: v.to(llm.device) for k, v in inputs.items()}

    # Generieren (deterministisch für RAG)
    with torch.no_grad():
        outputs = llm.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Nur generierten Teil dekodieren
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    print("\nAntwort:\n")
    print(answer)
    print("\n" + "="*80)
    print("Verwendete Quellen:\n")

    for i, r in enumerate(results):
        print(f"{i+1}. {r['metadata']['projekt']} | {r['metadata']['datum']}")

    return answer


In [8]:
ask_question("Gab es Probleme bei Projekt Neptune?")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Antwort:

Ja, es gab Probleme bei Projekt Neptune. Die Informationen in den Dokumenten zeigen, dass Probleme intern begrenzt waren, aber Integrationstests mussten an neue Node-Version angepasst werden. Darüber hinaus wurden neue Alerts für Memory-Leaks hinzugefügt, um Memory-Probleme frühzeitig zu erkennen.

Verwendete Quellen:

1. Neptune | 2025-02-16
2. Neptune | 2025-02-16
3. Neptune | 2025-02-10
4. Neptune | 2025-02-16
5. Neptune | 2025-02-10


'Ja, es gab Probleme bei Projekt Neptune. Die Informationen in den Dokumenten zeigen, dass Probleme intern begrenzt waren, aber Integrationstests mussten an neue Node-Version angepasst werden. Darüber hinaus wurden neue Alerts für Memory-Leaks hinzugefügt, um Memory-Probleme frühzeitig zu erkennen.'

In [9]:
ask_question("Worum ging es beim Projekt Helios?")


Antwort:

Das Projekt Helios befasst sich mit der IT-Sicherheit und beinhaltet Updates und Änderungen an TLS-Konfigurationen, Zugriffsrechten für Admin-Rollen und Code- und Config-Änderungen.

Verwendete Quellen:

1. Helios | 2025-02-01
2. Helios | 2025-02-08
3. Helios | 2025-02-18
4. Helios | 2025-03-03
5. Helios | 2025-02-01


'Das Projekt Helios befasst sich mit der IT-Sicherheit und beinhaltet Updates und Änderungen an TLS-Konfigurationen, Zugriffsrechten für Admin-Rollen und Code- und Config-Änderungen.'

In [11]:
ask_question("Haben wir bereits auf OAuth2 umgestellt? Erkläre kurz den Stand der Dinge.")


Antwort:

Ja, wir haben bereits auf OAuth2 umgestellt. Die Migration auf OAuth2 ist abgeschlossen. Alte API-Keys wurden deaktiviert und die Dokumentation final aktualisiert.

Verwendete Quellen:

1. Atlas | 2025-01-12
2. Cipher | 2025-03-10
3. Sentinel | 2025-03-22
4. Sentinel | 2025-03-18
5. Cipher | 2025-04-02


'Ja, wir haben bereits auf OAuth2 umgestellt. Die Migration auf OAuth2 ist abgeschlossen. Alte API-Keys wurden deaktiviert und die Dokumentation final aktualisiert.'

In [31]:
ask_question("Were there problems with Project Neptune?")


Antwort:

Yes, there were problems with Project Neptune. The information in the documents indicates that there were memory issues that were not detected in time, which led to delays in feature releases. Additionally, integration tests had to be updated for a new Node version, and there were CI failures that caused further delays.

Verwendete Quellen:

1. Neptune | 2025-02-16
2. Neptune | 2025-02-10
3. Neptune | 2025-02-16
4. Neptune | 2025-02-16
5. Neptune | 2025-02-10


'Yes, there were problems with Project Neptune. The information in the documents indicates that there were memory issues that were not detected in time, which led to delays in feature releases. Additionally, integration tests had to be updated for a new Node version, and there were CI failures that caused further delays.'

In [17]:
ask_question("What was Project Helios about?")



Antwort:

Project Helios was a project that focused on security measures despite limited resources. The project management perspective emphasized the importance of security measures, while the developer perspective required code and configuration changes to adapt to TLS configurations. The IT security perspective involved defining new admin roles, reducing rights on database and infrastructure levels, and adopting the least-privilege principle as a standard. An internal security scan identified outdated TLS configurations, and an update was planned.

Verwendete Quellen:

1. Helios | 2025-02-18
2. Helios | 2025-02-18
3. Helios | 2025-02-08
4. Helios | 2025-02-08
5. Helios | 2025-02-01


'Project Helios was a project that focused on security measures despite limited resources. The project management perspective emphasized the importance of security measures, while the developer perspective required code and configuration changes to adapt to TLS configurations. The IT security perspective involved defining new admin roles, reducing rights on database and infrastructure levels, and adopting the least-privilege principle as a standard. An internal security scan identified outdated TLS configurations, and an update was planned.'

In [19]:
ask_question("Have we already migrated to OAuth2? Briefly explain the current status.")


Antwort:

Yes, we have already migrated to OAuth2. The current status is that the OAuth2 flow with Authorization Code Grant has been implemented, and the token lifetime and scope handling have been adjusted. The legacy API key validation is still active. A decision has been made to remove old API keys completely by the end of Q1 and to inform customers early. The refresh token expired server-side because the maximum validity period was exceeded. The frontend currently does not handle the error status correctly. It has been decided to extend the error handling in the frontend and to implement automatic re-authentication. The developer perspective: The migration to OAuth2 required adjustments in several services and additional tests for token flows. The DevOps perspective: Secrets management and token configuration must be centralized for OAuth2. The migration to OAuth2 is complete. Old API keys have been deactivated, and the documentation has been finalized.

Verwendete Quellen:

1. At

'Yes, we have already migrated to OAuth2. The current status is that the OAuth2 flow with Authorization Code Grant has been implemented, and the token lifetime and scope handling have been adjusted. The legacy API key validation is still active. A decision has been made to remove old API keys completely by the end of Q1 and to inform customers early. The refresh token expired server-side because the maximum validity period was exceeded. The frontend currently does not handle the error status correctly. It has been decided to extend the error handling in the frontend and to implement automatic re-authentication. The developer perspective: The migration to OAuth2 required adjustments in several services and additional tests for token flows. The DevOps perspective: Secrets management and token configuration must be centralized for OAuth2. The migration to OAuth2 is complete. Old API keys have been deactivated, and the documentation has been finalized.'