In [2]:
import pandas as pd
import os

from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings

## Eigenes Chunking

In [3]:
# Read the comma-separated component list (UTF-8 text) into a DataFrame.
df = pd.read_csv(
    "input_data/components 1.txt",
    encoding="utf-8",
)

In [15]:
# Split the component table into one CSV-formatted .txt file per distinct
# 'Ebene-3' value. Rows whose 'Ebene-3' is missing are skipped.
output_dir = "indexed_data"

# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before the first write.
os.makedirs(output_dir, exist_ok=True)

# A single groupby pass replaces re-filtering the whole frame once per value.
# sort=False keeps the groups in order of first appearance; NaN keys are
# dropped by default, matching dropna().unique().
for name, filtered_df in df.groupby('Ebene-3', sort=False):
    # Sanitise the file name (no slashes or spaces). str() keeps this working
    # when a key is not a string (e.g. a numeric value).
    safe_name = str(name).replace("/", "_").replace(" ", "_")

    # Export as TXT; UTF-8 is stated explicitly to match the encoding used
    # when the source file is read.
    filtered_df.to_csv(
        os.path.join(output_dir, f"{safe_name}.txt"),
        sep=",",
        index=False,
        encoding="utf-8",
    )

## Verbindung herstellen

In [11]:
# Option 2: use an Azure OpenAI account with a deployment of an embedding model.
# Fixed settings first, then the values taken from the environment
# (each lookup yields None when the variable is not set).
azure_openai_api_version: str = "2023-05-15"
azure_deployment: str = "embedding"
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")

In [12]:
# Azure AI Search admin key and endpoint, both read from the environment.
# Either value is None when its variable is not set.
vector_store_password: str = os.environ.get("YOUR_AZURE_SEARCH_ADMIN_KEY")
vector_store_address: str = os.environ.get("YOUR_AZURE_SEARCH_ENDPOINT")

In [13]:
# Option 2: Use AzureOpenAIEmbeddings with an Azure account.
# The client is configured from the endpoint/key read from the environment
# and the deployment name and API version defined alongside them.
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_endpoint=azure_endpoint,
    api_key=azure_openai_api_key,
    azure_deployment=azure_deployment,
    openai_api_version=azure_openai_api_version,
)

## Embeddings erzeugen und eine Vektorstore-Instanz anlegen

In [14]:
# Create the AzureSearch vector store for the "components-index" index.
# Texts and queries are embedded through embeddings.embed_query.
index_name: str = "components-index"
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
)

In [None]:
from langchain_community.document_loaders import TextLoader


In [18]:

data_folder = "indexed_data"

# Collect the names of all .txt files in the folder.
txt_files = [entry for entry in os.listdir(data_folder) if entry.endswith(".txt")]

# Load every file as UTF-8 text and gather the resulting documents.
documents = []
for filename in txt_files:
    file_path = os.path.join(data_folder, filename)
    documents += TextLoader(file_path, encoding="utf-8").load()

# Insert straight into the vector store, without any further chunking.
vector_store.add_documents(documents=documents)

['ZjA4MGNkMDUtOWY1Mi00NGQ3LWI5ZDMtNmY2YTQ1NTJlYjU3',
 'MDg2MzIzOTEtYWE0My00YTNkLWFjNWQtODI1YTcxN2EyMDdi',
 'ZTI4MDk1OTAtZGNjYi00NDFmLThjZTQtYTA0MDIzYzM5ZTYx',
 'ZjUyZTE1MjYtNDA5ZS00NDAyLThhNWEtMWUxNDllMDA0OTJm',
 'N2I1MDU5OWMtNGU2Yy00YjVlLWEzMzgtN2EwNDVhZjhmNWVl',
 'OWMzYTM0ZTktMGRhMC00YjZhLThkMmQtMmVmYmFmNGZhYjRk',
 'YWUzMWQzMTMtMTQ5Yi00NDQ3LWFjMDItZGY0M2RkZGZiOGUw',
 'MTk2ODVkMmQtMTI3MC00M2RjLWI0NmUtOGMxNDNjYWNjNGUy',
 'NzA3YjVmZjYtODJlYy00YTQyLWJkOTMtNGFkMTBlYTcxODA4',
 'ZTJlMjI5MDgtMWY0My00NTRiLWExZTQtMDRhNjY4OTUzNmUy',
 'ODAwMTQwNjItYjlkOC00NjdjLWFlNzgtMmYyNmFjMzJhYjE3',
 'OTFiZTBjNDctN2NlYi00N2U4LTk1NmYtZTExNDc3MWY1MDll',
 'OWNkZmJlZDktMmI3Ni00MDI4LTgwY2QtYjZiOGMyY2Y4MTVm',
 'MTEwMjM0ZDYtY2ZjZS00NTdiLWExZTQtYWUxNTY3Njg3ZjFk',
 'ODA4ZWJiNDgtODY5Ni00MjU5LWFhMWUtMzI5NWFiNmY2NGFi',
 'MzI3NDVkZWEtNTYyOS00MzIwLTk4MzYtZDllMWY1ZWU0ZmE5',
 'MTQwZjI2ZDktODRlNy00ODhjLWI0YTktZWQ1ODRmY2E2MWQw',
 'Mzk5NTRmNWUtZDkyOS00YWY4LTkxNWQtYzNkOTNjNjhjNDk0',
 'MDlkZmJlODEtY2QzMS00NDg0LWE1MGEtNTZkZDdlMmY0

### Testen des Index

In [19]:
# Smoke-test the index: fetch the 3 nearest documents for a German query
# ("Scheinwerfer finden" = "find headlights") and show the best match.
docs = vector_store.similarity_search(
    query="Scheinwerfer finden",
    k=3,
    search_type="similarity",
)

# The search can return an empty list (e.g. nothing is indexed yet), so
# guard the [0] access rather than failing with an IndexError.
if docs:
    print(docs[0].page_content)
else:
    print("No documents returned for the query.")

Ebene-1,Ebene-2,Ebene-3,SNS_number,Komponente
Bodysystem,Beleuchtungssystem,Außenlichtsystem,94f36c4a-2bad-48ef-817a-cdd88abbaf50,LED-Modul
Bodysystem,Beleuchtungssystem,Außenlichtsystem,15206204-5d75-4417-a13c-cda18fa722a6,Scheinwerfergehäuse
Bodysystem,Beleuchtungssystem,Außenlichtsystem,9ac635ff-d737-4191-a3e1-124873207fa9,Reflektorlinse
Bodysystem,Beleuchtungssystem,Außenlichtsystem,13b3c0b4-b02d-42b0-a505-246126c283fe,Projektionslinse
Bodysystem,Beleuchtungssystem,Außenlichtsystem,4fa22ce8-bf30-45c8-99b4-cd28cc509259,Leuchtmittelhalterung
Bodysystem,Beleuchtungssystem,Außenlichtsystem,109e6ca3-897c-4f7f-985b-81fe7029ba12,Kühlkörper
Bodysystem,Beleuchtungssystem,Außenlichtsystem,49eaed10-27d1-453c-ae33-cc4ffd332a1f,Steckverbinder
Bodysystem,Beleuchtungssystem,Außenlichtsystem,5fc9f96f-77c1-4144-9ce6-a7f33bbd2aca,Kabelbaum
Bodysystem,Beleuchtungssystem,Außenlichtsystem,3a5c5287-b726-4443-ac2f-97d30b725333,Spannungswandler
Bodysystem,Beleuchtungssystem,Außenlichtsystem,e5ee980c-ce75-

In [20]:
from pprint import pprint

# Search with relevance scores: at most 4 hits, score_threshold of 0.80.
# NOTE(review): the query text is unrelated to the component data indexed
# in this notebook; the recorded run of this cell printed an empty list.
docs_and_scores = vector_store.similarity_search_with_relevance_scores(
    score_threshold=0.80,
    k=4,
    query="What did the president say about Ketanji Brown Jackson",
)

pprint(docs_and_scores)

[]


In [None]:
# NOTE(review): this cell repeats the preceding relevance-score search
# verbatim (same query, k and score_threshold) — consider removing it or
# replacing the query with one that relates to the indexed components.
from pprint import pprint

docs_and_scores = vector_store.similarity_search_with_relevance_scores(
    k=4,
    score_threshold=0.80,
    query="What did the president say about Ketanji Brown Jackson",
)
pprint(docs_and_scores)