In [1]:
import os
from dotenv import load_dotenv

from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

# Init MongoDB Client
# load_dotenv() makes variables from a local .env file available via os.environ.
load_dotenv()

# Fail fast when the connection string is missing: MongoClient(None) would
# otherwise silently fall back to PyMongo's default host (localhost), and the
# error would only surface later when a query is executed.
mongo_uri = os.environ.get('MONGO_URI_PRIVAT')
if not mongo_uri:
    raise RuntimeError(
        "Environment variable 'MONGO_URI_PRIVAT' is not set; "
        "define it in the environment or in the .env file."
    )

mongoClient = MongoClient(mongo_uri)
database = mongoClient.law_buddy
collection = database.rechtsprechung

In [12]:
def text_search(search_text: str = "*", gen_suchworte: bool = False, score: float = 0.0, gericht_filter: list = [], limit: int = 10) -> (list, str):
    """Query the "volltext" search index of the module-level `collection`.

    Args:
        search_text: Search terms or a question. An empty string returns an
            empty result without querying. "*" skips the text query and
            instead selects documents in which the `xml_text` field exists.
        gen_suchworte: If True, `search_text` is first passed to
            `generate_query` (defined outside this cell) and its return value
            is used as the query string; otherwise `search_text` is used as is.
        score: Minimum `searchScore` a document must reach to be kept.
            Forced to 0.0 when `search_text` is "*".
        gericht_filter: Values of the `gericht` field to keep. An empty list
            (or None) disables the filter.
        limit: Maximum number of documents to return.

    Returns:
        A tuple `(documents, suchworte)`: the matching documents as a list of
        dicts (projected fields plus `score`), sorted by `entsch_datum` in
        descending order, and the query string that was actually searched.
        Note that the limit is applied after the date sort, so the result is
        ordered by date, not by relevance.
    """
    # define query ------------------------------------------------
    if search_text == "":
        return [], ""
    if search_text == "*":
        suchworte = "*"
        # No text query is run in this branch, so a caller-supplied score
        # threshold must not filter anything out.
        score = 0.0
        query = {
            "index": "volltext",
            "exists": {"path": "xml_text"},
        }
    else:
        suchworte = generate_query(question=search_text) if gen_suchworte else search_text
        query = {
            "index": "volltext",
            "text": {
                "query": suchworte,
                "path": {"wildcard": "*"}
            }
        }

    # define fields ------------------------------------------------
    fields = {
        "_id": 1,
        "doknr": 1,
        "gericht": 1,
        "entsch_datum": 1,
        "aktenzeichen": 1,
        "xml_text": 1,
        "score": {"$meta": "searchScore"},
    }

    # define pipeline ------------------------------------------------
    # $search has to be the first stage of the pipeline.
    pipeline = [{"$search": query}]

    # Restrict to the requested courts right after the search stage so the
    # later stages only see matching documents. The list is copied so the
    # pipeline never aliases the caller's list or the shared default argument.
    if gericht_filter:
        pipeline.append({"$match": {"gericht": {"$in": list(gericht_filter)}}})

    pipeline.extend([
        {"$project": fields},
        {"$match": {"score": {"$gte": score}}},
        {"$sort": {"entsch_datum": -1}},
        {"$limit": limit},
    ])

    # execute query ------------------------------------------------
    cursor = collection.aggregate(pipeline)
    return list(cursor), suchworte

In [17]:
# Search input. Alternatives that were tried here:
#   "Was sagt das BAG zur Abmahnung?"  (a full question)
#   "*"                                (takes the no-text-query branch of text_search)
such_text = "'Abmahnung' 'BAG'"

# Collect the call arguments in one place so they are easy to tweak.
search_args = {
    "search_text": such_text,
    "gen_suchworte": False,
    "score": 0.0,
    # "gericht_filter": ["BVerwG"],
    "limit": 10,
}
results, suchworte = text_search(**search_args)

# Summary of what was searched and how many documents came back.
print(f"Suchworte: {suchworte}")
print(f"Results: {len(results)}")

# One line per hit: court, decision date, file reference.
for item in results:
    print(f"{item['gericht']}, {item['entsch_datum']}, {item['aktenzeichen']}")
  

Suchworte: 'Abmahnung' 'BAG'
Results: 10
BAG, 20241024, 2 AZN 608/24
BGH, 20241023, I ZR 112/23
BAG, 20241021, 8 AZB 10/24
BGH, 20241010, I ZR 108/22
BGH, 20241008, VI ZR 250/22
BGH, 20240926, I ZR 142/23
BAG, 20240919, 8 AZR 368/22
BGH, 20240918, IV ZR 436/22
BGH, 20240917, X ZR 39/23
BGH, 20240917, EnZR 57/23
