In [1]:
import sys
import os
# Make the project package importable from this notebook: resolve the directory
# two levels above the current working directory and register it on sys.path.
# Adjust the number of `os.pardir` hops to match your project structure.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
# Import the config classes
from aeat_code_rag_backend.env import MilvusConfig, EmbeddingsConfig

# Create config instances (will load from .env automatically)
milvus_config = MilvusConfig()
embeddings_config = EmbeddingsConfig()

# NOTE(review): this import sits mid-cell instead of next to the import above.
# The recorded output of this cell logs clients being started, which may be an
# import side effect, so the statement order is kept as-is — confirm before
# hoisting it to the top of the cell.
from aeat_code_rag_backend.connectors.MilvusClient import MilvusClient

# Build the client that the cells below use, wiring in both config objects.
milvus_client = MilvusClient(config=milvus_config, embeddingsconfig=embeddings_config)

2025-04-04 23:32:01,123 - INFO - app_logger - Starting clients preemptively
2025-04-04 23:32:02,430 - DEBUG - watsonx_logger - WatsonX Client successfully initialized.
2025-04-04 23:32:03,662 - DEBUG - milvus_logger - Milvus Client successfully initialized.
2025-04-04 23:32:04,193 - INFO - app_logger - All clients started correctly
2025-04-04 23:32:06,459 - DEBUG - milvus_logger - Milvus Client successfully initialized.


In [15]:
# Show the collections visible to the client; the returned value is rendered
# as this cell's output.
milvus_client.list_collections()

2025-04-04 23:33:06,115 - DEBUG - milvus_logger - Found 1 collections


['programming_languages']

In [17]:
# Collection name used by the stats, creation and search cells below.
collection_name = "programming_languages"

In [None]:
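# Uncomment to drop the collection. Pass the `collection_name` variable, not the literal string 'collection_name'.
# milvus_client.delete_collection(collection_name)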

In [4]:
# Inspect the collection's schema and metadata; the returned value is rendered
# as this cell's output.
milvus_client.get_collection_stats(collection_name)

{'collection_name': 'programming_languages',
 'auto_id': True,
 'num_shards': 1,
 'description': '',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': '',
   'type': <DataType.INT64: 5>,
   'params': {},
   'auto_id': True,
   'is_primary': True},
  {'field_id': 101,
   'name': 'text',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 1000, 'enable_analyzer': 'true'}},
  {'field_id': 102,
   'name': 'sparse',
   'description': '',
   'type': <DataType.SPARSE_FLOAT_VECTOR: 104>,
   'params': {},
   'is_function_output': True},
  {'field_id': 103,
   'name': 'dense',
   'description': '',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 1024}}],
 'functions': [{'name': 'text_bm25_emb',
   'id': 100,
   'description': '',
   'type': <FunctionType.BM25: 1>,
   'params': {},
   'input_field_names': ['text'],
   'input_field_ids': [101],
   'output_field_names': ['sparse'],
   'output_field_ids': [102]}],
 'aliases': [],
 'collec

In [4]:
# Sample data: one record per programming-language blurb, each record holding
# only a "text" field.
sample_documents = [
    {"text": blurb}
    for blurb in (
        "Python is a high-level, interpreted programming language known for its readability and versatility. It's widely used in data science, AI, and web development.",
        "JavaScript is a scripting language that enables interactive web pages and is an essential part of web development. It works alongside HTML and CSS.",
        "Java is a class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible.",
        "C# is a general-purpose, multi-paradigm programming language developed by Microsoft. It's widely used for developing Windows applications and games with Unity.",
        "Swift is a powerful programming language developed by Apple for iOS, macOS, watchOS, and tvOS app development.",
    )
]

In [5]:
# Create collection and insert data
# NOTE(review): the recorded run of this cell logged "already exists, dropping
# it", so re-running it appears to replace an existing collection of the same
# name — confirm in create_and_load_collection before running against data you
# want to keep.
result = milvus_client.create_and_load_collection(collection_name, sample_documents)
print(f"Created collection and inserted data: {result}")

2025-04-04 23:20:58,096 - INFO - milvus_logger - Collection programming_languages already exists, dropping it
2025-04-04 23:21:00,288 - DEBUG - milvus_logger - Created collection: programming_languages
2025-04-04 23:21:02,554 - DEBUG - milvus_logger - Prepared 5 documents with embeddings
2025-04-04 23:21:02,694 - DEBUG - milvus_logger - Inserted 5 documents into programming_languages
Created collection and inserted data: {'insert_count': 5, 'ids': [457077134854783276, 457077134854783277, 457077134854783278, 457077134854783279, 457077134854783280]}


In [4]:
# Sparse search: retrieve up to 5 hits for the query and print each hit's text,
# primary-key id and the value reported under 'distance'.
# In the recorded run the hits are listed from the highest to the lowest value.
#
# The hits live in `sparse_results` (they come from sparse_search, not a dense
# search), and the loop variable is `hit` so that it does not overwrite the
# `result` variable holding the insert summary from the create/insert cell.
query_text = "programming language with iOS development"
sparse_results = milvus_client.sparse_search(collection_name, query_text, limit=5)
for hit in sparse_results:
    print(hit['entity']['text'])
    print(f"ID: {hit['id']}")
    print(f"Distance: {hit['distance']}")
    print("--------------------------------")

Swift is a powerful programming language developed by Apple for iOS, macOS, watchOS, and tvOS app development.
ID: 457077134854783280
Distance: 2.535745859146118
--------------------------------
C# is a general-purpose, multi-paradigm programming language developed by Microsoft. It's widely used for developing Windows applications and games with Unity.
ID: 457077134854783279
Distance: 1.6978446245193481
--------------------------------
Python is a high-level, interpreted programming language known for its readability and versatility. It's widely used in data science, AI, and web development.
ID: 457077134854783276
Distance: 0.8654128313064575
--------------------------------
JavaScript is a scripting language that enables interactive web pages and is an essential part of web development. It works alongside HTML and CSS.
ID: 457077134854783277
Distance: 0.6035613417625427
--------------------------------
Java is a class-based, object-oriented programming language that is designed to hav

In [16]:
# Dense search: retrieve up to 5 hits for the query and print each hit's text,
# primary-key id and the value reported under 'distance'.
# In the recorded run the hits are listed from the highest to the lowest value.
#
# The loop variable is `hit` so that it does not overwrite the `result`
# variable holding the insert summary from the create/insert cell.
query_text = "programming language with iOS development"
dense_results = milvus_client.dense_search(collection_name, query_text, limit=5)
for hit in dense_results:
    print(hit['entity']['text'])
    print(f"ID: {hit['id']}")
    print(f"Distance: {hit['distance']}")
    print("--------------------------------")

Swift is a powerful programming language developed by Apple for iOS, macOS, watchOS, and tvOS app development.
ID: 457077134854783280
Distance: 0.8745343685150146
--------------------------------
JavaScript is a scripting language that enables interactive web pages and is an essential part of web development. It works alongside HTML and CSS.
ID: 457077134854783277
Distance: 0.8248165845870972
--------------------------------
C# is a general-purpose, multi-paradigm programming language developed by Microsoft. It's widely used for developing Windows applications and games with Unity.
ID: 457077134854783279
Distance: 0.821643590927124
--------------------------------
Java is a class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible.
ID: 457077134854783278
Distance: 0.8170634508132935
--------------------------------
Python is a high-level, interpreted programming language known for its readability and versatility. It's wide

In [18]:
# Hybrid search with explicit weights: 0.3 for the sparse side and 0.7 for the
# dense side (the recorded run logs "Using WeightedRanker" for this call).
# Prints each hit's text, primary-key id and the value reported under 'distance'.
#
# The hits live in `hybrid_weighted_results` rather than a variable named after
# the dense search, and the loop variable is `hit` so that it does not
# overwrite the `result` variable from the create/insert cell.
query_text = "programming language for web development"
hybrid_weighted_results = milvus_client.hybrid_search(collection_name, query_text, sparse_weight=0.3, dense_weight=0.7)
for hit in hybrid_weighted_results:
    print(hit['entity']['text'])
    print(f"ID: {hit['id']}")
    print(f"Distance: {hit['distance']}")
    print("--------------------------------")

2025-04-04 23:26:19,404 - DEBUG - milvus_logger - Using WeightedRanker with weights 0.3 and 0.7
Python is a high-level, interpreted programming language known for its readability and versatility. It's widely used in data science, AI, and web development.
ID: 457077134854783276
Distance: 0.7242634892463684
--------------------------------
JavaScript is a scripting language that enables interactive web pages and is an essential part of web development. It works alongside HTML and CSS.
ID: 457077134854783277
Distance: 0.7108761072158813
--------------------------------
Swift is a powerful programming language developed by Apple for iOS, macOS, watchOS, and tvOS app development.
ID: 457077134854783280
Distance: 0.6993676424026489
--------------------------------
C# is a general-purpose, multi-paradigm programming language developed by Microsoft. It's widely used for developing Windows applications and games with Unity.
ID: 457077134854783279
Distance: 0.6399834156036377
-------------------

In [18]:
# Hybrid search with ranker_type="rrf" and k=100 (the recorded run logs
# "Using RRFRanker with k=100" for this call).
# Prints each hit's text, primary-key id and the value reported under 'distance'.
#
# The hits live in `hybrid_rrf_results` so they are not mistaken for dense
# search output and can be compared with other searches; the loop variable is
# `hit` so that it does not overwrite the `result` variable from the
# create/insert cell.
query_text = "programming language for web development"
hybrid_rrf_results = milvus_client.hybrid_search(collection_name, query_text, ranker_type="rrf", k=100)
for hit in hybrid_rrf_results:
    print(hit['entity']['text'])
    print(f"ID: {hit['id']}")
    print(f"Distance: {hit['distance']}")
    print("--------------------------------")

2025-04-04 23:34:31,434 - DEBUG - milvus_logger - Using RRFRanker with k=100
JavaScript is a scripting language that enables interactive web pages and is an essential part of web development. It works alongside HTML and CSS.
ID: 457077134854783277
Distance: 0.0197049118578434
--------------------------------
Python is a high-level, interpreted programming language known for its readability and versatility. It's widely used in data science, AI, and web development.
ID: 457077134854783276
Distance: 0.0196097269654274
--------------------------------
Swift is a powerful programming language developed by Apple for iOS, macOS, watchOS, and tvOS app development.
ID: 457077134854783280
Distance: 0.019512660801410675
--------------------------------
Java is a class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible.
ID: 457077134854783278
Distance: 0.01913919486105442
--------------------------------
C# is a general-purpose, mult