In [12]:
# Install (or upgrade) the packages used by the cells below.
# NOTE: versions are not pinned and --upgrade pulls the latest releases, so
# results may differ between runs; restart the kernel afterwards if pip asks for it.

%pip install --upgrade pymilvus langchain openai tiktoken GitPython python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
import dotenv

# Load variables from a local .env file into the process environment.
# Later cells read MILVUS_URI and MILVUS_TOKEN from os.environ; the OpenAI
# credentials are presumably picked up from the environment as well (confirm the
# exact variable name against the OpenAI/langchain docs).
dotenv.load_dotenv()

True

In [3]:
from git import Repo
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language

In [4]:
# Clone the pymilvus repository (only once) into a portable, configurable location.
import os

# Default to a "pymilvus" folder inside the current working directory instead of a
# path that only exists on one machine; set PYMILVUS_REPO_PATH to override it.
# Kept as a plain string because later cells build paths with `repo_path + "..."`.
repo_path = os.environ.get("PYMILVUS_REPO_PATH", os.path.join(os.getcwd(), "pymilvus"))

# `git clone` refuses to write into an existing non-empty directory, so reuse a
# previous checkout when this cell is re-run (Restart & Run All stays green).
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from("https://github.com/milvus-io/pymilvus", to_path=repo_path)

In [5]:
# Load every .py file under the cloned package directory as langchain documents,
# parsed with the Python language parser. The cell displays the document count.
package_dir = repo_path + "/pymilvus/"
python_parser = LanguageParser(language=Language.PYTHON)

loader = GenericLoader.from_filesystem(
    package_dir, glob="**/*", suffixes=[".py"], parser=python_parser
)
documents = loader.load()
len(documents)

325

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents with langchain's Python-specific splitter
# (chunk_size=2000, chunk_overlap=200). The cell displays the number of chunks.
splitter_options = {"chunk_size": 2000, "chunk_overlap": 200}
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, **splitter_options
)

texts = python_splitter.split_documents(documents)
len(texts)

694

In [8]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.milvus import Milvus
import os


# Build the Milvus vector store from the chunks, embedding them with OpenAI.
embeddings = OpenAIEmbeddings()

# Connection settings are read from the environment; a missing variable raises KeyError.
milvus_connection = {
    "uri": os.environ["MILVUS_URI"],
    "token": os.environ["MILVUS_TOKEN"],  # API key used in place of user and password
    "secure": True,
}

vector_db = Milvus.from_documents(texts, embeddings, connection_args=milvus_connection)

# Expose the vector store as a retriever for the QA chain. 🏔️
retriever_options = {
    "search_type": "mmr",  # Also test "similarity"
    "search_kwargs": {"k": 8},
}
retriever = vector_db.as_retriever(**retriever_options)

In [9]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory

# Wire the chat model, a summary-based conversation memory and the retriever
# into a single conversational retrieval chain.
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

memory_settings = dict(llm=llm, memory_key="chat_history", return_messages=True)
memory = ConversationSummaryMemory(**memory_settings)

qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [10]:
# Ask the chain a question; the cell displays the generated answer.
question = "How can I initialize a Milvus project in Python?"

result = qa(question)
result["answer"]

'To initialize a Milvus project in Python, you can use the `MilvusClient` class from the `pymilvus` library. Here\'s an example of how to initialize a Milvus project:\n\n```python\nfrom pymilvus import MilvusClient\n\n# Initialize a Milvus client\nclient = MilvusClient(uri="http://localhost:19530", user="", password="", db_name="", token="", timeout=None)\n\n# Create a collection\nclient.create_collection(collection_name="my_collection", dimension=128, primary_field_name="id", id_type="int", vector_field_name="vector", metric_type="IP")\n```\n\nIn the example above, the `MilvusClient` is initialized with the following parameters:\n- `uri`: The connection address to the Milvus instance. By default, it is set to "http://localhost:19530".\n- `user`: The username for authentication. If not provided, it can be left empty.\n- `password`: The password for authentication. If not provided, it can be left empty.\n- `db_name`: The name of the database to use. If not provided, it can be left empty

In [13]:
# Some answers are below! 🏄‍♂️

## A more hallucinated answer!

To initialize a Milvus project in Python, you can use the `MilvusClient` class from the `pymilvus` library. 

Here's an example of how to initialize a Milvus project:

```python
from pymilvus import MilvusClient

# Initialize a Milvus client
client = MilvusClient(uri="http://localhost:19530", user="", password="", db_name="", token="", timeout=None)

# Create a collection
client.create_collection(collection_name="my_collection", dimension=128, primary_field_name="id", id_type="int", vector_field_name="vector", metric_type="IP")
```

In the example above, the `MilvusClient` is initialized with the following parameters:

- `uri`: The connection address to the Milvus instance. By default, it is set to "http://localhost:19530".
- `user`: The username for authentication. If not provided, it can be left empty.
- `password`: The password for authentication. If not provided, it can be left empty.
- `db_name`: The name of the database to use. If not provided, it can be left empty.
- `token`: An authentication token. If not provided, it can be left empty.
- `timeout`: The timeout for function calls. If not provided, it is set to `None`.

The `create_collection` method is used to create a collection in the Milvus project. It takes the following parameters:
- `collection_name`: The name of the collection to create.
- `dimension`: The dimension of the vector data to be stored in the collection.
- `primary_field_name`: The name of the primary key field. By default, it is set to "id".
- `id_type`: The type of the primary key field. It can be either "int" or "string".
- `vector_field_name`: The name of the vector field. By default, it is set to "vector".
- `metric_type`: The metric type to be used for similarity search. By default, it is set to "IP".
- `auto_id`: Whether to automatically generate IDs for inserted data. By default, it is set to False.
- `timeout`: The timeout for the create collection operation. If not provided, it can be left empty.

You can customize these parameters according to your project requirements.

---

## And this one sounds more accurate, IMO

The initialization of a Milvus project in Python can be done by creating an instance of the `MilvusClient` class.

Here is an example:

```python
from pymilvus import MilvusClient

# Initialize the Milvus client
client = MilvusClient(uri="http://localhost:19530")

# Use the client to perform operations on the Milvus project
# For example, create a collection
client.create_collection(collection_name="my_collection", dimension=128)
```

In the example above, the `uri` parameter is used to specify the connection address to the Milvus instance. By default, it is set to "http://localhost:19530". You can change it to the appropriate address for your Milvus project. After initializing the client, you can use it to perform various operations on the Milvus project, such as creating collections, inserting data, and querying data.

# Don't forget to give a star to those cool Milvus guys here: https://github.com/milvus-io/milvus