# VectrixDB Loading Data

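This notebook shows different ways to load data into VectrixDB: single strings, lists of strings, documents with metadata, pandas DataFrames, documents with custom IDs, and large datasets added in batches.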

## From Strings and Lists

In [None]:
from vectrixdb import Vectrix

db = Vectrix("data_test", tier="hybrid", language="en")

# add() accepts a bare string, which counts as one document ...
db.add("This is a single document.")

# ... or a list of strings, where each element is its own document.
list_documents = [
    "First document in the list.",
    "Second document in the list.",
    "Third document in the list.",
]
db.add(list_documents)

# len(db) reports how many documents the database currently holds.
print(f"Total documents: {len(db)}")

## With Metadata

In [2]:
# Documents and their metadata are supplied as two parallel lists:
# the i-th metadata dict describes the i-th text.
language_texts = [
    "Python is great for data science.",
    "JavaScript powers the web.",
    "Rust is known for memory safety.",
]
language_metadata = [
    {"language": "Python", "category": "backend", "year": 1991},
    {"language": "JavaScript", "category": "frontend", "year": 1995},
    {"language": "Rust", "category": "systems", "year": 2010},
]

db.add(texts=language_texts, metadata=language_metadata)

print(f"Total documents: {len(db)}")

Total documents: 7


## Filter by Metadata

In [3]:
# Pass a metadata filter alongside the query; here only documents whose
# "category" is "backend" are wanted.
backend_only = {"category": "backend"}
results = db.search("programming language", filter=backend_only)

print("Backend languages:")
for hit in results:
    # Each hit exposes the stored text and the metadata dict it was added with.
    print(f"  {hit.text} - {hit.metadata}")

Backend languages:
  Python is great for data science. - {'language': 'Python', 'category': 'backend', 'year': 1991}


## From Pandas DataFrame

In [4]:
import pandas as pd

# Build the sample DataFrame row by row: one tuple per article, in the
# column order given below.
article_rows = [
    ("AI Basics", "Artificial intelligence is transforming industries.", "Alice", 1000),
    ("ML Guide", "Machine learning automates pattern recognition.", "Bob", 2500),
    ("Deep Learning", "Deep learning excels at complex pattern detection.", "Charlie", 1800),
]
df = pd.DataFrame(article_rows, columns=["title", "content", "author", "views"])

print(df)

           title                                            content   author  \
0      AI Basics  Artificial intelligence is transforming indust...    Alice   
1       ML Guide    Machine learning automates pattern recognition.      Bob   
2  Deep Learning  Deep learning excels at complex pattern detect...  Charlie   

   views  
0   1000  
1   2500  
2   1800  


In [5]:
# Load the DataFrame into a separate VectrixDB database.
db2 = Vectrix("df_test", tier="hybrid")

# The "content" column supplies the document text; the remaining columns
# are turned into one metadata dict per row.
article_texts = df["content"].tolist()
article_metadata = df[["title", "author", "views"]].to_dict(orient="records")
db2.add(texts=article_texts, metadata=article_metadata)

# Query the freshly loaded documents and show where each hit came from.
results = db2.search("pattern recognition")
print("Search results:")
for hit in results:
    title = hit.metadata.get("title")
    author = hit.metadata.get("author")
    # One print per hit: title line, text line, then a blank separator line.
    print(f"  Title: {title} by {author}\n  Text: {hit.text}\n")

Search results:
  Title: ML Guide by Bob
  Text: Machine learning automates pattern recognition.

  Title: Deep Learning by Charlie
  Text: Deep learning excels at complex pattern detection.

  Title: AI Basics by Alice
  Text: Artificial intelligence is transforming industries.



## With Custom IDs

In [6]:
db3 = Vectrix("id_test", tier="hybrid")

# Caller-chosen IDs, keyed to the text they identify. Dict insertion order
# keeps the IDs and texts aligned when they are split into two lists.
documents_by_id = {
    "doc-001": "Document A",
    "doc-002": "Document B",
    "doc-003": "Document C",
}
db3.add(
    texts=list(documents_by_id.values()),
    ids=list(documents_by_id.keys()),
)

# Each search hit carries the custom ID it was added with.
results = db3.search("document")
for hit in results:
    print(f"  ID: {hit.id} | Text: {hit.text}")

  ID: doc-003 | Text: Document C
  ID: doc-001 | Text: Document A
  ID: doc-002 | Text: Document B


## Batch Loading

In [7]:
# Large datasets are added in fixed-size batches rather than in one call.
db4 = Vectrix("batch_test", tier="hybrid")

large_dataset = [f"Document number {n} with some content." for n in range(100)]

# Walk the dataset in steps of batch_size; batch numbers are 1-based.
batch_size = 20
batch_starts = range(0, len(large_dataset), batch_size)
for batch_number, start in enumerate(batch_starts, start=1):
    chunk = large_dataset[start:start + batch_size]
    db4.add(chunk)
    print(f"Added batch {batch_number}: {len(chunk)} documents")

print(f"\nTotal documents: {len(db4)}")

Added batch 1: 20 documents


Added batch 2: 20 documents


Added batch 3: 20 documents


Added batch 4: 20 documents


Added batch 5: 20 documents

Total documents: 100


## Cleanup

In [8]:
import shutil, os

# Remove the folders named after the databases used in this notebook.
# Paths that do not exist are skipped; filter() is lazy, so each existence
# check happens immediately before that folder is deleted.
test_folders = ("data_test", "df_test", "id_test", "batch_test")
for folder in filter(os.path.exists, test_folders):
    shutil.rmtree(folder)
    print(f"Deleted {folder}")