In [104]:
import weaviate

def delete_collection(collection_name):
    """Delete a collection from the local Weaviate instance if it exists.

    Opens a new local connection, checks whether ``collection_name`` exists
    and deletes it when it does. Errors raised while checking or deleting
    are printed instead of being re-raised.

    Args:
        collection_name: Name of the collection to delete.
    """
    client = weaviate.connect_to_local()
    try:
        if client.collections.exists(collection_name):
            client.collections.delete(collection_name)
            print(f"Deleted existing collection: {collection_name}")
    except Exception as e:
        print(f"Error deleting collection: {str(e)}")
    finally:
        # Close on every exit path, including interrupts that are not
        # subclasses of Exception (e.g. a cell stopped mid-run).
        client.close()

def list_collections():
    """Print the collections reported by the Weaviate server on localhost.

    Connects to ``http://localhost:8080`` (gRPC port 50051), prints a header
    line and then one ``- <item>`` line for each item obtained by iterating
    the result of ``collections.list_all()``. The connection is closed once
    listing finishes or fails.
    """
    params = weaviate.connect.ConnectionParams.from_url(
        url="http://localhost:8080",
        grpc_port=50051,
    )
    conn = weaviate.WeaviateClient(connection_params=params)
    conn.connect()

    try:
        available = conn.collections.list_all()
        print("Available collections:")
        for entry in available:
            print(f"- {entry}")
    finally:
        conn.close()


def get_size_of_collection(collection_name):
    """Print the total object count of a collection on the local instance.

    Runs an ``aggregate.over_all(total_count=True)`` query against the
    collection and prints the resulting ``total_count``.

    Args:
        collection_name: Name of the collection to measure.

    Raises:
        Any exception raised by the Weaviate client is propagated to the
        caller; the connection is closed before it propagates.
    """
    client = weaviate.connect_to_local()
    try:
        collection = client.collections.get(collection_name)
        response = collection.aggregate.over_all(total_count=True)
        print(f"{collection_name} collection size is: {response.total_count}")
    finally:
        # Without this, a failed query would leave the connection open.
        client.close()

def print_collection_info(collection_name):
    """Print detailed information about a collection's configuration.

    Fetches the collection's config from the local Weaviate instance and
    prints, in order: the raw config ``__dict__``, its type and keys, each
    property with its per-vectorizer settings, and for every entry in
    ``vector_config`` the image/text fields and their weights read from the
    vectorizer's ``model`` mapping.

    Errors raised while fetching or printing are printed instead of being
    re-raised.

    Args:
        collection_name: Name of the collection to describe.
    """
    client = weaviate.connect_to_local()
    try:
        collection = client.collections.get(collection_name)
        collection_config = collection.config.get()

        print(f"\nPrinting all the config for collection {collection_name} and its keys")
        print(collection_config.__dict__)
        print(type(collection_config))
        print(collection_config.__dict__.keys())

        # Extract properties
        print(f"\nProperties for collection '{collection_name}':")
        for prop in collection_config.properties:
            print(f" - Property: {prop.name}, Data Type: {prop.data_type}")

            # vectorizer_configs may be None/empty for a property.
            if prop.vectorizer_configs:
                for vectorizer, config in prop.vectorizer_configs.items():
                    print(f" Vectorizer: {vectorizer},Skip: {config.skip}, Vectorize Property Name: {config.vectorize_property_name}")

        # Extract weights from vector_config
        if collection_config.vector_config:
            print("\nVector Configuration")
            for vector_name, vector_config in collection_config.vector_config.items():
                print(f"Vector: {vector_name}")
                if vector_config.vectorizer.model:
                    model_config = vector_config.vectorizer.model
                    print(f"  Image Fields: {model_config.get('imageFields', [])}")
                    print(f"  Text Fields: {model_config.get('textFields', [])}")
                    weights = model_config.get('weights', {})
                    print("  Weights:")
                    print(f"    Image Fields: {weights.get('imageFields', [])}")
                    print(f"    Text Fields: {weights.get('textFields', [])}")

    except Exception as e:
        print(f"Error getting collection info: {str(e)}")
    finally:
        # Close on every exit path, including interrupts that are not
        # subclasses of Exception (e.g. a cell stopped mid-run).
        client.close()

In [63]:
# Show which collections the Weaviate server on localhost:8080 reports.
list_collections()

Available collections:
- Flickr30k_manual_test
- Flickr30k_multi2vec_50_50
- Flickr30k_multi2vec_70_30
- Flickr30k_manual_70_30
- Flickr30k_manual_50_50
- Flickr30k_manual_60_40
- Flickr30k_multi2vec_test
- Flickr30k_multi2vec_60_40


In [64]:
# Print the total object count for one of the collections.
get_size_of_collection("Flickr30k_multi2vec_50_50")

Flickr30k_multi2vec_50_50 collection size is: 598


In [111]:
# Dump the stored configuration (properties, vectorizer fields and weights) for one collection.
print_collection_info("Flickr30k_multi2vec_70_30")


Printing all the config for collection Flickr30k_multi2vec_70_30 and its keys
{'name': 'Flickr30k_multi2vec_70_30', 'description': None, 'generative_config': None, 'inverted_index_config': _InvertedIndexConfig(bm25=_BM25Config(b=0.75, k1=1.2), cleanup_interval_seconds=60, index_null_state=False, index_property_length=False, index_timestamps=False, stopwords=_StopwordsConfig(preset=<StopwordsPreset.EN: 'en'>, additions=None, removals=None)), 'multi_tenancy_config': _MultiTenancyConfig(enabled=False, auto_tenant_creation=False, auto_tenant_activation=False), 'properties': [_Property(name='image', description=None, data_type=<DataType.BLOB: 'blob'>, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=None, vectorizer=None, vectorizer_configs={'multi2vec-clip': _PropertyVectorizerConfig(skip=False, vectorize_property_name=True)}), _Property(name='image_id', description=None, data_type=<DataType.TEXT: 'text'