**Test to see if all network components are working as expected**

In [1]:
import os
import json
import weaviate
import requests
import weaviate
import pandas as pd
import weaviate.classes as wvc

from tqdm import tqdm

In [2]:
client = weaviate.connect_to_local(
    host='host.docker.internal',
    port='8080'
)

print(client.is_ready())

True


**Checking the metadata to see if all modules are enabled**

In [3]:
meta_info = client.get_meta()
print(meta_info)

{'grpcMaxMessageSize': 10485760, 'hostname': 'http://[::]:8080', 'modules': {'text2vec-transformers': {'model': {'_name_or_path': './models/model', 'add_cross_attention': False, 'architectures': ['BertModel'], 'attention_probs_dropout_prob': 0.1, 'bad_words_ids': None, 'begin_suppress_tokens': None, 'bos_token_id': None, 'chunk_size_feed_forward': 0, 'classifier_dropout': None, 'cross_attention_hidden_size': None, 'decoder_start_token_id': None, 'diversity_penalty': 0, 'do_sample': False, 'early_stopping': False, 'encoder_no_repeat_ngram_size': 0, 'eos_token_id': None, 'exponential_decay_length_penalty': None, 'finetuning_task': None, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'gradient_checkpointing': False, 'hidden_act': 'gelu', 'hidden_dropout_prob': 0.1, 'hidden_size': 384, 'id2label': {'0': 'LABEL_0', '1': 'LABEL_1'}, 'initializer_range': 0.02, 'intermediate_size': 1536, 'is_decoder': False, 'is_encoder_decoder': False, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'lay

**Now we are gonna create our test collection**

In [6]:
questions = client.collections.create(
    name="Questions",
    vectorizer_config=[wvc.config.Configure.NamedVectors.text2vec_transformers(name="category_vector",source_properties=["category"])],
    properties=[
        wvc.config.Property(
            name="question",
            data_type=wvc.config.DataType.TEXT,
        ),
        wvc.config.Property(
            name="answer",
            data_type=wvc.config.DataType.TEXT,
        ),
        wvc.config.Property(
            name="category",
            data_type=wvc.config.DataType.TEXT,
        )
    ]
)

print(questions.config.get(simple=False))

_CollectionConfig(name='Questions', description=None, generative_config=None, inverted_index_config=_InvertedIndexConfig(bm25=_BM25Config(b=0.75, k1=1.2), cleanup_interval_seconds=60, index_null_state=False, index_property_length=False, index_timestamps=False, stopwords=_StopwordsConfig(preset=<StopwordsPreset.EN: 'en'>, additions=None, removals=None)), multi_tenancy_config=_MultiTenancyConfig(enabled=False, auto_tenant_creation=False, auto_tenant_activation=False), properties=[_Property(name='question', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='none'), _Property(name='answer', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='n

**Time to import some test data**

In [7]:
resp = requests.get(
    "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json"
)
df = pd.DataFrame(resp.json())

In [8]:
df.head()

Unnamed: 0,Category,Question,Answer
0,SCIENCE,This organ removes excess glucose from the blo...,Liver
1,ANIMALS,It's the only living mammal in the order Probo...,Elephant
2,ANIMALS,The gavial looks very much like a crocodile ex...,the nose or snout
3,ANIMALS,"Weighing around a ton, the eland is the larges...",Antelope
4,ANIMALS,Heaviest of all poisonous snakes is this North...,the diamondback rattler


**https://weaviate.io/developers/academy/py/starter_text_data/text_collections/import_data**

In [9]:
questions = client.collections.get("Questions")

with questions.batch.dynamic() as batch:
    for i, question in tqdm(df.iterrows()):
        question_obj={
            "question": question["Question"],
            "answer": question["Answer"],
            "category": question["Category"]
        }
        batch.add_object(properties = question_obj)

if len(questions.batch.failed_objects) > 0:
    print(f"Failed to import {len(questions.batch.failed_objects)} objects")

10it [00:00, 11422.40it/s]


**Let's do some searches**

**https://weaviate.io/developers/academy/py/starter_text_data/text_searches/semantic**

In [10]:
response = questions.query.near_text(
    query="heavy animal", limit=5, return_metadata=wvc.query.MetadataQuery(distance=True)
)

for o in response.objects:
    print(o.properties["category"], o.properties["question"], o.properties["answer"])
    print(f"Distance to query: {o.metadata.distance:.3f}\n")

ANIMALS The gavial looks very much like a crocodile except for this bodily feature the nose or snout
Distance to query: 0.465

ANIMALS It's the only living mammal in the order Proboseidea Elephant
Distance to query: 0.465

ANIMALS Weighing around a ton, the eland is the largest species of this animal in Africa Antelope
Distance to query: 0.465

ANIMALS Heaviest of all poisonous snakes is this North American rattlesnake the diamondback rattler
Distance to query: 0.465

SCIENCE A metal that is ductile can be pulled into this while cold & under pressure wire
Distance to query: 0.889



**https://weaviate.io/developers/academy/py/starter_text_data/text_searches/keyword_hybrid**

In [11]:
response = questions.query.bm25(
    query="heavy animal", limit=5, return_metadata=wvc.query.MetadataQuery(score=True)
)

for o in response.objects:
    print(o.properties["category"], o.properties["question"], o.properties["answer"])
    print(f"BM25 score: {o.metadata.score:.3f}\n")

ANIMALS Weighing around a ton, the eland is the largest species of this animal in Africa Antelope
BM25 score: 0.544



**https://weaviate.io/developers/academy/py/starter_text_data/text_searches/filters** <br />
**https://weaviate.io/developers/weaviate/search/filters**

In [13]:
response = questions.query.near_text(
    query="heavy animal", limit=5, return_metadata=wvc.query.MetadataQuery(distance=True), filters = wvc.query.Filter.by_property("category").equal("Animals")
)

for o in response.objects:
    print(o.properties["category"], o.properties["question"], o.properties["answer"])
    print(f"Distance to query: {o.metadata.distance:.3f}\n")

ANIMALS Heaviest of all poisonous snakes is this North American rattlesnake the diamondback rattler
Distance to query: 0.465

ANIMALS Weighing around a ton, the eland is the largest species of this animal in Africa Antelope
Distance to query: 0.465

ANIMALS The gavial looks very much like a crocodile except for this bodily feature the nose or snout
Distance to query: 0.465

ANIMALS It's the only living mammal in the order Proboseidea Elephant
Distance to query: 0.465



**And finally we are gonna delete this collection**

In [14]:
client.collections.delete("Questions")