Install the Weaviate-Client library

In [1]:
!pip install -U weaviate-client



DEPRECATION: torchsde 0.2.5 has a non-standard dependency specifier numpy>=1.19.*; python_version >= "3.7". pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of torchsde or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


# Import the Schema Classes for Article & Author

## Setup of Schema and Data

This cell does the following:
- initializes the client
- defines the schema using json
- creates the objects using json

In [15]:
import weaviate
client = weaviate.Client("http://localhost:8080")

schema = {
  "classes": [
    {
      "class": "Article",
      "description": "A class to store articles",
      "properties": [
        {"name": "title", "dataType": ["string"], "description": "The title of the article"},
        {"name": "content", "dataType": ["text"], "description": "The content of the article"},
        {"name": "datePublished", "dataType": ["date"], "description": "The date the article was published"},
        {"name": "url", "dataType": ["string"], "description": "The URL of the article"}
      ]
    },
    {
      "class": "Author",
      "description": "A class to store authors",
      "properties": [
        {"name": "name", "dataType": ["string"], "description": "The name of the author"},
        {"name": "articles", "dataType": ["Article"], "description": "The articles written by the author"}
      ]
    }
  ]
}
client.schema.delete_class('Article')
client.schema.delete_class('Author')
client.schema.create(schema)

# JSON data to be Ingested

data = [
    {
        "class": "Article",
        "properties": {
            "title": "LangChain: OpenAI + S3 Loader",
            "content": "This article discusses the integration of LangChain with OpenAI and S3 Loader...",
            "url": "https://blog.min.io/langchain-openai-s3-loader/"
        }
    },
    {
        "class": "Article",
        "properties": {
            "title": "MinIO Webhook Event Notifications",
            "content": "Exploring the webhook event notification system in MinIO...",
            "url": "https://blog.min.io/minio-webhook-event-notifications/"
        }
    },
    {
        "class": "Article",
        "properties": {
            "title": "MinIO Postgres Event Notifications",
            "content": "An in-depth look at Postgres event notifications in MinIO...",
            "url": "https://blog.min.io/minio-postgres-event-notifications/"
        }
    },
    {
        "class": "Article",
        "properties": {
            "title": "From Docker to Localhost",
            "content": "A guide on transitioning from Docker to localhost environments...",
            "url": "https://blog.min.io/from-docker-to-localhost/"
        }
    }
]

for item in data:
    client.data_object.create(
        data_object=item["properties"],
        class_name=item["class"]
    )

# Creating a Backup

In [13]:
import weaviate

client = weaviate.Client("http://localhost:8080")
result = client.backup.create(
  backup_id="backup-id-5",
  backend="s3",
  include_classes=["Article", "Author"],  # specify classes to include or omit this for all classes
  wait_for_completion=True,
)
print(result)

{'backend': 's3', 'classes': ['Article', 'Author'], 'id': 'backup-id-5', 'path': 's3://weaviate-backups/backup-id-5', 'status': 'SUCCESS'}


## Deleting the Schema Classes for Restoring Purposes

In [16]:
client.schema.delete_class("Article")
client.schema.delete_class("Author")

# Restoring the Backup

In [17]:
result = client.backup.restore(
  backup_id="backup-id-2",
  backend="s3",
  wait_for_completion=True,
)

print(result)

{'backend': 's3', 'classes': ['Article', 'Author'], 'id': 'backup-id-2', 'path': 's3://weaviate-backups/backup-id-2', 'status': 'SUCCESS'}


## Deleting the Schema Classes for Restoring Purposes

In [18]:
client.schema.delete_class("Article")
client.schema.delete_class("Author")

# Restoring the Backup with error handling

In [19]:
import weaviate
from weaviate.exceptions import BackupFailedError

client = weaviate.Client("http://192.168.0.25:8080")

try:
    result = client.backup.restore(
        backup_id="backup-id-2",
        backend="s3",
        wait_for_completion=True,
    )
    print("Backup restored successfully:", result)

except BackupFailedError as e:
    print("Backup restore failed with error:", e)
    # Here you can add logic to handle the failure, such as retrying the operation or logging the error.


Backup restored successfully: {'backend': 's3', 'classes': ['Author', 'Article'], 'id': 'backup-id-2', 'path': 's3://weaviate-backups/backup-id-2', 'status': 'SUCCESS'}


# Get the Schema Class for "Article" to Prove the Backup/Restore

In [22]:
client.schema.get("Article")

{'class': 'Article',
 'description': 'A class to store articles',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'moduleConfig': {'text2vec-transformers': {'poolingStrategy': 'masked_mean',
   'vectorizeClassName': True}},
 'multiTenancyConfig': {'enabled': False},
 'properties': [{'dataType': ['text'],
   'description': 'The title of the article',
   'indexFilterable': True,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-transformers': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'title',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'The content of the article',
   'indexFilterable': True,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-transformers': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'content',
   'tokenization': 'word'},
  {'dataType': ['date'],
   'description': 'Th