In [1]:
from __future__ import annotations
import os
import json
import logging
from datetime import datetime
from io import BytesIO
from minio import Minio
from pydantic import BaseModel, Field
from typing import Optional

# Root logger configuration: DEBUG verbosity, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Shared MinIO client for the play.min.io endpoint over TLS. Credentials are
# read from the environment and fall back to "minioadmin" when unset.
minio_client = Minio(
    "play.min.io:443",
    secure=True,
    access_key=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),
    secret_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),
)

# Pydantic model for data handling
class ResponseData(BaseModel):
    # A question/response pair stamped with the time the record was created.

    # Text of the question that was asked.
    question: str
    # Text of the answer produced for the question.
    response: str
    # Creation time; defaults to datetime.now() (naive local time) evaluated
    # when the instance is built, not when the class is defined.
    timestamp: datetime = Field(default_factory=datetime.now)

    class Config:
        # Render datetime values as ISO 8601 strings when encoding to JSON.
        json_encoders = {datetime: datetime.isoformat}

# Check environment variables for Weaviate credentials
# if not os.environ.get("WEAVIATE_API_KEY") or not os.environ.get("WEAVIATE_ENVIRONMENT"):
#     logging.error("Missing Weaviate credentials")
#     raise EnvironmentError("Weaviate credentials are required for this script to run.")

# Function to fetch data from MinIO
def fetch_data_from_minio(bucket_name: str, object_name: str) -> Optional[str]:
    """Download an object from MinIO and return its content as text.

    Args:
        bucket_name: Name of the bucket holding the object.
        object_name: Key of the object inside the bucket.

    Returns:
        The object's bytes decoded as UTF-8, or ``None`` if anything went
        wrong (lookup, transfer, decoding, or releasing the connection).
        The failure is logged at ERROR level and never raised to the caller.
    """
    try:
        response = minio_client.get_object(bucket_name, object_name)
        try:
            return response.read().decode('utf-8')
        finally:
            # Always hand the HTTP connection back, even when read() or
            # decode() fails; the MinIO SDK documents close() followed by
            # release_conn() as the required cleanup for get_object().
            response.close()
            response.release_conn()
    except Exception as e:
        logging.error(f"Error fetching data from MinIO: {e}")
        return None

# Data processing and storage functions
def process_and_store_data(data: str, bucket_name: str, chunk_size: int = 500) -> None:
    """Split text into fixed-size chunks and upload each chunk to MinIO.

    Chunks are cut every ``chunk_size`` characters (not bytes) and uploaded,
    UTF-8 encoded, as ``split_<index>.txt`` with ``index`` starting at 0.
    Empty ``data`` uploads nothing.

    Args:
        data: Text to split and store.
        bucket_name: Destination bucket for the chunk objects.
        chunk_size: Maximum number of characters per chunk. Defaults to 500.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.

    Encoding and upload errors are not raised: they are logged at ERROR level
    and the remaining chunks are skipped, so earlier chunks may already have
    been stored.
    """
    # Reject a non-positive size up front: a zero step makes range() raise,
    # and a negative step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    try:
        for index, start in enumerate(range(0, len(data), chunk_size)):
            content_bytes = data[start:start + chunk_size].encode('utf-8')
            # put_object needs the payload length in bytes, which can exceed
            # the character count for non-ASCII text.
            minio_client.put_object(bucket_name, f"split_{index}.txt", BytesIO(content_bytes), len(content_bytes))
        logging.info("Data processed and stored successfully.")
    except Exception as e:
        logging.error(f"Error processing or storing data: {e}")

# Execution: fetch the source document, then split it and store the pieces.
data = fetch_data_from_minio("hydrate-bucket", "blog.min.io_minio-weaviate-unstructured-io_.txt")
if data is None:
    # fetch_data_from_minio returns None only when the download failed.
    logging.error("Failed to retrieve data.")
elif not data:
    # A successful download of an empty object is not a retrieval failure.
    logging.warning("Retrieved object is empty; nothing to process.")
else:
    process_and_store_data(data, "processed-data-bucket")

2024-04-26 13:07:55,356 - DEBUG - Starting new HTTPS connection (1): play.min.io:443
2024-04-26 13:07:55,789 - DEBUG - https://play.min.io:443 "GET /hydrate-bucket?location= HTTP/1.1" 200 137
2024-04-26 13:07:55,917 - DEBUG - https://play.min.io:443 "GET /hydrate-bucket/blog.min.io_minio-weaviate-unstructured-io_.txt HTTP/1.1" 200 16379
2024-04-26 13:07:56,107 - DEBUG - https://play.min.io:443 "GET /processed-data-bucket?location= HTTP/1.1" 200 137
2024-04-26 13:07:56,340 - DEBUG - https://play.min.io:443 "PUT /processed-data-bucket/split_0.txt HTTP/1.1" 200 0
2024-04-26 13:07:56,582 - DEBUG - https://play.min.io:443 "PUT /processed-data-bucket/split_1.txt HTTP/1.1" 200 0
2024-04-26 13:07:56,822 - DEBUG - https://play.min.io:443 "PUT /processed-data-bucket/split_2.txt HTTP/1.1" 200 0
2024-04-26 13:07:57,055 - DEBUG - https://play.min.io:443 "PUT /processed-data-bucket/split_3.txt HTTP/1.1" 200 0
2024-04-26 13:07:57,309 - DEBUG - https://play.min.io:443 "PUT /processed-data-bucket/split