# MinIO Connection Examples

This notebook demonstrates how to connect to MinIO and access data stored in the data lake.

## 1. Install Required Libraries

In [None]:
!pip install -q minio pandas pyarrow boto3

## 2. Connect to MinIO using MinIO Client

In [None]:
import os

from minio import Minio
from minio.error import S3Error

# MinIO connection settings.
# Values are taken from the environment so real credentials never have to be
# committed with the notebook. The fallbacks are the values this notebook
# previously hard-coded, so it keeps working unchanged when nothing is set.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "minio:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "admin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "password123")

# MinIO connection (secure=False: plain HTTP, no TLS)
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=False
)

# List buckets
buckets = minio_client.list_buckets()
print("Available buckets:")
for bucket in buckets:
    print(f"  - {bucket.name}")

## 3. List Objects in Warehouse Bucket

In [None]:
# Enumerate everything stored in the "warehouse" bucket.
# recursive=True requests a recursive listing rather than only the top level.
objects = minio_client.list_objects(bucket_name="warehouse", recursive=True)

# One line per object: its key followed by its size.
print("Objects in warehouse:")
for obj in objects:
    print(f"  - {obj.object_name} ({obj.size} bytes)")

## 4. Read JSON Data from MinIO

In [None]:
import json
import pandas as pd

# Read JSON file
# The response returned by get_object must be closed and its connection
# released by the caller; try/finally guarantees that happens even when
# reading the body or decoding the JSON raises.
response = minio_client.get_object("warehouse", "data/customers.json")
try:
    data = json.loads(response.read())
finally:
    response.close()
    response.release_conn()

# Convert to DataFrame
df_customers = pd.DataFrame(data)
print("\nCustomers data:")
# Bare last expression so the notebook renders the frame with its rich display
df_customers

## 5. Read Parquet Data from MinIO

In [None]:
import io
import pyarrow.parquet as pq

# Read Parquet file
# The response returned by get_object must be closed and its connection
# released by the caller; try/finally guarantees that happens even when
# reading the body raises.
response = minio_client.get_object("warehouse", "data/orders.parquet")
try:
    parquet_data = response.read()
finally:
    response.close()
    response.release_conn()

# Convert to DataFrame
# The downloaded bytes are wrapped in an in-memory buffer so pyarrow can read
# them like a file.
table = pq.read_table(io.BytesIO(parquet_data))
df_orders = table.to_pandas()
print("\nOrders data:")
# Show only the first 10 rows instead of dumping the whole frame
df_orders.head(10)

## 6. Using Boto3 (S3-compatible API)

In [None]:
import os

import boto3
from botocore.client import Config

# Connection settings come from the environment so real credentials never have
# to be committed with the notebook. The fallbacks are the values this notebook
# previously hard-coded, so it keeps working unchanged when nothing is set.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "minio:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "admin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "password123")

# Create S3 client
# endpoint_url points boto3 at the MinIO server over plain HTTP.
s3_client = boto3.client(
    's3',
    endpoint_url=f"http://{MINIO_ENDPOINT}",
    aws_access_key_id=MINIO_ACCESS_KEY,
    aws_secret_access_key=MINIO_SECRET_KEY,
    config=Config(signature_version='s3v4'),
    region_name='us-east-1'
)

# List objects
# A single list_objects_v2 call returns only one page of keys; the paginator
# follows continuation tokens so every object under the prefix is listed.
paginator = s3_client.get_paginator('list_objects_v2')
listed_objects = [
    obj
    for page in paginator.paginate(Bucket='warehouse', Prefix='data/')
    # 'Contents' is absent when a page holds no keys, hence the default
    for obj in page.get('Contents', [])
]

print("Objects using boto3:")
for obj in listed_objects:
    print(f"  - {obj['Key']} ({obj['Size']} bytes)")

## 7. Simple Data Analysis

In [None]:
# How many orders fall under each distinct 'status' value
print("Orders by status:")
status_counts = df_orders['status'].value_counts()
print(status_counts)

# Sum 'total_amount' within each 'product_name' group, largest total first
print("\nTotal revenue by product:")
amount_by_product = df_orders.groupby('product_name')['total_amount']
revenue_by_product = amount_by_product.sum().sort_values(ascending=False)
print(revenue_by_product)