# Contabo Storage

Contabo Storage is an S3-compatible object storage service.


In [None]:
import os

import boto3

# Credentials are read from the environment; an unset variable becomes "".
CONTABO_ACCESS_KEY_ID = os.environ.get("CONTABO_ACCESS_KEY_ID") or ""
CONTABO_SECRET_ACCESS_KEY = os.environ.get("CONTABO_SECRET_ACCESS_KEY") or ""

# The endpoint host is derived from the region identifier.
CONTABO_REGION = "eu2"
CONTABO_ENDPOINT = f"https://{CONTABO_REGION}.contabostorage.com"

# boto3 S3 resource pointed at the Contabo endpoint instead of AWS.
storage = boto3.resource(
    "s3",
    aws_access_key_id=CONTABO_ACCESS_KEY_ID,
    aws_secret_access_key=CONTABO_SECRET_ACCESS_KEY,
    region_name=CONTABO_REGION,
    endpoint_url=CONTABO_ENDPOINT,
)

### List Buckets


In [None]:
# Print the name of each bucket returned by the storage resource.
all_buckets = storage.buckets.all()
for current_bucket in all_buckets:
    print(current_bucket.name)

### List Objects in Bucket


In [None]:
# Print the key of each object in the "data-lake" bucket.
data_lake = storage.Bucket("data-lake")
for entry in data_lake.objects.all():
    print(entry.key)

## DuckDB


In [None]:
import duckdb

duck = duckdb.connect()

# S3 settings applied to this connection. The endpoint (host only, no scheme)
# is derived from CONTABO_REGION so it cannot drift from the region setting.
s3_settings = {
    "s3_access_key_id": CONTABO_ACCESS_KEY_ID,
    "s3_secret_access_key": CONTABO_SECRET_ACCESS_KEY,
    "s3_region": CONTABO_REGION,
    "s3_endpoint": f"{CONTABO_REGION}.contabostorage.com",
    "s3_url_style": "path",
}
for option, value in s3_settings.items():
    # Values are spliced into a SQL string literal, so double any single quote
    # to keep a credential containing one from breaking the SET statement.
    escaped = value.replace("'", "''")
    duck.execute(f"SET {option}='{escaped}'")

# NOTE(review): the Polars and PyArrow cells read from the "raw/" prefix of
# this bucket - confirm that this un-prefixed path is intended.
duck.read_parquet("s3://data-lake/part-0.parquet")

## Polars


In [None]:
import polars as pl

# Connection settings passed to Polars for S3 access. CONTABO_ENDPOINT is
# reused rather than rebuilt so the endpoint URL is defined in one place.
storage_options = {
    "aws_endpoint_url": CONTABO_ENDPOINT,
    "aws_access_key_id": CONTABO_ACCESS_KEY_ID,
    "aws_secret_access_key": CONTABO_SECRET_ACCESS_KEY,
}

# Scan the parquet file, then collect only its first 5 rows for display.
df = pl.scan_parquet(
    "s3://data-lake/raw/part-0.parquet",
    storage_options=storage_options,
)
df.head(5).collect()

## PyArrow dataset


In [None]:
import pyarrow.dataset as ds
from pyarrow import fs

# S3 filesystem bound to the Contabo host (no scheme in the override).
filesystem = fs.S3FileSystem(
    endpoint_override=f"{CONTABO_REGION}.contabostorage.com",
    access_key=CONTABO_ACCESS_KEY_ID,
    secret_key=CONTABO_SECRET_ACCESS_KEY,
)

# Open the parquet files under "data-lake/raw/" as a single dataset and
# show its first 10 rows.
dataset = ds.dataset(
    "data-lake/raw/",
    filesystem=filesystem,
    format="parquet",
)
dataset.head(10)