In [None]:
!pip install dlt[s3] pyarrow python-dotenv

In [1]:
import dlt
import pyarrow.parquet as pq
import fsspec # Filesystem specification for Python

In the following code we're using fsspec since it gives you a unified layer for file access, regardless of whether the files are on your local disk, in MinIO, in Azure, in S3, or at a URL.

In [2]:
# dlt resource: reads one remote Parquet file and loads it into table "df_data"
@dlt.resource(table_name="df_data")
def my_df():
    """Yield the contents of the hard-coded Parquet URL as a single Arrow table.

    The file is opened in binary mode through fsspec and read in full with
    pyarrow; the resulting table is yielded while the file handle is still open.
    """
    source_url = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet"
    with fsspec.open(source_url, mode="rb") as remote_file:
        yield pq.read_table(remote_file)

Before running the following code, make sure your credentials are in `.dlt/secrets.toml`. You can use `secrets-template.toml` as a reference.

In [3]:
# Pipeline configuration: named pipeline writing to the "filesystem" destination
pipeline = dlt.pipeline(
    dataset_name="taxis_parquet",
    destination="filesystem",
    pipeline_name="parquet_to_minio",
)

In [4]:
# Run the pipeline on the resource, then print the load summary
run_options = {
    "loader_file_format": "parquet",   # files are written as Parquet
    "write_disposition": "replace",    # passed to dlt as the write disposition
}
load_info = pipeline.run(my_df, **run_options)
print(load_info)

Pipeline parquet_to_minio load step completed in 2.55 seconds
1 load package(s) were loaded to destination filesystem and into dataset taxis_parquet
The filesystem destination used s3://taxis location to store data
Load package 1757342683.0509217 is LOADED and contains no failed jobs


> If you run into errors while setting up `secrets.toml` for dlt, keep in mind that you can also provide the secrets as environment variables; you just need to follow the dlt naming convention. The following code creates a `.env` file as a reference, and you only need to fill in the values of your secrets once the file has been created.

Here's the [docs](https://dlthub.com/docs/general-usage/credentials/setup)

In [None]:
import os

# DO NOT SET YOUR VALUES HERE, THIS IS JUST A TEMPLATE
# YOU MUST SET UP YOUR CREDENTIALS ONCE THE FILE IS CREATED

# Normalised absolute path of the .env file one directory above the working dir.
env_path = os.path.abspath("../.env")

# Each variable appears exactly once: a later duplicate key would override the
# earlier one when the file is parsed. Comments sit on their own lines so the
# values stay clean for any .env consumer.
env_content = """# ============================
# Environment variables for DLT
# Fill in your credentials below
# ============================

# -- Destination (Azure ADLS) --
DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME=your_account_name
DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY=your_account_key
DESTINATION__FILESYSTEM__BUCKET_URL=your_bucket_url

# -- Source (S3 / MinIO) --
SOURCES__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID=randomuser
SOURCES__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY=randompassword
# Change the endpoint if needed
SOURCES__FILESYSTEM__CREDENTIALS__ENDPOINT_URL=http://minio:9000

# -- Source bucket / file URL --
SOURCES__FILESYSTEM__BUCKET_URL=
"""

# Write the .env file, but never clobber an existing one: re-running this cell
# must not wipe credentials that were already filled in. Mode "x" fails with
# FileExistsError instead of truncating.
try:
    with open(env_path, "x", encoding="utf-8") as f:
        f.write(env_content)
except FileExistsError:
    print(f".env file already exists at: {env_path} (left untouched)")
else:
    print(f".env file created at: {env_path}")

print("👉 Please edit this file and replace placeholders with your actual credentials.")


Once you have edited your credentials, run the following code to load them, and then run `pipeline.run` again.

> If you're using this approach, make sure to delete the `secrets.toml` files and restart your environment.

In [None]:
import os
from dotenv import load_dotenv


# Absolute path of the .env file one directory above the working directory
env_path = os.path.abspath("../.env")
print("Looking for .env at:", env_path)

# Make sure the file exists
print("File exists?", os.path.isfile(env_path))

# override=True makes values from the file replace variables already present in
# os.environ, so re-running this cell after editing .env picks up the new
# values instead of keeping the stale ones from a previous run.
load_dotenv(dotenv_path=env_path, override=True)

# Check if the variables are loaded
print("Azure account:", os.getenv("DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"))
