## Access the audit reports via Cloudflare R2 Storage

> This example notebook shows how to download the audit reports from Cloudflare R2 Storage using the `boto3` library.

To run this notebook, you need to have the `boto3` and `tqdm` libraries installed (for example, `pip install boto3 tqdm`).

In [None]:
# Read-only R2 API token: S3-compatible endpoint URL plus the access key pair.
R2_ENDPOINT = "https://5f0c0f9b5170d2fe9310473a3da483af.r2.cloudflarestorage.com"
R2_ACCESS_KEY_ID = "77107a604647ebcd016f76fc419d3f2c"
R2_SECRET_ACCESS_KEY = "14c93e49b06e0d3889e76b771c082ac4e1f7dfd8c429e3e3f548039e3ad36ea2"

import os
import boto3
from tqdm import tqdm

def get_r2_client(endpoint_url, access_key_id, secret_access_key):
    """Create a boto3 S3 client pointed at a Cloudflare R2 endpoint.

    Args:
        endpoint_url: Base URL of the R2 S3-compatible endpoint.
        access_key_id: Access key ID of the R2 API token.
        secret_access_key: Secret access key of the R2 API token.

    Returns:
        The client object returned by ``boto3.client('s3', ...)``.
    """
    return boto3.client(
        's3',
        endpoint_url=endpoint_url,
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        # R2 signs requests for the "auto" region. Pinning it here keeps a
        # region from the user's AWS config/environment from being picked up.
        region_name='auto',
    )

def download_file(client, bucket_name, object_key, local_path):
    """Download one object from the bucket to ``local_path`` with a progress bar.

    Args:
        client: S3-compatible client exposing ``head_object`` and
            ``download_file``.
        bucket_name: Name of the bucket that holds the object.
        object_key: Key of the object to download.
        local_path: Destination file path; a missing parent directory is
            created first.

    Returns:
        True when the download finished, False when any step raised (the
        error is printed rather than propagated).
    """
    try:
        # HEAD the object first so the progress bar knows the total size.
        response = client.head_object(Bucket=bucket_name, Key=object_key)
        file_size = response['ContentLength']

        # A bare file name has an empty dirname, and os.makedirs('') raises,
        # so only create the parent directory when the path actually has one.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with tqdm(total=file_size, unit='B', unit_scale=True, desc=f"Download {object_key}") as pbar:
            client.download_file(
                bucket_name,
                object_key,
                local_path,
                # Called with the number of bytes moved since the last call.
                Callback=lambda bytes_transferred: pbar.update(bytes_transferred)
            )
        return True
    except Exception as e:
        # Best-effort contract: report the failure and let the caller count it.
        print(f"Download error ({object_key}): {e}")
        return False

def list_objects(client, bucket_name, prefix=""):
    """Collect the keys of every object in the bucket under ``prefix``.

    Walks all pages of the ``list_objects_v2`` paginator. If listing raises
    part-way through, the error is printed and the keys gathered so far are
    returned.
    """
    pager = client.get_paginator('list_objects_v2')
    keys = []

    try:
        for page in pager.paginate(Bucket=bucket_name, Prefix=prefix):
            # A page without 'Contents' carries no objects.
            if 'Contents' not in page:
                continue
            keys.extend(entry['Key'] for entry in page['Contents'])
    except Exception as e:
        print(f"List object error: {e}")

    return keys

def download_dataset(client, bucket_name, prefix="", output_dir="./download"):
    """Download every object under ``prefix`` into ``output_dir``.

    The prefix is stripped from each key so the directory layout below it is
    recreated inside ``output_dir``.

    Args:
        client: S3-compatible client passed through to ``list_objects`` and
            ``download_file``.
        bucket_name: Name of the bucket to read from.
        prefix: Key prefix that restricts which objects are listed.
        output_dir: Local directory the objects are written under.

    Returns:
        A ``(success_count, error_count)`` tuple. Keys ending in ``/`` are
        skipped and counted in neither number.
    """
    print(f"List objects in bucket {bucket_name} with prefix {prefix}")
    objects = list_objects(client, bucket_name, prefix)
    if not objects:
        print(f"No object found in bucket {bucket_name}")
        return 0, 0

    print(f"Find {len(objects)} objects in bucket {bucket_name}")

    success_count = 0
    error_count = 0

    for obj_key in objects:
        # A key ending in '/' is a folder placeholder, not a file; its local
        # path would be a directory, so the download could only fail.
        if obj_key.endswith('/'):
            continue

        # Remove the prefix from the object key
        if prefix and obj_key.startswith(prefix):
            rel_path = obj_key[len(prefix):].lstrip('/')
        else:
            rel_path = obj_key

        # When the prefix is the complete key nothing is left after
        # stripping; fall back to the key's base name so the target is a file
        # inside output_dir rather than output_dir itself.
        if not rel_path:
            rel_path = os.path.basename(obj_key)

        local_path = os.path.join(output_dir, rel_path)

        if download_file(client, bucket_name, obj_key, local_path):
            success_count += 1
        else:
            error_count += 1

    return success_count, error_count

In [None]:
class Args:
    """Settings for the download run, held as plain class attributes."""
    # R2 connection settings, taken from the module-level constants.
    endpoint = R2_ENDPOINT
    access_key = R2_ACCESS_KEY_ID
    secret_key = R2_SECRET_ACCESS_KEY
    # Bucket to read from and the key prefix that limits the listing.
    bucket = "forge-dataset"
    prefix = "artifacts"
    # Local directory the downloaded objects are written under.
    output = "./artifacts"

# Build the client from the configured credentials, then mirror the prefix locally.
args = Args()
client = get_r2_client(
    endpoint_url=args.endpoint,
    access_key_id=args.access_key,
    secret_access_key=args.secret_key,
)

print(f"Start downloading dataset from bucket {args.bucket} with prefix {args.prefix} to {args.output}")
success, error = download_dataset(
    client,
    args.bucket,
    prefix=args.prefix,
    output_dir=args.output,
)
print(f"Download completed. Successful: {success} files, Failed: {error} files")

# Only point at the output directory when something was actually written.
if success:
    print(f"Downloaded files are saved to {args.output}")