# Transferring files to MinIO

This notebook contains code to transfer files/directories to MinIO.

First, we install the third-party packages the notebook depends on.

In [2]:

!pip install -q typing python-dotenv minio

Now we define all the methods required to perform the transfer. 

In [17]:
from minio import Minio
from minio.error import S3Error
from dotenv import load_dotenv
from typing import NamedTuple
import os
import pathlib

class S3Env(NamedTuple):
  """Connection settings read from the environment, bundled with a ready client.

  init() builds this tuple positionally, so the order of the fields below is
  part of the contract.
  """
  # Value of the AWS_ACCESS_KEY_ID environment variable.
  access_key_id: str
  # Value of the AWS_SECRET_ACCESS_KEY environment variable.
  secret_access_key: str
  # Value of AWS_S3_ENDPOINT after init() has removed any http(s):// scheme text.
  s3_endpoint: str
  # Value of the AWS_S3_BUCKET environment variable.
  bucket_name: str
  # Value of the AWS_DEFAULT_REGION environment variable.
  default_region: str
  # Minio client that init() constructed from the endpoint and credentials above.
  client: Minio  

class BucketMeta(NamedTuple):
  """Parameters for an upload, shared by upload_file() and upload_files().

  Only client and bucket_name are required; every other field has a default
  and is filled in depending on which function the tuple is passed to.
  """
  # Minio client used for the bucket check/creation and for the uploads.
  client: Minio
  # Name of the destination bucket.
  bucket_name: str
  # Text substituted for model_data_dir in a file's path when upload_file()
  # builds the object key.
  prefix: str = ""
  # Local directory walked by upload_files(); also the part of file_name that
  # upload_file() swaps for prefix. Defaults to None (not set).
  model_data_dir: str = None
  # Local path of the file handed to upload_file(). Defaults to None (not set).
  file_name: str = None
  # NOTE(review): not read by upload_file() or upload_files() as visible in
  # this notebook; upload_file() derives its own key. Confirm whether other
  # code uses it.
  object_name: str = None
  # Directory names that upload_files() prunes from the walk; None disables pruning.
  exclude_dirs_set: set[str] = None
  # File names that upload_files() skips; None disables the check.
  exclude_files_set: set[str] = None

In [18]:
def init(isSecure: bool = True) -> S3Env:
  """Read the S3/MinIO settings from the environment and build a client.

  load_dotenv() is called first, then the AWS_ACCESS_KEY_ID,
  AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION, AWS_S3_ENDPOINT and AWS_S3_BUCKET
  environment variables are read.

  Args:
    isSecure: Forwarded to Minio as its `secure` argument. A scheme found in
      AWS_S3_ENDPOINT is stripped and does NOT influence this flag, so pass
      False explicitly for a plain-http endpoint.

  Returns:
    An S3Env holding the values read plus the constructed Minio client.

  Raises:
    ValueError: If AWS_S3_ENDPOINT is unset or empty once the scheme is removed.
  """
  load_dotenv()

  access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
  secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
  default_region = os.environ.get('AWS_DEFAULT_REGION')
  bucket_name = os.environ.get('AWS_S3_BUCKET')
  s3_endpoint = (os.environ.get('AWS_S3_ENDPOINT') or '').strip()

  # The endpoint may arrive with an "https://" or "http://" scheme, but Minio
  # is given a bare host[:port] here, so drop the scheme. Only a leading
  # scheme is removed; the rest of the value is left alone.
  for scheme in ('https://', 'http://'):
    if s3_endpoint.startswith(scheme):
      s3_endpoint = s3_endpoint[len(scheme):]
      break

  # Fail with an actionable message instead of an AttributeError on None.
  if not s3_endpoint:
    raise ValueError('AWS_S3_ENDPOINT is not set (or is empty); '
                     'define it in the environment or in the .env file')

  client: Minio = Minio(
    s3_endpoint,
    access_key = access_key_id,
    secret_key = secret_access_key,
    secure = isSecure
  )

  return S3Env(access_key_id, secret_access_key, s3_endpoint, bucket_name, default_region, client)


def _object_key(file_name: str, model_data_dir, prefix) -> str:
  """Build the bucket key for file_name by swapping a leading model_data_dir for prefix."""
  prefix = prefix or ""

  if not model_data_dir:
    # No directory to strip, so the path is used as the key unchanged.
    return file_name

  try:
    # PurePath normalises both sides ("./models", "models/" and "models" all
    # compare equal) and only matches at the START of the path, unlike
    # str.replace which rewrites every occurrence of the text.
    relative = pathlib.PurePath(file_name).relative_to(pathlib.PurePath(model_data_dir))
  except ValueError:
    # file_name is not located under model_data_dir: leave it untouched.
    return file_name

  if not relative.parts:
    # file_name IS model_data_dir; the whole path is replaced by the prefix.
    return prefix

  base = prefix.rstrip('/')
  # as_posix() keeps "/" as the key separator regardless of the local OS.
  tail = relative.as_posix()
  return f'{base}/{tail}' if base else tail


def upload_file(file_model: BucketMeta):
  """Upload one local file to the bucket described by file_model.

  The object key is file_model.file_name with its leading
  file_model.model_data_dir replaced by file_model.prefix, joined with "/".
  An empty prefix contributes nothing to the key. If file_name is not under
  model_data_dir (or model_data_dir is unset) the file name itself is the key.

  Args:
    file_model: Supplies client, bucket_name, file_name, model_data_dir and
      prefix.

  Returns:
    None. Progress is printed; an S3Error raised by the upload is caught and
    reported as FAILURE rather than propagated.
  """
  client: Minio = file_model.client

  object_name = _object_key(file_model.file_name, file_model.model_data_dir, file_model.prefix)

  print(f'   -> Trying to upload file [{file_model.file_name}] with key [{object_name}] to the bucket [{file_model.bucket_name}]...', end=" ")

  try:
    client.fput_object(file_model.bucket_name, object_name, file_model.file_name)
    print('SUCCESS')
  except S3Error as e:
    print('FAILURE')
    print("Error:: ", e)


def upload_files(dir_model: BucketMeta):
  """Upload every eligible file found under dir_model.model_data_dir.

  The destination bucket is created first when it does not exist yet. The
  directory tree is then walked with os.walk:
    * sub-directories named in dir_model.exclude_dirs_set are removed from
      the walk in place, so they are never descended into;
    * files whose name starts with "." or appears in
      dir_model.exclude_files_set are reported as ignored;
    * every other file is passed to upload_file().

  Args:
    dir_model: Supplies client, bucket_name, model_data_dir, prefix and the
      two optional exclusion sets.
  """
  source_dir = dir_model.model_data_dir
  bucket = dir_model.bucket_name

  print(f'Uploading files from [{source_dir}] directory...')
  client = dir_model.client

  if client.bucket_exists(bucket):
    print(f' -->> Bucket already exists [{bucket}]')
  else:
    client.make_bucket(bucket)
    print(f' -->> Created bucket [{bucket}]')

  for current_dir, subdirs, file_names in os.walk(source_dir):
    print(f'root={current_dir}, dirs={subdirs}, files={file_names}')

    skip_dirs = dir_model.exclude_dirs_set
    if skip_dirs is not None:
      kept = [name for name in subdirs if name not in skip_dirs]
      something_pruned = len(kept) < len(subdirs)
      # Slice assignment mutates the list os.walk handed out, which is what
      # stops it from descending into the removed directories.
      subdirs[:] = kept
      if something_pruned:
        print(f'Ignored {skip_dirs} directories...')

    for name in file_names:
      skip_files = dir_model.exclude_files_set
      if name.startswith('.') or (skip_files is not None and name in skip_files):
        print(f'   -->> *** File (or dir) [{name}] IGNORED... *** <<--')
        continue

      local_path = pathlib.Path(current_dir, name)
      single_file = BucketMeta(client=dir_model.client,
                               bucket_name=bucket,
                               file_name=str(local_path),
                               prefix=dir_model.prefix,
                               model_data_dir=source_dir)
      upload_file(single_file)