In [None]:
from datetime import datetime, timedelta, timezone
import io
import json
import os
import tempfile
from typing import Dict, Any

from minio import Minio
from minio.commonconfig import Filter, Tags
from minio.objectlockconfig import ObjectLockConfig, DAYS, GOVERNANCE, YEARS
from minio.versioningconfig import VersioningConfig, ENABLED, DISABLED, SUSPENDED
from minio.lifecycleconfig import LifecycleConfig, Rule, Expiration, Transition
from minio.retention import Retention, GOVERNANCE, COMPLIANCE
from minio.error import S3Error
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

### Different ways to create a bucket

In [None]:
def create_bucket_with_versioning(bucket: str) -> None:
    '''
    Function that will make sure a bucket exists and enable versioning on it.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # Create the bucket only when it is missing; versioning is applied either way.
    if not minio_client.bucket_exists(bucket):
        minio_client.make_bucket(bucket)
    minio_client.set_bucket_versioning(bucket, VersioningConfig(ENABLED))


def create_bucket_with_locking(bucket: str) -> None:
    '''
    Function that will create a bucket with object locking requested at creation time.
    A bucket that already exists is left untouched.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # Nothing to do when the bucket is already there.
    if minio_client.bucket_exists(bucket):
        return
    minio_client.make_bucket(bucket, object_lock=True)


def create_bucket(bucket: str) -> None:
    '''
    Function that will create a plain bucket when it does not exist yet.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # Existing buckets are left as they are.
    if minio_client.bucket_exists(bucket):
        return
    minio_client.make_bucket(bucket)

### Versioning

In [None]:
def save_dataframe(df: pd.DataFrame, bucket: str, object_name: str, version_tag: str) -> None:
    '''
    Function that will upload a Pandas dataframe to MinIO as a UTF-8 encoded CSV
    (the index is not written). The object receives a 'Version' tag set to version_tag.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],  # host.docker.internal
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # Serialize first so the exact byte length can be passed with the upload.
    csv_bytes = df.to_csv(index=False).encode('utf-8')
    payload = io.BytesIO(csv_bytes)

    version_tags = Tags(for_object=True)
    version_tags['Version'] = version_tag

    minio_client.put_object(bucket, object_name,
                            data=payload,
                            length=len(csv_bytes),
                            content_type='application/csv',
                            tags=version_tags)

In [None]:
# Sample records, one [date, city, high temperature] entry per row.
data = [
    ['06/21/2023', 'New York', 75],
    ['06/21/2023', 'Boston', 78],
    ['06/21/2023', 'San Francisco', 69],
    ['06/22/2023', 'New York', 79],
    ['06/22/2023', 'Boston', 82],
    ['06/22/2023', 'San Francisco', 70],
]
df = pd.DataFrame(data=data, columns=['Date', 'City', 'High Temp.'])

# Upload the frame as 'train.csv' into the 'test' bucket, tagged with version 'v0.001'.
save_dataframe(df, bucket='test', object_name='train.csv', version_tag='v0.001')

In [None]:
# Define model
class TheModelClass(nn.Module):
    '''
    Small convolutional network: two conv + ReLU + max-pool stages followed by
    three fully connected layers, the last of which has 10 outputs.

    The flatten size 16 * 5 * 5 corresponds to 3-channel 32x32 inputs; because
    forward() reshapes with view(-1, ...), other input sizes are not rejected
    at the reshape step.
    '''

    def __init__(self):
        super().__init__()
        # Convolutional stages (the same pooling layer is reused after each conv).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten everything but the leading dimension for the linear layers.
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


In [None]:
# Serializing the state_dict with json.dumps does not work: the OrderedDict that PyTorch creates
# contains Tensor objects, which are not JSON serializable.
#json.dumps(model.state_dict(), ensure_ascii=False).encode('utf8')

In [None]:
def save_model(model: nn.Module, bucket: str, object_name: str, version_tag: str) -> None:
    '''
    Function that will save a PyTorch Model to MinIO.
    Only the model's state_dict is serialized (with torch.save); the resulting file is
    uploaded as 'application/octet-stream' and tagged with 'Version' = version_tag.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],  # host.docker.internal
                credentials['accessKey'],  
                credentials['secretKey'], 
                secure=False)

    tags = Tags(for_object=True)
    tags['Version'] = version_tag

    # Serialize into a private temporary directory instead of a fixed path in the
    # shared temp folder: concurrent runs cannot overwrite each other's file, and the
    # directory (with the serialized model) is removed once the upload has finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = os.path.join(tmp_dir, 'my_model.pt')
        torch.save(model.state_dict(), file_path)
        client.fput_object(bucket, object_name, file_path, content_type='application/octet-stream', tags=tags)


# Instantiate the (untrained) model and upload its state_dict to the 'test' bucket.
model = TheModelClass()
save_model(model, bucket='test', object_name='my_model.pt', version_tag='v0.001')

In [None]:
def save_hyperparameters(hparams: Dict[str, Any], bucket: str, object_name: str, version_tag: str) -> None:
    '''
    Function that will upload hyperparameters (dictionary) to MinIO as a UTF-8 encoded
    JSON document. The object receives a 'Version' tag set to version_tag.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],  # host.docker.internal
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # ensure_ascii=False keeps non-ASCII characters as-is; they are then encoded as UTF-8.
    json_bytes = json.dumps(hparams, ensure_ascii=False).encode('utf8')
    payload = io.BytesIO(json_bytes)

    version_tags = Tags(for_object=True)
    version_tags['Version'] = version_tag

    minio_client.put_object(bucket, object_name,
                            data=payload,
                            length=len(json_bytes),
                            content_type='application/json',
                            tags=version_tags)

In [None]:
# Hyperparameters to store as 'hparams.json' in the 'test' bucket.
hyper_parameters = dict(epochs=5, lr=0.01, batch_size=50)

save_hyperparameters(hyper_parameters, bucket='test', object_name='hparams.json', version_tag='v0.001')

In [None]:
def save_experiment(bucket: str, experiment_name: str, model: nn.Module, hparams: Dict[str, Any], df_train: pd.DataFrame, 
                    df_valid: pd.DataFrame, df_test: pd.DataFrame, version_tag: str) -> None:
    '''
    Function that will save every artifact of one experiment (three dataframes, the model
    and the hyperparameters) under the '/<experiment_name>/' prefix of a bucket.
    The bucket is passed through create_bucket_with_versioning first, and every object
    is saved with the same version_tag.
    '''
    prefix = f'/{experiment_name}'

    create_bucket_with_versioning(bucket)

    # Upload order: train, validation and test splits, then the model, then the hyperparameters.
    splits = {'df_train': df_train, 'df_valid': df_valid, 'df_test': df_test}
    for split_name, frame in splits.items():
        save_dataframe(frame, bucket, f'{prefix}/{split_name}.csv', version_tag)

    save_model(model, bucket, f'{prefix}/model.pt', version_tag)
    save_hyperparameters(hparams, bucket, f'{prefix}/hparams.json', version_tag)


# Assemble the artifacts of one experiment: a model, its hyperparameters and three data splits.
model = TheModelClass()
hyper_parameters = dict(epochs=5, lr=0.01, batch_size=50)
data = [
    ['06/21/2023', 'New York', 75],
    ['06/21/2023', 'Boston', 78],
    ['06/21/2023', 'San Francisco', 69],
    ['06/22/2023', 'New York', 79],
    ['06/22/2023', 'Boston', 82],
    ['06/22/2023', 'San Francisco', 70],
]
# The three splits are separate frames built from the same sample records.
df_train, df_valid, df_test = (
    pd.DataFrame(data, columns=['Date', 'City', 'High Temp.']) for _ in range(3)
)

save_experiment(
    'test-project', 'exp2', model, hyper_parameters,
    df_train, df_valid, df_test,
    version_tag='0.01',
)

### Bucket Lifecycle Management 

In [None]:
def set_bucket_expiration(bucket: str, expire_days: int, prefix: str = "/", rule_id: str = "remove") -> None:
    '''
    Function that will set an expiration life cycle rule on a bucket.
    The credentials file must contain the MinIO url, access key, and access secret.

    bucket: name of the bucket the rule is applied to.
    expire_days: number of days passed to the rule's Expiration.
    prefix: object name prefix the rule is filtered on (defaults to "/", the value
        that used to be hard-coded).
    rule_id: identifier of the rule (defaults to "remove").

    The life cycle configuration sent to the server contains only this one rule.
    NOTE(review): set_bucket_lifecycle presumably replaces any rules already present
    on the bucket - confirm against the MinIO SDK documentation.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],
                   credentials['accessKey'],  
                   credentials['secretKey'], 
                   secure=False)

    # Configure life cycle.
    config = LifecycleConfig(
        [
            Rule(
                ENABLED,
                rule_filter=Filter(prefix=prefix),
                rule_id=rule_id,
                expiration=Expiration(days=expire_days),
            ),
        ],
    )
    client.set_bucket_lifecycle(bucket, config)

# Apply a one-day expiration rule to the 'test' bucket.
set_bucket_expiration(bucket='test', expire_days=1)


In [None]:
def set_bucket_transition(bucket: str, tier_name: str, transition_days: int, prefix: str = "/",
                          rule_id: str = "transition") -> None:
    '''
    Function that will set a transition life cycle rule on a bucket.
    The credentials file must contain the MinIO url, access key, and access secret.

    bucket: name of the bucket the rule is applied to.
    tier_name: value passed as the Transition's storage_class.
    transition_days: number of days passed to the rule's Transition.
    prefix: object name prefix the rule is filtered on (defaults to "/", the value
        that used to be hard-coded).
    rule_id: identifier of the rule (defaults to "transition").

    The life cycle configuration sent to the server contains only this one rule.
    NOTE(review): set_bucket_lifecycle presumably replaces any rules already present
    on the bucket - confirm against the MinIO SDK documentation.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],
                   credentials['accessKey'],  
                   credentials['secretKey'], 
                   secure=False)

    # Configure life cycle.
    config = LifecycleConfig(
        [
            Rule(
                ENABLED,
                rule_filter=Filter(prefix=prefix),
                rule_id=rule_id,
                transition=Transition(days=transition_days, storage_class=tier_name),
            ),
        ],
    )
    client.set_bucket_lifecycle(bucket, config)

### Bucket Level Object Locking

In [None]:
def set_bucket_object_lock(bucket: str, lock_days: int) -> None:
    '''
    Function that will set the object lock configuration on a bucket:
    GOVERNANCE mode with a duration of lock_days days.
    The credentials file must contain the MinIO url, access key, and access secret.
    '''
    with open('credentials.json') as cred_file:
        creds = json.load(cred_file)

    # Build the client from the stored endpoint and key pair.
    minio_client = Minio(creds['url'],
                         creds['accessKey'],
                         creds['secretKey'],
                         secure=False)

    # Apply the bucket-level lock configuration.
    minio_client.set_object_lock_config(bucket, ObjectLockConfig(GOVERNANCE, lock_days, DAYS))


# The bucket name is spelled out here: no notebook-level `bucket` variable is defined
# in any cell, so passing `bucket` raised a NameError on a fresh kernel.
# NOTE(review): 'census-data' is the bucket queried by get_bucket_object_lock in the
# next cell - confirm it is the intended target.
set_bucket_object_lock('census-data', 15)

In [None]:
def get_bucket_object_lock(bucket: str) -> tuple:
    '''
    Function that will get the duration of the lock configuration on a bucket.
    The credentials file must contain the MinIO url, access key, and access secret.

    Returns the `duration` attribute of the bucket's object lock configuration
    (presumably a (duration, unit) pair, mirroring the fallback value - confirm
    against the MinIO SDK). If the lookup raises, the error is printed and the
    sentinel (-1, -1) is returned instead of propagating the exception.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],
                   credentials['accessKey'],  
                   credentials['secretKey'], 
                   secure=False)

    # Sentinel returned when the configuration cannot be read.
    duration = (-1, -1)
    try:
        duration = client.get_object_lock_config(bucket).duration
    except S3Error as s3_err:
        print(f'S3 Error occurred: {s3_err}.')
    except Exception as err:
        # Deliberate best-effort: report the problem and fall back to the sentinel.
        print(f'Error occurred: {err}.')

    return duration

# Display the lock duration configured on the 'census-data' bucket.
get_bucket_object_lock(bucket='census-data')


### Individual Object Retention

In [None]:
def set_object_retention(bucket: str, object_name: str, days_to_retain: int) -> None:
    '''
    Function that will set the lock retention on an object.
    The credentials file must contain the MinIO url, access key, and access secret.

    bucket: name of the bucket holding the object.
    object_name: name of the object to retain.
    days_to_retain: number of days, counted from now, until the retention ends.

    The retention is applied in GOVERNANCE mode.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],
                   credentials['accessKey'],  
                   credentials['secretKey'], 
                   secure=False)

    # Configure retention. A timezone-aware UTC timestamp is used instead of the
    # deprecated, naive datetime.utcnow().
    retain_date = datetime.now(timezone.utc) + timedelta(days=days_to_retain)
    config = Retention(GOVERNANCE, retain_until_date=retain_date)
    client.set_object_retention(bucket, object_name, config)

# Retain 'train.csv' in the 'test' bucket for one day.
set_object_retention(bucket='test', object_name='train.csv', days_to_retain=1)

### Legal Hold

In [None]:
def set_object_legal_hold(bucket: str, object_name: str) -> None:
    '''
    Function that will set a legal hold on an object.
    The credentials file must contain the MinIO url, access key, and access secret.

    bucket: name of the bucket holding the object.
    object_name: name of the object the legal hold is enabled on.
    '''
    with open('credentials.json') as f:
        credentials = json.load(f)

    # Create client with access and secret key
    client = Minio(credentials['url'],
                   credentials['accessKey'],  
                   credentials['secretKey'], 
                   secure=False)

    # Set the legal hold.
    client.enable_object_legal_hold(bucket, object_name)

# Put a legal hold on 'train.csv' in the 'test' bucket.
set_object_legal_hold(bucket='test', object_name='train.csv')