# archive to folder

In [7]:
# nuclio: ignore
import nuclio

In [7]:
import os
import zipfile
import urllib.request
import tarfile
import json

from mlrun.execution import MLClientCtx
from mlrun.datastore import DataItem

from typing import Union

def open_archive(
    context: MLClientCtx,
    archive_url: Union[DataItem, str] = '',
    key: str = 'images'
):
    """Extract a zip or tar.gz archive into the context's artifact path.

    The archive type is chosen from the extension of ``archive_url``
    (compared case-insensitively). A ``tar.gz`` archive is streamed through
    ``urllib.request.urlopen``, so it has to be given as a URL that urllib
    can open. A ``zip`` archive is opened by name with ``zipfile.ZipFile``,
    so it has to be a path readable from the local file system. After
    extraction the target directory is logged as an artifact under ``key``.

    :param context:      function execution context
    :param archive_url:  url (tar.gz) or local path (zip) of the archive file;
                         converted with ``str()`` before it is used
    :param key:          key of archive contents in artifact store

    :raises ValueError:  if the extension is neither ``tar.gz`` nor ``zip``
    """
    target_dir = context.artifact_path

    # Use one string form for both the extension check and for opening the
    # archive; urlopen() and ZipFile() do not accept arbitrary objects.
    # NOTE(review): assumes str() of a DataItem yields its url/path -- confirm.
    source = str(archive_url)
    splits = source.lower().split('.')

    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only point this function at archives from a trusted origin.
    if '.'.join(splits[-2:]) == 'tar.gz':
        # Extract dataset from tar
        context.logger.info('opening tar_gz')
        # Close the response once extraction ends, even if it fails part-way.
        with urllib.request.urlopen(source) as stream:
            # "r|gz" reads the archive as a forward-only stream, which is what
            # a non-seekable network response requires.
            with tarfile.open(fileobj=stream, mode="r|gz") as ref:
                ref.extractall(target_dir)
    elif splits[-1] == 'zip':
        # Extract dataset from zip
        context.logger.info('opening zip')
        with zipfile.ZipFile(source, 'r') as ref:
            ref.extractall(target_dir)
    else:
        # Fail loudly rather than log an artifact for a directory that
        # received no content.
        raise ValueError(
            f'unsupported archive type in {source!r}, expected tar.gz or zip'
        )

    context.log_artifact(key, local_path=target_dir)

In [7]:
# nuclio: end-code