Launch a local S3 endpoint via s3proxy in a Docker container (data will not persist once the container exits):

```
docker run --rm -p 9000:80 --env S3PROXY_AUTHORIZATION=none andrewgaul/s3proxy
```

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import boto3, botocore
from fs import open_fs
import os
import sys
import time

from jupyterfs.fsmanager import FSManager

## Create a bucket to use as the filesystem

In [3]:
bucket_name = 'foo'

# Connection settings shared by the boto3 client and resource below; they
# point at the local s3proxy endpoint rather than at AWS.
boto_kwargs = dict(
    config=botocore.client.Config(signature_version=botocore.UNSIGNED),
    endpoint_url='http://127.0.0.1:9000',
    aws_access_key_id='s3_local',
    aws_secret_access_key='s3_local',
)

client = boto3.client('s3', **boto_kwargs)
resource = boto3.resource('s3', **boto_kwargs)

# check if bucket already exists
bucket = resource.Bucket(bucket_name)
bucket_exists = True
try:
    resource.meta.client.head_bucket(Bucket=bucket_name)
except botocore.exceptions.ClientError as e:
    error_code = e.response['Error']['Code']
    # Only a 404 means the bucket does not exist. Any other error is
    # re-raised so that it is not mistaken for "the bucket exists".
    if error_code != '404':
        raise
    bucket_exists = False

if bucket_exists:
    # dump info on any existing s3 contents
    for key in bucket.objects.all():
        print(key)
    print(client.list_objects(Bucket=bucket_name))
    print(client.list_objects_v2(Bucket=bucket_name))

    # empty the bucket, then delete it (the original author notes that
    # deleting in reverse listing order is faster)
    for key in reversed(list(bucket.objects.all())):
        key.delete()
    bucket.delete()

# Left as the final bare expression so the create_bucket response is shown
# as the cell output.
client.create_bucket(Bucket=bucket_name)

{'ResponseMetadata': {'RequestId': '4442587FB7D0A2F9',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 04 May 2020 11:00:26 GMT',
   'x-amz-request-id': '4442587FB7D0A2F9',
   'location': '/foo',
   'content-length': '0',
   'server': 'Jetty(9.2.z-SNAPSHOT)'},
  'RetryAttempts': 0},
 'Location': '/foo'}

## Set up an S3 FSManager

In [4]:
# Assemble the s3:// URI for the local endpoint: credentials go before the
# '@', the bucket after it, and the endpoint is passed as a query parameter.
s3uri = 's3://{id}:{key}@{bucket}?endpoint_url={endpoint_url}'.format(**{
    'id': 's3_local',
    'key': 's3_local',
    'bucket': 'foo',
    'endpoint_url': 'http://127.0.0.1:9000',
})

# Manager backed by the S3 bucket.
fooman = FSManager.open_fs(s3uri)
# (optional sanity check: fooman.get(''))

## Set up a local FSManager

In [5]:
# Manager backed by the local directory this notebook is running from.
osman = FSManager.open_fs('osfs://%s' % (os.getcwd(),))
# (optional sanity check: osman.get(''))

## Create some dirs on our S3 filesystem and save this notebook into them

In [6]:
# Read this notebook from the local filesystem manager.
thisContent = osman.get('pyfilesystem_s3fs.ipynb')

# Create the directory tree on the S3 side, parents before children.
for dirpath in ('root0', 'root1', 'root1/leaf1'):
    fooman._save_directory(dirpath, None)

# Store a copy of the notebook at the top level and inside the new directories.
fooman.save(thisContent, 'pyfilesystem_s3fs.ipynb')
fooman.save(thisContent, 'root0/pyfilesystem_s3fs.ipynb')
# Final bare expression: its return value is what the cell displays.
fooman.save(thisContent, 'root1/leaf1/pyfilesystem_s3fs.ipynb')

{'name': 'pyfilesystem_s3fs.ipynb',
 'path': 'root1/leaf1/pyfilesystem_s3fs.ipynb',
 'last_modified': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'created': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'content': None,
 'format': None,
 'mimetype': None,
 'size': 600171,
 'writable': True,
 'type': 'notebook'}

## Retrieve the saved data from our S3 filesystem

In [7]:
# Read back the copy saved under the top-level path; the value returned by
# get() is displayed as the cell output.
fpath = 'pyfilesystem_s3fs.ipynb'
fooman.get(fpath)

{'name': 'pyfilesystem_s3fs.ipynb',
 'path': 'pyfilesystem_s3fs.ipynb',
 'last_modified': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'created': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'content': {'cells': [{'cell_type': 'markdown',
    'metadata': {},
    'source': 'Launch local S3 via s3proxy in a docker container (will not persist data):\n\n```\ndocker run --rm -p 9000:80 --env S3PROXY_AUTHORIZATION=none andrewgaul/s3proxy\n```'},
   {'cell_type': 'code',
    'execution_count': 1,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': '%load_ext autoreload\n%autoreload 2'},
   {'cell_type': 'code',
    'execution_count': 3,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': 'import boto3, botocore\nfrom fs import open_fs\nimport os\nimport sys\nimport time\n\nfrom jupyterfs.fsmanager import FSManager'},
   {'cell_type': 'markdown',
    'metadata': {},
    'source': '## Create a bucket to use as the filesystem'},
   {'cell

In [8]:
# Read back the copy saved under root0/; the value returned by get() is
# displayed as the cell output.
fpath = 'root0/pyfilesystem_s3fs.ipynb'
fooman.get(fpath)

{'name': 'pyfilesystem_s3fs.ipynb',
 'path': 'root0/pyfilesystem_s3fs.ipynb',
 'last_modified': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'created': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'content': {'cells': [{'cell_type': 'markdown',
    'metadata': {},
    'source': 'Launch local S3 via s3proxy in a docker container (will not persist data):\n\n```\ndocker run --rm -p 9000:80 --env S3PROXY_AUTHORIZATION=none andrewgaul/s3proxy\n```'},
   {'cell_type': 'code',
    'execution_count': 1,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': '%load_ext autoreload\n%autoreload 2'},
   {'cell_type': 'code',
    'execution_count': 3,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': 'import boto3, botocore\nfrom fs import open_fs\nimport os\nimport sys\nimport time\n\nfrom jupyterfs.fsmanager import FSManager'},
   {'cell_type': 'markdown',
    'metadata': {},
    'source': '## Create a bucket to use as the filesystem'},
   

In [9]:
# Read back the copy saved under the nested root1/leaf1/ directory; the value
# returned by get() is displayed as the cell output.
fpath = 'root1/leaf1/pyfilesystem_s3fs.ipynb'
fooman.get(fpath)

{'name': 'pyfilesystem_s3fs.ipynb',
 'path': 'root1/leaf1/pyfilesystem_s3fs.ipynb',
 'last_modified': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'created': datetime.datetime(2020, 5, 4, 11, 0, 30, tzinfo=<UTC>),
 'content': {'cells': [{'cell_type': 'markdown',
    'metadata': {},
    'source': 'Launch local S3 via s3proxy in a docker container (will not persist data):\n\n```\ndocker run --rm -p 9000:80 --env S3PROXY_AUTHORIZATION=none andrewgaul/s3proxy\n```'},
   {'cell_type': 'code',
    'execution_count': 1,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': '%load_ext autoreload\n%autoreload 2'},
   {'cell_type': 'code',
    'execution_count': 3,
    'metadata': {'trusted': False},
    'outputs': [],
    'source': 'import boto3, botocore\nfrom fs import open_fs\nimport os\nimport sys\nimport time\n\nfrom jupyterfs.fsmanager import FSManager'},
   {'cell_type': 'markdown',
    'metadata': {},
    'source': '## Create a bucket to use as the filesystem'

# scratch

### examine underlying fs-s3fs

In [None]:
# Open the same bucket directly through PyFilesystem's open_fs, bypassing
# FSManager, to inspect what the underlying filesystem reports.
foofs = open_fs('s3://foo?endpoint_url=http://127.0.0.1:9000')

# A notebook cell only displays its final expression, so the root info has to
# be printed explicitly; as a bare mid-cell expression it was discarded.
print(foofs.getinfo('').raw)

# Final bare expression: shown as the cell output.
foofs.getinfo('root1/')

In [None]:
# Print the raw info reported by the underlying filesystem for each saved
# copy of the notebook.
for fpath in (
    'pyfilesystem_s3fs.ipynb',
    'root0/pyfilesystem_s3fs.ipynb',
    'root1/leaf1/pyfilesystem_s3fs.ipynb',
):
    print(foofs.getinfo(fpath).raw)

## Use only the boto3 resource API (not the client API)

In [24]:
bucket_name = 'foo'

# Connection settings for the local s3proxy endpoint.
boto_kwargs = dict(
    config=botocore.client.Config(signature_version=botocore.UNSIGNED),
    endpoint_url='http://127.0.0.1:9000',
)

resource = boto3.resource('s3', **boto_kwargs)

# check if bucket already exists
bucket = resource.Bucket(bucket_name)
bucket_exists = True
try:
    resource.meta.client.head_bucket(Bucket=bucket_name)
except botocore.exceptions.ClientError as e:
    error_code = e.response['Error']['Code']
    # Only a 404 means the bucket does not exist. Any other error is
    # re-raised so that it is not mistaken for "the bucket exists".
    if error_code != '404':
        raise
    bucket_exists = False

if bucket_exists:
    # empty the bucket, then delete it
    for key in bucket.objects.all():
        key.delete()
    bucket.delete()

# Left as the final bare expression so the new Bucket object is shown as the
# cell output.
resource.create_bucket(Bucket=bucket_name)

s3.Bucket(name='foo')

In [8]:
# Name of the bucket passed to the helper functions in the cells below.
bucket_name = 'foo'

# Keyword arguments forwarded to boto3.resource() by _s3Resource(); they
# target the local s3proxy endpoint.
boto_kwargs = {
    'config': botocore.client.Config(signature_version=botocore.UNSIGNED),
    'endpoint_url': 'http://127.0.0.1:9000',
    'aws_access_key_id': 's3_local',
    'aws_secret_access_key': 's3_local',
}


def _s3Resource():
    """Build and return a new boto3 S3 resource from the module-level ``boto_kwargs``."""
    s3 = boto3.resource('s3', **boto_kwargs)
    return s3


def _s3CreateBucket(bucket_name):
    """Create the bucket ``bucket_name`` unless it already exists.

    Existence is probed with ``head_bucket``; a 404 response is taken to mean
    the bucket is missing, in which case it is created. Calling this for a
    bucket that already exists does nothing.

    Args:
        bucket_name: name of the bucket to create.

    Raises:
        botocore.exceptions.ClientError: if the existence check fails with
            anything other than a 404.
    """
    s3Resource = _s3Resource()

    # check if bucket already exists
    bucket_exists = True
    try:
        s3Resource.meta.client.head_bucket(Bucket=bucket_name)
    except botocore.exceptions.ClientError as e:
        # Only a 404 means the bucket does not exist. Any other error is
        # re-raised; swallowing it would silently skip the creation.
        if e.response['Error']['Code'] != '404':
            raise
        bucket_exists = False

    if not bucket_exists:
        # create the bucket
        s3Resource.create_bucket(Bucket=bucket_name)


def _s3DeleteBucket(bucket_name):
    """Delete the bucket ``bucket_name`` and every object in it, if it exists.

    Prints the elapsed wall-clock time of each step: the existence check,
    each object deletion, and the final bucket deletion. If ``head_bucket``
    reports a 404 the bucket is treated as missing and nothing is deleted.

    Args:
        bucket_name: name of the bucket to delete.

    Raises:
        botocore.exceptions.ClientError: if the existence check fails with
            anything other than a 404.
    """
    now = time.time()
    s3Resource = _s3Resource()

    # check if bucket already exists
    bucket = s3Resource.Bucket(bucket_name)
    bucket_exists = True
    try:
        s3Resource.meta.client.head_bucket(Bucket=bucket_name)
    except botocore.exceptions.ClientError as e:
        # Only a 404 means the bucket does not exist. Any other error is
        # re-raised instead of going on to delete from a bucket whose state
        # is unknown.
        if e.response['Error']['Code'] != '404':
            raise
        bucket_exists = False

    print('existence check: %s' % (time.time() - now))
    now = time.time()

    if bucket_exists:
        # empty the bucket first (the original author notes that deleting in
        # reverse listing order is faster)
        for key in reversed(list(bucket.objects.all())):
            key.delete()
            print('fetch and delete %s: %s' % (key, time.time() - now))
            # restart the timer so each line reports a single step
            now = time.time()
        bucket.delete()
        print('delete empty bucket: %s' % (time.time() - now))

In [9]:
# Remove the bucket and its contents; the helper prints per-step timings.
_s3DeleteBucket(bucket_name)

existence check: 0.015963077545166016
fetch and delete s3.ObjectSummary(bucket_name='foo', key='root1/leaf1/pyfilesystem_s3fs.ipynb'): 0.04035472869873047
fetch and delete s3.ObjectSummary(bucket_name='foo', key='root1/leaf1/'): 0.004658937454223633
fetch and delete s3.ObjectSummary(bucket_name='foo', key='root1/'): 0.004182100296020508
fetch and delete s3.ObjectSummary(bucket_name='foo', key='root0/pyfilesystem_s3fs.ipynb'): 0.004545927047729492
fetch and delete s3.ObjectSummary(bucket_name='foo', key='root0/'): 0.004651069641113281
fetch and delete s3.ObjectSummary(bucket_name='foo', key='pyfilesystem_s3fs.ipynb'): 0.004023075103759766
delete empty bucket: 0.0046961307525634766


In [None]:
# test idempotency: each helper is called twice in a row at some point, so a
# repeated create (or a repeated delete) must not raise.
_s3CreateBucket(bucket_name)
_s3DeleteBucket(bucket_name)
_s3DeleteBucket(bucket_name)  # bucket was just deleted
_s3CreateBucket(bucket_name)
_s3CreateBucket(bucket_name)  # bucket was just created
_s3DeleteBucket(bucket_name)  # final call removes the bucket again