# Testing S3 Access from a Python Script

Before running this script, you should run `s3info` to find your AWS access key id and secret access key.  Put these into a file called `~/.boto` with the following format:

```ini
[Credentials]
aws_access_key_id = ###############
aws_secret_access_key = ##########################
```

In [1]:
import boto3

# Both handles point at the same custom endpoint, so the keyword argument is
# defined once and shared between the resource and the client.
_s3_endpoint_kwargs = {'endpoint_url': 'https://s3.msi.umn.edu'}
s3_resource = boto3.resource('s3', **_s3_endpoint_kwargs)
s3_client = boto3.client('s3', **_s3_endpoint_kwargs)

# Bare expression: the notebook displays the list_buckets() response, which
# doubles as a check that the credentials are accepted.
s3_client.list_buckets()

{'Buckets': [{'CreationDate': datetime.datetime(2020, 3, 11, 0, 7, 9, 698000, tzinfo=tzutc()),
   'Name': 'fma-dataset'}],
 'Owner': {'DisplayName': 'Samuel Piehl', 'ID': 'uid=75217'},
 'ResponseMetadata': {'HTTPHeaders': {'content-type': 'application/xml',
   'date': 'Wed, 11 Mar 2020 15:59:23 GMT',
   'transfer-encoding': 'chunked',
   'x-amz-request-id': 'tx0000000000000009daadc-005e690adb-58b9af9-default'},
  'HTTPStatusCode': 200,
  'HostId': '',
  'RequestId': 'tx0000000000000009daadc-005e690adb-58b9af9-default'}}

### Add User to Bucket

In [49]:
# Fetch the bucket's current access-control list. The response is kept in
# bucket_acl because the following cells extend its 'Grants' list in place.
bucket_acl = s3_client.get_bucket_acl(Bucket='fma-dataset')
# Bare expression so the notebook displays the full response.
bucket_acl

{'Grants': [{'Grantee': {'DisplayName': 'Alexander Lampert',
    'ID': 'uid=75191',
    'Type': 'CanonicalUser'},
   'Permission': 'FULL_CONTROL'},
  {'Grantee': {'DisplayName': 'Samuel Piehl',
    'ID': 'uid=75217',
    'Type': 'CanonicalUser'},
   'Permission': 'FULL_CONTROL'}],
 'Owner': {'DisplayName': 'Samuel Piehl', 'ID': 'uid=75217'},
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '659',
   'content-type': 'application/xml',
   'date': 'Wed, 11 Mar 2020 01:48:18 GMT',
   'x-amz-request-id': 'tx000000000000000a1064e-005e684362-58be2fb-default'},
  'HTTPStatusCode': 200,
  'HostId': '',
  'RequestId': 'tx000000000000000a1064e-005e684362-58be2fb-default'}}

In [50]:
# Grant to add: FULL_CONTROL for the canonical user with ID uid=75191.
# NOTE(review): the ACL displayed by the previous cell already contains a
# FULL_CONTROL grant for uid=75191, so this adds a second entry for the same
# user -- confirm that the duplicate is intended.
new_grant = [{'Grantee': {'DisplayName': 'Alex Lampert',
   'ID': 'uid=75191',
   'Type': 'CanonicalUser'},
  'Permission': 'FULL_CONTROL'}]
# Extends the fetched list in place; re-running this cell appends the same
# grant again.
bucket_acl['Grants'] += new_grant

In [51]:
# Preview the policy payload (grants and owner only, without the
# ResponseMetadata of the original response) before writing it to the bucket.
{'Grants': bucket_acl['Grants'], 'Owner': bucket_acl['Owner']}

{'Grants': [{'Grantee': {'DisplayName': 'Alexander Lampert',
    'ID': 'uid=75191',
    'Type': 'CanonicalUser'},
   'Permission': 'FULL_CONTROL'},
  {'Grantee': {'DisplayName': 'Samuel Piehl',
    'ID': 'uid=75217',
    'Type': 'CanonicalUser'},
   'Permission': 'FULL_CONTROL'},
  {'Grantee': {'DisplayName': 'Alex Lampert',
    'ID': 'uid=75191',
    'Type': 'CanonicalUser'},
   'Permission': 'FULL_CONTROL'}],
 'Owner': {'DisplayName': 'Samuel Piehl', 'ID': 'uid=75217'}}

In [14]:
# Uncomment the call below to write the updated ACL back to the bucket (this changes who can access it).
#s3_client.put_bucket_acl(Bucket='fma-dataset',
#                         AccessControlPolicy={'Grants': bucket_acl['Grants'], 'Owner': bucket_acl['Owner']})

{'ResponseMetadata': {'HTTPHeaders': {'content-length': '0',
   'content-type': 'application/xml',
   'date': 'Wed, 11 Mar 2020 01:07:57 GMT',
   'x-amz-request-id': 'tx000000000000000a5bef0-005e6839ed-58bdef0-default'},
  'HTTPStatusCode': 200,
  'HostId': '',
  'RequestId': 'tx000000000000000a5bef0-005e6839ed-58bdef0-default'}}

### Loading a CSV File into a Pandas Object

In [52]:
import pandas as pd

# Fetch the CSV object from the bucket; the response's 'Body' entry is handed
# straight to pandas, which reads it as a file-like object.
f_obj = s3_client.get_object(Bucket='fma-dataset', Key='fma-metadata/genres.csv')
genres = pd.read_csv(f_obj['Body'])
# Bare expression so the notebook renders the resulting DataFrame.
genres

Unnamed: 0,genre_id,#tracks,parent,title,top_level
0,1,8693,38,Avant-Garde,38
1,2,5271,0,International,2
2,3,1752,0,Blues,3
3,4,4126,0,Jazz,4
4,5,4106,0,Classical,5
5,6,914,38,Novelty,38
6,7,217,20,Comedy,20
7,8,868,0,Old-Time / Historic,8
8,9,1987,0,Country,9
9,10,13845,0,Pop,10


### Loading mp3 file with Librosa

In [None]:
import os
# Put the ffmpeg build in this directory on PATH -- presumably so that the
# audio backend used by librosa below can find it for mp3 decoding.
# The directory is added only if it is not already present, so re-running the
# cell does not keep growing PATH, and os.pathsep is used instead of a
# hard-coded ':'.
_FFMPEG_BIN = '/home/csci5980/piehl008/software/ffmpeg/bin/'
_current_path = os.environ.get('PATH', '')
if _FFMPEG_BIN not in _current_path.split(os.pathsep):
    os.environ['PATH'] = (
        _current_path + os.pathsep + _FFMPEG_BIN if _current_path else _FFMPEG_BIN
    )
# Bare expression so the notebook displays the resulting PATH.
os.environ['PATH']

In [36]:
import librosa
from tempfile import TemporaryFile, NamedTemporaryFile
import numpy as np
from io import BytesIO

import warnings

def get_raw_audio(client, bucket, file, **kwargs):
    """Download an audio object from S3 and decode it with librosa.

    Args:
        client: S3 client exposing ``download_fileobj`` (e.g. a boto3 client).
        bucket: Name of the bucket that holds the object.
        file: Key of the audio object inside ``bucket``.
        **kwargs: Forwarded unchanged to ``librosa.load`` (e.g. ``sr``).

    Returns:
        Whatever ``librosa.load`` returns for the downloaded file.
    """
    # librosa is given a path rather than a file object, so the download goes
    # to a named temporary file that is removed when the block exits.
    with NamedTemporaryFile(suffix='.mp3') as fp:
        client.download_fileobj(bucket, file, fp)
        # The download is written through a buffered handle; flush it so the
        # complete file is on disk before librosa reopens it by name.
        fp.flush()
        waveform = librosa.load(fp.name, **kwargs)
    return waveform

def save_raw_audio(client, bucket, file, **kwargs):
    """Decode an audio object from S3 and upload the result as a ``.npy`` object.

    The decoded result is stored in the same bucket under the original key
    with its extension replaced by ``.npy``.

    Args:
        client: S3 client exposing ``download_fileobj`` and ``upload_fileobj``
            (e.g. a boto3 client).
        bucket: Name of the bucket to read from and write to.
        file: Key of the audio object inside ``bucket``.
        **kwargs: Forwarded unchanged to ``librosa.load`` (e.g. ``sr``).

    Returns:
        None.
    """
    # Local import: the notebook imports ``os`` in a different cell, so this
    # keeps the function usable even if that cell has not been run.
    import os

    with NamedTemporaryFile(suffix='.mp3') as fp:
        client.download_fileobj(bucket, file, fp)
        # Flush buffered bytes so librosa sees the complete file when it
        # reopens it by name.
        fp.flush()
        waveform = librosa.load(fp.name, **kwargs)

    # A tuple result (samples plus sampling rate) is an inhomogeneous
    # sequence; recent NumPy versions refuse to convert it implicitly inside
    # np.save. Pack it into an explicit 1-D object array, which is the layout
    # the implicit conversion used to produce and what load_bytestream reads
    # back with allow_pickle=True.
    if isinstance(waveform, tuple):
        payload = np.empty(len(waveform), dtype=object)
        for idx, item in enumerate(waveform):
            payload[idx] = item
    else:
        payload = waveform

    obj_name = os.path.splitext(file)[0] + '.npy'
    with TemporaryFile() as fb:
        np.save(fb, payload)
        # Rewind so the upload starts from the beginning of the serialized data.
        fb.seek(0)
        client.upload_fileobj(fb, bucket, obj_name)
    return None

def load_bytestream(client, bucket, file, *, allow_pickle=True):
    """Load a ``.npy`` object from S3 into memory.

    Args:
        client: S3 client exposing ``get_object`` (e.g. a boto3 client).
        bucket: Name of the bucket that holds the object.
        file: Key of the ``.npy`` object inside ``bucket``.
        allow_pickle: Passed to ``np.load``. Defaults to True because
            ``save_raw_audio`` stores an object array, which can only be read
            back with pickling enabled. Pass False for plain numeric arrays.

    Returns:
        The array returned by ``np.load``.

    Raises:
        ValueError: If the object holds pickled data and ``allow_pickle`` is
            False.
    """
    response = client.get_object(Bucket=bucket, Key=file)
    # SECURITY: with allow_pickle=True, np.load will unpickle whatever the
    # object contains, and unpickling can execute arbitrary code. Only use the
    # default on buckets whose contents you trust.
    raw = BytesIO(response['Body'].read())
    return np.load(raw, allow_pickle=allow_pickle)

warnings.filterwarnings("ignore")

%timeit get_raw_audio(s3_client, 'fma-dataset', 'raw-audio/000/000002.mp3', sr=44100)
%timeit save_raw_audio(s3_client, 'fma-dataset', 'raw-audio/000/000002.mp3', sr=44100)
%timeit load_bytestream(s3_client, 'fma-dataset', 'raw-audio/000/000002.npy')

warnings.filterwarnings("default")

1 loop, best of 3: 174 ms per loop
1 loop, best of 3: 333 ms per loop
10 loops, best of 3: 79 ms per loop
