In [None]:
def save_to_cos(cred, bucket_name, object_name, data):
    """
    Upload 'data' to Cloud Object Storage as bucket_name/object_name using plain REST calls.

    'cred' may have various key-values, depending on where the credentials were copied from:
      - API key:      'apikey', 'api_key' or 'IBM_API_KEY_ID'
      - IAM endpoint: 'iam_url' or 'IBM_AUTH_ENDPOINT' (optional, a default is used)
      - COS endpoint: 'url' or 'ENDPOINT' (optional, a default is used)

    The API key is first exchanged for a bearer token, which then authorizes the PUT.
    Failures are reported by printing an "ERROR: ..." line; the function returns None
    in every case.
    """
    import requests

    default_iam_url = 'https://iam.ng.bluemix.net/oidc/token'
    default_endpoint = 'https://s3-api.us-geo.objectstorage.service.networklayer.com'

    api_key = cred.get('apikey', cred.get('api_key', cred.get('IBM_API_KEY_ID')))
    iam_url = cred.get('iam_url', cred.get('IBM_AUTH_ENDPOINT', default_iam_url))
    endpoint = cred.get('url', cred.get('ENDPOINT', default_endpoint))

    if not api_key:
        print("ERROR: Missing api key")
        return

    full_object_path = bucket_name + "/" + object_name
    print("Saving", full_object_path, "(", str(len(data)), "bytes)")

    # The token parameters go in the form-encoded body, matching the declared
    # Content-Type. Passing them as query parameters would put the API key in the URL.
    response = requests.post(
        url=iam_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"grant_type": "urn:ibm:params:oauth:grant-type:apikey", "apikey": api_key},
        verify=True)
    if response.status_code != 200:
        print("ERROR: POST Response =", response.status_code, response.reason, response.text)
        return
    bearer_token = response.json()["access_token"]

    response = requests.put(
        url=endpoint + "/" + full_object_path,
        headers={"Authorization": "bearer " + bearer_token},
        data=data)
    if response.status_code != 200:
        print("ERROR: PUT Response =", response.status_code, response.reason, response.text)

In [None]:
def read_from_cos(cred, bucket_name, object_name):
    """
    Download bucket_name/object_name from Cloud Object Storage using plain REST calls.

    'cred' may have various key-values, depending on where the credentials were copied from:
      - API key:      'apikey', 'api_key' or 'IBM_API_KEY_ID'
      - IAM endpoint: 'iam_url' or 'IBM_AUTH_ENDPOINT' (optional, a default is used)
      - COS endpoint: 'url' or 'ENDPOINT' (optional, a default is used)

    Returns the object content as bytes, or None (after printing an "ERROR: ..." line)
    when the API key is missing, the token request fails, or the download fails.
    """
    import requests

    default_iam_url = 'https://iam.ng.bluemix.net/oidc/token'
    default_endpoint = 'https://s3-api.us-geo.objectstorage.service.networklayer.com'

    api_key = cred.get('apikey', cred.get('api_key', cred.get('IBM_API_KEY_ID')))
    iam_url = cred.get('iam_url', cred.get('IBM_AUTH_ENDPOINT', default_iam_url))
    endpoint = cred.get('url', cred.get('ENDPOINT', default_endpoint))

    if not api_key:
        print("ERROR: Missing api key")
        return

    full_object_path = bucket_name + "/" + object_name
    print("Reading", full_object_path)

    # The token parameters go in the form-encoded body, matching the declared
    # Content-Type. Passing them as query parameters would put the API key in the URL.
    response = requests.post(
        url=iam_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"grant_type": "urn:ibm:params:oauth:grant-type:apikey", "apikey": api_key},
        verify=True)
    if response.status_code != 200:
        print("ERROR: POST Response =", response.status_code, response.reason, response.text)
        return
    bearer_token = response.json()["access_token"]

    response = requests.get(
        url=endpoint + "/" + full_object_path,
        headers={"Authorization": "bearer " + bearer_token},
        params=None,
        verify=True)
    if response.status_code != 200:
        # Do not hand the error document back to the caller as if it were object data.
        print("ERROR: GET Response =", response.status_code, response.reason, response.text)
        return

    return response.content

# Start HERE

In [6]:
import ibm_boto3
import json
import requests
import random
from ibm_botocore.client import Config
from pprint import pprint

In [7]:
def cos_client(credentials):
    """
    Create an ibm_boto3 S3 client from a COS service-credentials dictionary.

    Reads 'endpoints', 'apikey' and 'resource_instance_id' from 'credentials'.
    The 'endpoints' URL is fetched to discover the IAM token host and the public
    US cross-region 'us-geo' storage host, which are turned into https URLs.
    """
    print("Connecting to COS...")

    # Download the detailed endpoint list published for this service instance
    endpoint_catalogue = requests.get(credentials.get('endpoints')).json()

    # Pick the IAM host and the COS host out of the detailed endpoints
    token_host = endpoint_catalogue['identity-endpoints']['iam-token']
    public_us_hosts = endpoint_catalogue['service-endpoints']['cross-region']['us']['public']
    storage_host = public_us_hosts['us-geo']

    # Construct the auth and COS endpoint URLs
    token_url = "https://" + token_host + "/oidc/token"
    storage_url = "https://" + storage_host

    print("Creating client...")
    return ibm_boto3.client(
        's3',
        ibm_api_key_id=credentials.get('apikey'),
        ibm_service_instance_id=credentials.get('resource_instance_id'),
        ibm_auth_endpoint=token_url,
        config=Config(signature_version='oauth'),
        endpoint_url=storage_url,
    )

In [2]:
# SECURITY: this cell used to embed two COS service credentials (API keys included) in
# plain text. Treat those keys as compromised and revoke them. Credentials are now read
# from the environment so the notebook can be shared and committed safely.
#
# Each variable must hold the service-credentials JSON document copied from the COS
# console, e.g. {"apikey": "...", "endpoints": "https://...", "resource_instance_id": "crn:..."}.
# The embedded source key had the Manager role and the target key the Writer role.
def load_cos_credentials(env_var):
    """
    Load a COS service-credentials dictionary from the JSON text stored in 'env_var'.

    Returns the parsed dictionary. When the variable is unset or empty an "ERROR: ..."
    line is printed and an empty dictionary is returned. Fields that cos_client reads
    ('apikey', 'endpoints', 'resource_instance_id') are checked and reported if absent.
    Invalid JSON raises the exception from json.loads.
    """
    import json
    import os

    raw = os.environ.get(env_var)
    if not raw:
        print("ERROR: Environment variable", env_var, "is not set; no COS credentials loaded")
        return {}

    credentials = json.loads(raw)
    missing = [key for key in ("apikey", "endpoints", "resource_instance_id") if key not in credentials]
    if missing:
        print("ERROR:", env_var, "is missing credential fields:", ", ".join(missing))
    return credentials


source_credentials = load_cos_credentials("COS_SOURCE_CREDENTIALS")
target_credentials = load_cos_credentials("COS_TARGET_CREDENTIALS")

In [8]:
# Create the COS client built from source_credentials; later cells list and read objects through it
cos_source = cos_client(source_credentials)

Connecting to COS...
Creating client...



# Ask COS which buckets are visible through the source client
response = cos_source.list_buckets()

# Keep just the name of every bucket in the listing
buckets = [entry['Name'] for entry in response['Buckets']]

# Show the names as an indented JSON array, followed by a separator line
print("Current Bucket List:", json.dumps(buckets, indent=2), "---", sep="\n")

Current Bucket List:
[
  "click",
  "data-prod",
  "output-prod",
  "pyml",
  "pyml-output",
  "streams71aa690a314849ce9b242fd1022a028e",
  "streamsqaf9d8b40caeeb448fae73b536e83caac5",
  "test04",
  "testproject29e5abba386c7401aa6835178d3214c4b",
  "testproject3cbc0aba6c1904cc1a0e24cc5e6794c29",
  "testprojecteea700399cb0499997fbfd59da83aeb2"
]
---

In [42]:
def get_object_names(cos_client, bucket_name, prefix="", max=10000):
    """
    Return the keys of the objects in 'bucket_name' whose key starts with 'prefix'.

    The bucket is listed page by page with 'list_objects', and a short summary
    (number of requests, object count, total size, first and last key) is printed.

    cos_client:  client object exposing 'list_objects' (e.g. the result of ibm_boto3.client)
    bucket_name: name of the bucket to list
    prefix:      only keys starting with this prefix are listed; "" lists the whole bucket
    max:         page size, passed as 'MaxKeys' on every list request

    Returns a list of key strings in listing order. Listing stops after 200 requests.
    """
    page_limit = 200  # safety cap on the number of list requests

    all_objects = []
    n = 0
    cnt = 0
    size = 0
    next_marker = ''
    while True:
        n += 1
        response = cos_client.list_objects(Bucket=bucket_name, MaxKeys=max, Prefix=prefix, Marker=next_marker)
        # 'Contents' may be missing altogether when a page holds no keys
        contents = (response or {}).get('Contents') or []
        if not contents:
            break

        all_objects.extend(entry['Key'] for entry in contents)
        cnt += len(contents)
        size += sum(int(entry['Size']) for entry in contents)

        if not response.get('IsTruncated', False) or n == page_limit:
            break
        # 'NextMarker' is not always returned; the last key of the page is an equivalent marker
        next_marker = response.get('NextMarker') or contents[-1]['Key']

    print("Final Iterations:", n, "*" * 20)
    if len(all_objects) > 0:
        print("Number of objects:", cnt, len(all_objects))
        print("Size of all objects:", size)
        print("First object:", all_objects[0])
        print("Last object:", all_objects[-1])

    return all_objects

# List the 'click' bucket through the source client. get_object_names prints the object
# count, total size and first/last key itself, so this cell no longer depends on variables
# (cnt, size, all_objects) left behind in the kernel by an earlier, deleted cell.
all_objects = get_object_names(cos_source, 'click', prefix="CLICKAGGPROD")
Number of objects: 25361 25361
Size of all objects: 37175599
First object: CLICKAGGPROD/YEAR=2018/MONTH=01/DAY=14/HOUR=05/prod-20180117_170223.parq
Last object: CLICKAGGPROD/YEAR=2018/MONTH=1/DAY=18/HOUR=9/prod-20180118_114304.parq

# Fetch one sample object from the 'click' bucket; the payload is exposed as the streaming 'Body' entry
ob1 = cos_source.get_object(
    Bucket='click',
    Key='CLICKAGGPROD/YEAR=2018/MONTH=01/DAY=14/HOUR=05/prod-20180117_170223.parq',
)
{'AcceptRanges': 'bytes',
 'Body': <ibm_botocore.response.StreamingBody at 0x7fea6031e828>,
 'ContentLength': 1311,
 'ContentType': 'application/octet-stream',
 'ETag': '"c9fbbbaf477a9e8105f5dc78adbd3c1a"',
 'LastModified': datetime.datetime(2018, 1, 17, 17, 3, 5, tzinfo=tzutc()),
 'Metadata': {},
 'ResponseMetadata': {'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '1311',
   'content-type': 'application/octet-stream',
   'date': 'Fri, 19 Jan 2018 01:28:54 GMT',
   'etag': '"c9fbbbaf477a9e8105f5dc78adbd3c1a"',
   'last-modified': 'Wed, 17 Jan 2018 17:03:05 GMT',
   'server': 'Cleversafe/3.12.1.28',
   'x-amz-request-id': 'ce1b3e1b-d24b-42e7-be97-a142d0636a7d',
   'x-clv-request-id': 'ce1b3e1b-d24b-42e7-be97-a142d0636a7d',
   'x-clv-s3-version': '2.5'},
  'HTTPStatusCode': 200,
  'HostId': '',
  'RequestId': 'ce1b3e1b-d24b-42e7-be97-a142d0636a7d',
  'RetryAttempts': 1}}

# Write the sample object to the 'test04' bucket under a "-COPY" key, streaming directly
# from the downloaded body (put_object with Body=ob1['Body'] was the earlier attempt).
ret = cos_source.upload_fileobj(
    Fileobj=ob1['Body'],
    Bucket='test04',
    Key='CLICKAGGPROD/YEAR=2018/MONTH=01/DAY=14/HOUR=05/prod-20180117_170223-COPY.parq',
)

In [52]:
# Create the COS client built from target_credentials; the copy loop uploads through it
cos_target = cos_client(target_credentials)

Connecting to COS...
Creating client...


In [None]:
# List the keys to copy. The client must be cos_source (not the cos_client factory function),
# and the bucket is 'click', the same bucket the copy loop reads each object from.
all_objects = get_object_names(cos_source, 'click', prefix="CLICKAGGPROD", max=10000)

In [55]:
# Copy every listed object from the source 'click' bucket to the target bucket under the same key.
# Progress: one dot per object for the first thousand, then the running total every thousand objects.
for n, obj_name in enumerate(all_objects):
    if n < 1000:
        print('.', end="")
    cos_target.upload_fileobj(
        Bucket='raanon-demo-bucket',
        Key=obj_name,
        Fileobj=cos_source.get_object(Bucket='click', Key=obj_name)['Body'],
    )
    if not (n + 1) % 1000:
        print(n + 1, end=" ")

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [56]:
# n is the index variable left over from the copy loop: the zero-based position of the last object it processed
print(n)

25360


In [42]:
def copy_objects(cos_source, source_bucket, cos_target, target_bucket, prefix="", max=10000):
    """
    Copy the objects of 'source_bucket' whose key starts with 'prefix' into 'target_bucket'.

    The source bucket is listed page by page; every listed object is downloaded through
    'cos_source' and streamed to 'cos_target' under the same key. A short summary
    (number of list requests, object count, total size, first and last key) is printed.

    cos_source:    client used for 'list_objects' and 'get_object' on the source bucket
    source_bucket: name of the bucket to read from
    cos_target:    client used for 'upload_fileobj' on the target bucket
    target_bucket: name of the bucket to write to
    prefix:        only keys starting with this prefix are copied; "" copies the whole bucket
    max:           page size, passed as 'MaxKeys' on every list request

    Returns the list of copied keys in listing order.
    """
    all_objects = []
    n = 0
    cnt = 0
    size = 0
    next_marker = ''
    while True:
        n += 1
        response = cos_source.list_objects(Bucket=source_bucket, MaxKeys=max, Prefix=prefix, Marker=next_marker)
        # 'Contents' may be missing altogether when a page holds no keys
        contents = (response or {}).get('Contents') or []
        if not contents:
            break

        for entry in contents:
            key = entry['Key']
            body = cos_source.get_object(Bucket=source_bucket, Key=key)['Body']
            cos_target.upload_fileobj(Bucket=target_bucket, Key=key, Fileobj=body)
            all_objects.append(key)
            cnt += 1
            size += int(entry['Size'])

        if not response.get('IsTruncated', False):
            break
        # 'NextMarker' is not always returned; the last key of the page is an equivalent marker
        next_marker = response.get('NextMarker') or contents[-1]['Key']

    print("Final Iterations:", n, "*" * 20)
    if len(all_objects) > 0:
        print("Number of objects:", cnt, len(all_objects))
        print("Size of all objects:", size)
        print("First object:", all_objects[0])
        print("Last object:", all_objects[-1])

    return all_objects