### This script uses the Labelbox SDK and the AWS SDK for Python (boto3) to bulk-import S3 objects into Labelbox

In [1]:
import json
import os
from urllib.parse import quote

import boto3
from dotenv import load_dotenv

In [3]:
# Configuration: load variables from a .env file (if one is found) into the
# process environment, then read each setting. Anything that is not set comes
# back as None.
load_dotenv()

name = 'lb-luzhang'  # S3 bucket that holds the objects to import

# AWS credentials and default region
access_key = os.getenv("AWS_ACCESS_KEY_ID")
secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
region = os.getenv("AWS_DEFAULT_REGION")

# Labelbox API key
lbx_API = os.getenv('LABELBOX_API')

In [5]:
# Open an AWS session from the settings read in the configuration cell.
# The region is passed explicitly alongside the credentials so the session does
# not depend on implicit lookup; when AWS_DEFAULT_REGION is unset, `region` is
# None and boto3 falls back to its normal region resolution.
session = boto3.Session(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_access_key,
    region_name=region,
)

# Resource-level handles for S3 and for the source bucket
s3_resource = session.resource('s3')
bucket = s3_resource.Bucket(name)

# Print both handles as a quick sanity check
print(s3_resource)
print(bucket)

s3.ServiceResource()
s3.Bucket(name='lb-luzhang')


In [26]:
# Build an HTTPS URL for every PNG object in the bucket, then wrap the first
# MAX_JSON_ROWS of them in the {'data': url} records used for the JSON export.

MAX_JSON_ROWS = 300  # cap on the number of records written to the JSON file

# NOTE(review): this must be the region the bucket actually lives in, which is
# not necessarily the session's default region — confirm before changing.
BUCKET_REGION = "us-east-1"
bucket_url = "https://" + name + ".s3." + BUCKET_REGION + ".amazonaws.com/"

object_keys = [
    # Percent-encode the key (keeping "/" separators) so that spaces, "+", "#"
    # and non-ASCII characters still produce a valid URL.
    bucket_url + quote(obj.key)
    for obj in bucket.objects.all()
    # Match on the file extension only; a plain substring test would also pick
    # up keys such as "a.png.bak" or "x.pngs/readme.txt".
    if obj.key.endswith(".png")
]
output_dicts = [{'data': value} for value in object_keys[:MAX_JSON_ROWS]]


In [27]:
# Option 1: dump the records to a JSON file that can be uploaded to Labelbox

file_path = "./s3_output_crowd.json"

# Serialize the record list and write the resulting text to disk
with open(file_path, "w") as json_file:
    json_file.write(json.dumps(output_dicts))


In [18]:
# Option 2: publish the data rows directly through the Labelbox SDK

import datetime
import labelbox as lb

# Authenticate with the API key read in the configuration cell, then create
# the dataset that will receive the rows.
client = lb.Client(api_key=lbx_API)
dataset = client.create_dataset(name="S3_Bulk")

In [23]:
# Option 2 (continued): register every PNG object in the bucket as a data row.

# NOTE(review): the region in this URL must be the bucket's own region — confirm.
bucket_url = "https://" + name + ".s3.us-east-1.amazonaws.com/"

object_keys = [
    # Percent-encode the key (keeping "/" separators) so that spaces, "+", "#"
    # and non-ASCII characters still produce a valid URL.
    bucket_url + quote(obj.key)
    for obj in bucket.objects.all()
    # Match on the file extension only; a plain substring test would also pick
    # up keys such as "a.png.bak" or "x.pngs/readme.txt".
    if obj.key.endswith(".png")
]
sdk_dict = [{'row_data': value} for value in object_keys]

# Submit the rows as one bulk task, wait for it to finish, then print whatever
# the task reports in `errors`.
task = dataset.create_data_rows(sdk_dict)
task.wait_till_done()
print(task.errors)

None
