In [45]:
import boto3
import os
from dotenv import load_dotenv
import json
import re
import pandas as pd

In [46]:
# Configuration: the target bucket plus credentials/keys read from the
# environment. load_dotenv() runs first so the lookups below see its values.
load_dotenv()
name = "lb-luzhang"  # S3 bucket name
access_key = os.getenv("AWS_ACCESS_KEY_ID")
secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
region = os.getenv("AWS_DEFAULT_REGION")
lbx_API = os.getenv("LABELBOX_API")

In [47]:
# Open an AWS session using the key pair loaded from the environment.
# NOTE(review): `region` is read in the config cell but is not passed here.
aws_credentials = {
    "aws_access_key_id": access_key,
    "aws_secret_access_key": secret_access_key,
}
session = boto3.Session(**aws_credentials)

# S3 resource handle and the bucket whose objects are listed below.
s3_resource = session.resource("s3")
bucket = s3_resource.Bucket(name)

# Show both handles as a quick sanity check that they were constructed.
for handle in (s3_resource, bucket):
    print(handle)

s3.ServiceResource()
s3.Bucket(name='lb-luzhang')


In [48]:
# Build aligned lists describing every image tile that has an XML sidecar:
#   image_list - public URL of the PNG
#   xml_list   - public URL of the sidecar XML
#   row_list   - S3 key of the sidecar XML
#   x_grid / y_grid - grid indices parsed from the sidecar key (as strings)
#
# NOTE(review): the region in the URL is hard-coded to us-east-1; the `region`
# value loaded from the environment is not used. Confirm the bucket's region.
base_url = "https://" + name + ".s3.us-east-1.amazonaws.com/"

# List the bucket a single time. Iterating bucket.objects.all() repeatedly
# re-lists the whole bucket, which is slow and can yield inconsistent
# snapshots if objects change between passes.
tile_keys = [obj.key for obj in bucket.objects.all() if "imagery/images" in obj.key]
xml_keys = {key for key in tile_keys if "xml" in key}

# Pair each PNG with its sidecar by key instead of by list position.
# Sidecars are named "<png key>.aux.xml" (e.g. output_0_0.png.aux.xml), so a
# PNG without a sidecar is dropped here rather than silently shifting every
# later row onto the wrong XML file. XML keys that do not follow this naming
# are not included.
image_keys = [
    key
    for key in tile_keys
    if "xml" not in key and "png" in key and key + ".aux.xml" in xml_keys
]

image_list = [base_url + key for key in image_keys]
row_list = [key + ".aux.xml" for key in image_keys]
xml_list = [base_url + key for key in row_list]

# Keys look like ".../output_<x>_<y>.png.aux.xml": the second "_"-separated
# piece is x, and the digits of the third piece are y.
x_grid = [row.split("_")[1] for row in row_list]
y_grid = [re.sub(r'\D', '', row.split("_")[2]) for row in row_list]

In [49]:
# Assemble the per-tile table: each input list becomes one column.
# Building from a list of lists and transposing tolerates lists of unequal
# length (short ones are padded with missing values), which the filter on
# xml_path below then removes.
df_list = [image_list, xml_list, row_list, x_grid, y_grid]
column_names = ["data", "xml_path", "file_name", "x_grid", "y_grid"]

df = pd.DataFrame(df_list).T
df.columns = column_names

# Keep only rows that actually have an XML sidecar.
has_xml = df["xml_path"].notna()
df = df.loc[has_xml]
df

Unnamed: 0,data,xml_path,file_name,x_grid,y_grid
0,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_0.png.aux.xml,0,0
1,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_1.png.aux.xml,0,1
2,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_10.png.aux.xml,0,10
3,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_11.png.aux.xml,0,11
4,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_12.png.aux.xml,0,12
...,...,...,...,...,...
395,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_9_5.png.aux.xml,9,5
396,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_9_6.png.aux.xml,9,6
397,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_9_7.png.aux.xml,9,7
398,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_9_8.png.aux.xml,9,8


In [52]:
import labelbox as lb
from labelbox.schema.data_row_metadata import DataRowMetadataKind


# Create the Labelbox client with the API key read from the environment
# (lbx_API), then create the dataset that the data rows are uploaded into.
# NOTE(review): every run of this cell calls create_dataset again, presumably
# producing another dataset named 'Test' - confirm and reuse if unwanted.
client = lb.Client(api_key=lbx_API)
dataset = client.create_dataset(name = 'Test')

In [53]:
# Build one Labelbox data-row payload per table row.
#
# Columns are read by label rather than by integer position: positional
# `row[0]` on a string-labelled Series is deprecated in pandas 2.x and will
# become a label lookup (and fail). to_dict("records") also avoids the
# per-row Series construction that iterrows() performs.
#
# The image URL doubles as the global key; the sidecar XML URL and the grid
# indices are attached as metadata fields.
upload_list = [
    {
        "row_data": record["data"],
        "global_key": record["data"],
        "media_type": "IMAGE",
        "metadata_fields": [
            {"name": "XML", "value": record["xml_path"]},
            {"name": "x_grid", "value": record["x_grid"]},
            {"name": "y_grid", "value": record["y_grid"]},
        ],
    }
    for record in df.to_dict("records")
]


In [54]:
# Submit all payloads as one upload task and block until it completes.
task = dataset.create_data_rows(upload_list)
task.wait_till_done()

# Print whatever the task reports as errors (None was printed on the recorded run).
upload_errors = task.errors
print(upload_errors)

None
