In [2]:
import boto3
import os
from dotenv import load_dotenv
import json
import re
import pandas as pd

In [3]:
# Notebook configuration.
# load_dotenv() runs first so that any values it loads are visible to the
# environment lookups below; each lookup yields None when the variable is unset.
load_dotenv()

name = 'lb-luzhang'  # bucket name

# AWS credentials / region
access_key = os.getenv("AWS_ACCESS_KEY_ID")
secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
region = os.getenv("AWS_DEFAULT_REGION")

# Labelbox API key
lbx_API = os.getenv('LABELBOX_API')

In [4]:
# Open a boto3 session with the credentials read from the environment, then
# obtain the S3 resource and a handle on the target bucket.
session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_access_key)

s3_resource = session.resource('s3')
bucket = s3_resource.Bucket(name)

# Show both handles, one per line, as a quick sanity check.
print(s3_resource, bucket, sep="\n")

s3.ServiceResource()
s3.Bucket(name='lb-luzhang')


In [5]:
# get corresponding file names
# List the bucket ONCE and derive every list from the same set of keys. Each
# iteration over `bucket.objects.all()` issues its own S3 listing, so the
# previous three separate iterations tripled the network work and could also
# observe different bucket contents, leaving the lists out of step.
url_prefix = "https://" + name + ".s3.us-east-1.amazonaws.com/"
tile_keys = [obj.key for obj in bucket.objects.all() if "imagery/images" in obj.key]

# Image URLs: keys containing "png" but not "xml".
image_list = [url_prefix + key for key in tile_keys if "xml" not in key and "png" in key]

# Keys containing "xml", kept both as raw keys and as full URLs.
row_list = [key for key in tile_keys if "xml" in key]
xml_list = [url_prefix + key for key in row_list]

# Grid indices are parsed from the key itself: the 2nd and 3rd "_"-separated
# fields, e.g. "imagery/images/output_0_10.png.aux.xml" -> x "0", y "10".
# Non-digit characters (the file extension) are stripped from the y field.
x_grid = [row.split("_")[1] for row in row_list]
y_grid = [re.sub(r'\D', '', row.split("_")[2]) for row in row_list]

In [6]:
# Assemble one row per tile. The five lists are paired purely by position, so
# stop here if their lengths differ: building the frame from a list of lists and
# transposing would silently pad the shorter lists with None and yield rows whose
# image, XML path and grid indices no longer belong together.
df_list = [image_list, xml_list, row_list, x_grid, y_grid]
df_columns = ("data", "xml_path", "file_name", "x_grid", "y_grid")

list_lengths = dict(zip(df_columns, map(len, df_list)))
if len(set(list_lengths.values())) > 1:
    raise ValueError(f"Column lists differ in length: {list_lengths}")

# Build the frame column-wise from a name -> values mapping.
df = pd.DataFrame(dict(zip(df_columns, df_list)))
df

Unnamed: 0,data,xml_path,file_name,x_grid,y_grid
0,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_0.png.aux.xml,0,0
1,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_1.png.aux.xml,0,1
2,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_10.png.aux.xml,0,10
3,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_11.png.aux.xml,0,11
4,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_2.png.aux.xml,0,2
5,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_3.png.aux.xml,0,3
6,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_4.png.aux.xml,0,4
7,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_5.png.aux.xml,0,5
8,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_6.png.aux.xml,0,6
9,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,https://lb-luzhang.s3.us-east-1.amazonaws.com/...,imagery/images/output_0_7.png.aux.xml,0,7


In [8]:
import labelbox as lb

# Create the Labelbox client using the API key read from the LABELBOX_API
# environment variable (held in lbx_API).
client = lb.Client(api_key=lbx_API)
# NOTE(review): this call runs every time the cell is executed; presumably each
# run creates another dataset named "S3_NAIP" -- confirm before re-running.
dataset = client.create_dataset(name="S3_NAIP")

In [10]:
# Build one data-row payload per DataFrame row and keep all of them.
# Previously each payload was assigned to `asset` and overwritten on the next
# iteration, so only the last one survived the loop.
assets = []
for index, row in df.iterrows():

    # Access fields by column label: integer keys such as row[0] on a Series with
    # string labels rely on a deprecated positional fallback in pandas.
    asset = {
        "row_data": row["data"],
        "global_key": row["data"],  # the image URL doubles as the global key
        "media_type": "IMAGE",
        "metadata_fields": [
            {"name": "XML", "value": row["xml_path"]},
            {"name": "x", "value": row["x_grid"]},
            {"name": "y", "value": row["y_grid"]},
        ],
    }
    assets.append(asset)

[{'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_0.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_1.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_10.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_11.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_2.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_3.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_4.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_5.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_6.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.com/imagery/images/output_0_7.png'},
 {'row_data': 'https://lb-luzhang.s3.us-east-1.amazonaws.c