In [1]:
import requests
import pandas as pd
from xml.etree import ElementTree

Parse the Public Bucket Listing to Extract All Object Keys

In [2]:

# S3 bucket to enumerate, and the endpoint URL derived from its name.
bucket_name = "urbanriverrangers"
base_url = f"https://{bucket_name}.s3.amazonaws.com"

    
def list_objects(bucket_url, continuation_token=None, timeout=30):
    """Fetch one page of a bucket listing and return the raw XML text.

    Args:
        bucket_url: URL the GET request is sent to.
        continuation_token: Token taken from a previous page's response.
            When falsy, the request is sent without one.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without it a stalled connection would block indefinitely.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # list-type=2 is what enables continuation-token paging.
    params = {'list-type': '2'}
    if continuation_token:
        params['continuation-token'] = continuation_token

    response = requests.get(bucket_url, params=params, timeout=timeout)
    response.raise_for_status()  # Ensure we get a successful response

    return response.text

def parse_objects(xml_response):
    """Parse one page of bucket-listing XML into rows plus the paging token.

    Args:
        xml_response: XML document text whose elements live in the
            ``http://s3.amazonaws.com/doc/2006-03-01/`` namespace.

    Returns:
        A ``(data, next_token)`` tuple. ``data`` is a list with one dict per
        ``Contents`` element, holding the text of its ``Key``, ``ETag`` and
        ``LastModified`` children (``None`` for a child that is absent).
        ``next_token`` is the text of ``NextContinuationToken``, or ``None``
        when the document has no such element.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_response`` is not
            well-formed XML.
    """
    root = ElementTree.fromstring(xml_response)
    # Single namespace map used for every lookup below.
    namespace = {'ns': 'http://s3.amazonaws.com/doc/2006-03-01/'}
    fields = ('Key', 'ETag', 'LastModified')

    def _child_text(parent, tag):
        # Tolerate a missing child instead of failing on ``None.text``.
        node = parent.find(f'ns:{tag}', namespace)
        return node.text if node is not None else None

    data = [
        {field: _child_text(item, field) for field in fields}
        for item in root.findall('.//ns:Contents', namespace)
    ]

    next_token = root.find('.//ns:NextContinuationToken', namespace)
    return data, next_token.text if next_token is not None else None

# Walk the listing page by page, collecting every row, until a page comes
# back without a continuation token.
all_data = []
continuation_token = None
more_pages = True

while more_pages:
    xml_response = list_objects(base_url, continuation_token)
    data, continuation_token = parse_objects(xml_response)
    all_data += data
    more_pages = continuation_token is not None

# Collect the rows in a DataFrame and write them out as CSV (no index column).
csv_file = 's3_keys.csv'
df = pd.DataFrame(all_data, columns=['Key','ETag','LastModified'])
df.to_csv(csv_file, index=False)

print(f"Keys have been saved to {csv_file}")


Keys have been saved to s3_keys.csv


In [3]:
# Find the unique deployment names if not logged from sd uploaded

# Specify Deployment Year of interest
deploy_year = '2024'

# Read in the master list of all s3_keys
df_a = pd.read_csv("s3_keys.csv")


def _deployments_for_year(keys, year):
    """Return the set of deployment names found under ``year`` in ``keys``.

    A key contributes its third '/'-separated component when it splits into
    more than three components and its second component equals ``year``.
    """
    deployments = set()
    for key in keys:
        parts = key.split('/')
        if len(parts) > 3 and parts[1] == year:
            deployments.add(parts[2])
    return deployments


# read_csv may parse a cell as NaN or as a number; drop the former and
# stringify the rest so that .split() never fails on a non-string key.
unique_deploy = _deployments_for_year(
    df_a["Key"].dropna().astype(str), deploy_year
)

list_deploys = sorted(unique_deploy)

for deploymentID in list_deploys:
    print(deploymentID)

# 2024-05-25_WM_Boardwalk_F
# 2024-05-25_WM_Boardwalk_G
# 2024-06-03_UR011
# 2024-06-08_WM_Boardwalk_D

## Exported 2024-07-11: 
# 2024-06-29_WM_Boardwalk_D
# 2024-06-29_WM_Boardwalk_G
# 2024-07-06_WM_Boardwalk_G

## Exported 2024-07-21
# 2024-07-20_WM_Boardwalk_D_UR010
# 2024-07-20_WM_Boardwalk_G_UR004
# 2024-07-20_WM_DIS_A_UR016
# 2024-07-20_WM_DIS_B_UR007

2024-01-30_Learnin_platform_camera_test
2024-01-30_prologis_02
2024-01-31_LearningPlatformBeaver
2024-02-01_16-41-42
2024-02-01_Bubbly_003
2024-02-01_Bubbly_spypoint_garden
2024-02-03_Bubbly003WildMile
2024-02-10_Prologis02WildMile
2024-02-10_Reveal002WildMile
2024-02-24_Prologis
2024-03-06_RevealCamWildMile
2024-03-23_00-38-55
2024-03-23_WildMileNorth
2024-04-13_ZoneD
2024-05-01_GIOG
2024-05-12_Prologis
2024-05-14_TESTDEL2
2024-05-14_TESTDELETE
2024-05-14_test_delete
2024-05-16_WM_Boardwalk_D
2024-05-25_WM_Boardwalk_D
2024-05-25_WM_Boardwalk_F
2024-05-25_WM_Boardwalk_G
2024-06-03_UR011
2024-06-08_WM_Boardwalk_D
2024-06-08_WM_Boardwalk_G
2024-06-29_WM_Boardwalk_D
2024-06-29_WM_Boardwalk_G
2024-07-06_WM_Boardwalk_G
2024-07-20_UR004
2024-07-20_UR010
2024-07-20_WM_Boardwalk_D_UR010
2024-07-20_WM_Boardwalk_G_UR004
2024-07-20_WM_DIS_A_UR016
2024-07-20_WM_DIS_B_UR007
Bubbly_faces_of_fishing


In [None]:
# Step 2: Parse the XML generated in s3_keys (also df) into a media table?
# Alternately, we could do this right after filtering

In [None]:
# Step 3: Grab Exif Information