In [None]:
 # %pip installs into the environment of the running kernel, whereas a shell
 # `!pip3` may resolve to a different interpreter. pytz is listed because the
 # notebook imports it below.
 %pip install boto3 pytz

In [2]:
import boto3
from boto3.resources.collection import ResourceCollection
from datetime import datetime, timedelta
import pytz

In [3]:
# Bucket names are declared once; both the resource handles and the client
# calls elsewhere in the notebook use these constants, so they cannot drift.
bucket_raw_name = "bucket-biosentinel-raw"
bucket_trusted_name = "bucket-biosentinel-trusted"

# The credential values below are placeholders. Supply them locally for a run
# and never commit real keys with the notebook.
# NOTE(review): prefer environment variables or a named AWS profile over
# literals in a cell.
session = boto3.Session(
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_session_token=""
)

# Low-level client used for list/put/copy calls.
s3_client = session.client("s3")

# One shared resource object backs both bucket handles.
s3_resource = session.resource("s3")
bucket_raw = s3_resource.Bucket(bucket_raw_name)
bucket_trusted = s3_resource.Bucket(bucket_trusted_name)

In [5]:
def filter_objects(bucket_name: str, sensor_to_move: str):
    """Return the keys in ``bucket_name`` that contain ``sensor_to_move``.

    The bucket is listed page by page through the module-level ``s3_client``
    and a key is kept when ``sensor_to_move`` occurs anywhere in it (plain
    substring match). The result is also printed.

    Args:
        bucket_name: Name of the S3 bucket to scan.
        sensor_to_move: Substring looked for in every object key.

    Returns:
        list[str]: Matching keys in listing order; empty when nothing matches.
    """
    print("Filtering objects")
    objects_found = []
    list_kwargs = {"Bucket": bucket_name}

    while True:
        response = s3_client.list_objects_v2(**list_kwargs)

        # "Contents" may be missing from a page, so default to no entries.
        objects_found.extend(
            obj["Key"]
            for obj in response.get("Contents", [])
            if sensor_to_move in obj["Key"]
        )

        # Stop on the last page, and also when a truncated page carries no
        # token: requesting again without one would restart the listing.
        next_token = response.get("NextContinuationToken")
        if not response.get("IsTruncated") or not next_token:
            break
        list_kwargs["ContinuationToken"] = next_token

    if not objects_found:
        # Name the search term that produced no match.
        print(f"No objects were found with {sensor_to_move}")

    print(objects_found)
    return objects_found

def exists_folder(bucket: ResourceCollection, path: str):
    """Tell whether ``bucket`` holds at least one object whose key starts with ``path``.

    Args:
        bucket: Object exposing ``objects.filter(Prefix=...)``; the caller in
            this notebook passes a boto3 S3 ``Bucket`` resource.
        path: Key prefix (the "folder") to probe.

    Returns:
        bool: True when at least one object exists under the prefix.
    """
    # A single hit answers the question, so cap the collection at one item
    # instead of materialising every object stored under the prefix.
    first_match = bucket.objects.filter(Prefix=path).limit(1)
    return any(True for _ in first_match)

# Filter a bucket's objects by sensor name and by a recent time window
def filter_recent_objects(bucket_name: str, sensor_to_move: str, minutes_interval: int = 10):
    """Return keys containing ``sensor_to_move`` that were modified recently.

    The bucket is listed page by page through the module-level ``s3_client``.
    A key is kept when ``sensor_to_move`` occurs in it and its ``LastModified``
    value lies between "now minus ``minutes_interval`` minutes" and "now"
    (both ends inclusive, "now" taken in UTC when the function starts).

    Args:
        bucket_name: Name of the S3 bucket to scan.
        sensor_to_move: Substring looked for in every object key.
        minutes_interval: Width of the look-back window, in minutes.

    Returns:
        list[str]: Matching keys in listing order; empty when nothing matches.
    """
    print(f"Filtering objects for sensor {sensor_to_move} from the last {minutes_interval} minutes")

    # Look-back window, anchored at the current UTC time
    window_end = datetime.now(pytz.UTC)
    window_start = window_end - timedelta(minutes=minutes_interval)

    matching_keys = []
    next_token = None
    more_pages = True

    while more_pages:
        # Only send a continuation token once a previous page supplied one
        request = {"Bucket": bucket_name}
        if next_token:
            request["ContinuationToken"] = next_token
        page = s3_client.list_objects_v2(**request)

        if "Contents" in page:
            # Keep entries that name the sensor and fall inside the window
            matching_keys.extend(
                entry["Key"]
                for entry in page["Contents"]
                if sensor_to_move in entry["Key"]
                and window_start <= entry["LastModified"] <= window_end
            )

        # Keep going only while the service reports further pages
        more_pages = bool(page.get("IsTruncated"))
        if more_pages:
            next_token = page.get("NextContinuationToken")

    if len(matching_keys) == 0:
        print(f"No objects were found in the last {minutes_interval} minutes for sensor {sensor_to_move}")

    print(f"Objects found for {sensor_to_move}: {matching_keys}")
    return matching_keys

# Copy the recently modified objects of every sensor to the trusted bucket
def copy_all_sensors(minutes_interval: int = 10):
    """Copy recent raw sensor files into per-sensor folders of the trusted bucket.

    For each sensor name in the mapping below, ``filter_recent_objects`` is
    asked for the raw-bucket keys modified within ``minutes_interval`` minutes.
    Each file is copied to ``<folder>/<file name>`` in the trusted bucket unless
    an object with exactly that key is already there. A folder placeholder
    object is created the first time a destination folder turns out to be empty.

    Uses the module-level ``s3_client``, ``bucket_trusted``,
    ``bucket_raw_name`` and ``bucket_trusted_name``.

    Args:
        minutes_interval: Width of the look-back window, in minutes.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Sensor name searched in the raw keys -> destination folder. Two spellings
    # may share a folder (e.g. "presenca" and "presence").
    sensors = {
        "geolocation": "geolocation/",
        "heart_rate-electric_pulse": "heart_rate-electric_pulse/",
        "umid-temp": "umid-temp/",
        "presence": "presence/",
        "presenca": "presence/",
        "sound": "sound/",
        "som": "sound/",
        "temp-pressure": "temp-pressure/"
    }

    # Folders already confirmed (or created) during this run, so the folder
    # lookup happens at most once per folder rather than once per file.
    ensured_folders = set()

    for sensor_name, destiny_path in sensors.items():
        print(f"\nProcessing sensor: {sensor_name}")
        objects_to_copy = filter_recent_objects(bucket_raw_name, sensor_name, minutes_interval)

        for object_key in objects_to_copy:
            file_name = object_key.split('/')[-1]
            if not file_name:
                # The key ends with "/" (a folder placeholder): no file to copy.
                continue
            destination_key = f"{destiny_path}{file_name}"

            # Skip files that are already in the destination. Prefix only
            # narrows the listing; the decision needs an exact key match,
            # otherwise "a.json.bak" would hide a missing "a.json".
            existing_files = s3_client.list_objects_v2(
                Bucket=bucket_trusted_name,
                Prefix=destination_key
            )
            already_copied = any(
                obj["Key"] == destination_key
                for obj in existing_files.get("Contents", [])
            )
            if already_copied:
                print(f"{destination_key} already exists in bucket_trusted, skipping copy.")
                continue

            # Create the destination folder placeholder if the folder is empty
            if destiny_path not in ensured_folders:
                if not exists_folder(bucket_trusted, destiny_path):
                    s3_client.put_object(
                        Bucket=bucket_trusted_name,
                        Key=destiny_path
                    )
                    print(f"{destiny_path} created in bucket_trusted")
                ensured_folders.add(destiny_path)

            source = {"Bucket": bucket_raw_name, "Key": object_key}
            s3_client.copy_object(CopySource=source, Bucket=bucket_trusted_name, Key=destination_key)
            print(f"{object_key} copied to {destination_key} in bucket_trusted")

# Example call: copy, for every sensor, the objects modified in the last 10 minutes
# (a single pass; scheduling a repeat is left to whoever runs the notebook)
copy_all_sensors(minutes_interval=10)

Filtering objects
['presence_2024-09-10_00-00-00_1.json', 'presence_2024-09-10_00-00-10_1.json', 'presence_2024-09-10_00-00-20_1.json', 'presence_2024-09-10_00-00-30_1.json', 'presence_2024-09-10_00-00-40_1.json', 'presence_2024-09-10_00-00-50_1.json', 'presence_2024-09-10_00-01-00_1.json', 'presence_2024-09-10_00-01-10_1.json', 'presence_2024-09-10_00-01-20_1.json', 'presence_2024-09-10_00-01-30_1.json', 'presence_2024-09-10_00-01-40_1.json', 'presence_2024-09-10_00-01-50_1.json', 'presence_2024-09-10_00-02-00_1.json', 'presence_2024-09-10_00-02-10_1.json', 'presence_2024-09-10_00-02-20_1.json', 'presence_2024-09-10_00-02-30_1.json', 'presence_2024-09-10_00-02-40_1.json', 'presence_2024-09-10_00-02-50_1.json', 'presence_2024-09-10_00-03-00_1.json', 'presence_2024-09-10_00-03-10_1.json', 'presence_2024-09-10_00-03-20_1.json', 'presence_2024-09-10_00-03-30_1.json', 'presence_2024-09-10_00-03-40_1.json', 'presence_2024-09-10_00-03-50_1.json', 'presence_2024-09-10_00-04-00_1.json', 'prese