In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import boto3
import base64
from io import BytesIO
from PIL import Image
import os
from dotenv import load_dotenv, find_dotenv

In [None]:
# Absolute path of the directory one level above the current working directory.
# NOTE: this value is not passed to find_dotenv() below.
parent_dir = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir)
)

# Locate the .env file, then load the environment variables it declares.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path=dotenv_path)

In [None]:
# Load the people table and the pet table; paths are relative to the working directory.
dataset_peoples, dataset_pets = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/peoples.csv', '../data/dataset_union.csv')
)

In [None]:
# Plain Python lists holding the 'ID' column of each table.
uuids_peoples = dataset_peoples.loc[:, 'ID'].tolist()
ids_pets = dataset_pets.loc[:, 'ID'].tolist()

In [None]:
from collections import namedtuple

# Immutable record describing one pet; the fields are filled from the pet
# dataset columns when the records are built further down.
Pet = namedtuple(
    'Pet',
    'id animal_name animal_gender animal_birth_month breed_name borough zip_code type image',
)

In [None]:
# Number of rows currently held in the pet table.
print(dataset_pets.shape[0])

In [None]:
# Guarantee that every user owns at least one pet: the user at position i
# receives the pet stored in row i of dataset_pets.
# The row is NOT removed from dataset_pets here.
dict_people_pet = {}

# Fail early with a clear message instead of an IndexError halfway through the loop.
if len(uuids_peoples) > len(dataset_pets):
    raise ValueError(
        f"Not enough pets ({len(dataset_pets)}) to give one to each of the "
        f"{len(uuids_peoples)} users."
    )

# `person_id` (rather than `id`) keeps the builtin id() usable in later cells.
for position, person_id in tqdm(enumerate(uuids_peoples), total=len(uuids_peoples)):
    # Purely positional lookup: row number `position`, whatever its index label is.
    pet = dataset_pets.iloc[position]
    # Keys are the string form of the user id; each value is a list of Pet records.
    dict_people_pet[f"{person_id}"] = [
        Pet(
            id=pet['ID'],
            animal_name=pet['AnimalName'],
            animal_gender=pet['AnimalGender'],
            animal_birth_month=pet['AnimalBirthMonth'],
            breed_name=pet['BreedName'],
            borough=pet['Borough'],
            zip_code=str(pet['ZipCode']),
            type=pet['Type'],
            image=pet['Image'],
        )
    ]

In [None]:
# Rebuild the list of user ids from the people table.
uuids_peoples = dataset_peoples.loc[:, 'ID'].tolist()


In [None]:
# Randomly hand out extra pets to a random sample of users.
#
# NOTE: this cell mutates dataset_pets, uuids_peoples and dict_people_pet, so it is
# not idempotent -- re-run the cells that build them before running it again.
# NOTE(review): no random seed is set, so each run produces a different assignment.

count_pets = len(ids_pets)
count_users = len(uuids_peoples)


count_pets_distributeds = 0
# The total is only an upper bound: at most every pet can be handed out.
tqdm_instance = tqdm(total=count_pets)

dict_people_pet_final = {}

count = 0


# Serve up to 20 users; stop earlier if users or pets run out, instead of
# crashing inside np.random.choice on an empty sequence.
while count < 20 and uuids_peoples and len(dataset_pets) > 0:
    # Draw the user by position and take them out of the candidate list right
    # away, so the same user is never selected twice.
    people_random = uuids_peoples.pop(np.random.randint(len(uuids_peoples)))
    # dict_people_pet is keyed by the string form of the user id.
    person_key = f"{people_random}"

    # 1 to 8 extra pets, capped by how many pets are still available.
    quantity_pets = min(np.random.randint(1, 9), len(dataset_pets))

    # Sample row *labels* of dataset_pets (not values of its 'ID' column), without
    # replacement, so every draw maps to an existing row and no pet repeats.
    chosen_labels = np.random.choice(
        dataset_pets.index.to_numpy(), size=quantity_pets, replace=False
    )

    for label in chosen_labels:
        pet = dataset_pets.loc[label]
        dict_people_pet[person_key].append(
            Pet(
                id=pet['ID'],
                animal_name=pet['AnimalName'],
                animal_gender=pet['AnimalGender'],
                animal_birth_month=pet['AnimalBirthMonth'],
                breed_name=pet['BreedName'],
                borough=pet['Borough'],
                zip_code=str(pet['ZipCode']),
                type=pet['Type'],
                image=pet['Image'],
            )
        )
        tqdm_instance.update(1)

    # Remove the pets just handed out so later users cannot receive them.
    dataset_pets = dataset_pets.drop(chosen_labels)

    # Move the finished user from the working mapping to the final one.
    dict_people_pet_final[person_key] = dict_people_pet.pop(person_key)

    # Every requested pet was actually handed out, so this count is exact.
    count_pets_distributeds += quantity_pets
    count += 1

tqdm_instance.close()

# Keep ids_pets in sync with the pets that are still unassigned.
ids_pets = dataset_pets['ID'].to_list()

In [None]:
# Keep only the users that ended up owning more than one pet.
dict_people_pet_test = {
    owner_id: owned_pets
    for owner_id, owned_pets in dict_people_pet_final.items()
    if len(owned_pets) > 1
}

In [None]:
# AWS session built from credentials read from environment variables.
# NOTE(review): the session token is read from AWS_ACCESS_TOKEN; confirm this is
# the variable name your .env actually defines.
session = boto3.Session(
    region_name=os.environ.get('AWS_DEFAULT_REGION'),
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
    aws_session_token=os.environ.get('AWS_ACCESS_TOKEN'),
)

In [None]:
# High-level S3 *resource* (not a low-level client) bound to the session above.
s3 = session.resource(service_name='s3')

In [None]:
def directory_exists(bucket_name, directory_name):
    """Return True when the S3 "directory" already holds at least one object.

    S3 has no real directories, so a directory is considered to exist when any
    object key starts with ``directory_name`` followed by ``/`` (this includes
    the empty ``<directory_name>/`` marker object).

    The previous implementation called ``head_object`` on the module-level
    ``s3`` object, which is a *resource* and has no such method; the resulting
    AttributeError was swallowed and the function always returned False.

    Args:
        bucket_name: Name of the bucket to inspect.
        directory_name: Key prefix of the directory, with or without a trailing slash.

    Returns:
        True if at least one object exists under the prefix. False if none
        exists or if S3 rejects the request with a ClientError.
    """
    stripped = directory_name.rstrip("/")
    # An empty name means the bucket root, whose prefix is the empty string.
    prefix = f"{stripped}/" if stripped else ""
    try:
        # limit(1): a single matching key is enough to answer the question.
        listing = s3.Bucket(bucket_name).objects.filter(Prefix=prefix).limit(1)
        return any(True for _ in listing)
    except s3.meta.client.exceptions.ClientError:
        # Best effort, as before: a rejected request is reported as "not found".
        return False
    
def is_base64(text):
    """Return True when ``text`` is well-formed base64.

    Whitespace (for example the line breaks of MIME-wrapped payloads) is
    ignored. Any other character outside the base64 alphabet makes the input
    invalid; the previous non-validating decode silently dropped such
    characters and accepted garbage.

    Args:
        text: Candidate value. ``str``, ``bytes`` or ``bytearray`` are checked;
            anything else (``None``, a float NaN from pandas, ...) is not base64.

    Returns:
        True if the value decodes as strict base64, False otherwise.
    """
    # Drop whitespace first so wrapped payloads still pass strict validation.
    if isinstance(text, str):
        text = "".join(text.split())
    elif isinstance(text, (bytes, bytearray)):
        text = b"".join(bytes(text).split())
    try:
        base64.b64decode(text, validate=True)
        return True
    except (ValueError, TypeError):
        # binascii.Error is a ValueError subclass (bad alphabet or padding);
        # TypeError covers inputs that are not str or bytes-like.
        return False

In [None]:
# Upload every user's pet images to S3 as JPEG files under train/<user_id>/<pet_id>/.
bucket_name = 'pilha-nuvem-tcc-sptech-bucket'
bucket = s3.Bucket(bucket_name)

# Image modes kept as they are for JPEG; any other mode (RGBA, P, LA, ...) is
# converted to RGB first, because saving it as JPEG raises an error.
jpeg_modes = ('RGB', 'L', 'CMYK')

for user_id, pets in tqdm(dict_people_pet.items()):
    user_folder = f"train/{user_id}"
    # Users whose folder already exists are skipped entirely.
    if directory_exists(bucket_name = bucket_name, directory_name = user_folder):
        continue

    # Empty marker object so the folder shows up even when no image is uploaded.
    bucket.put_object(Key = user_folder + "/")
    for pet in pets:
        if not is_base64(pet.image):
            continue
        try:
            img = Image.open(BytesIO(base64.b64decode(pet.image)))
            if img.mode not in jpeg_modes:
                img = img.convert('RGB')
            imagem_bytes = BytesIO()
            img.save(imagem_bytes, format='JPEG')
        except OSError as error:
            # Unreadable or unsupported image data: skip this pet instead of
            # aborting the whole upload (PIL's UnidentifiedImageError is an OSError).
            tqdm.write(f"Skipping image of pet {pet.id} (user {user_id}): {error}")
            continue
        # Rewind so the upload reads the buffer from the beginning.
        imagem_bytes.seek(0)
        bucket.put_object(Key=f"{user_folder}/{pet.id}/{str(pet.animal_name).lower().strip()}_pet_image.jpg", Body=imagem_bytes)
