In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import boto3
import base64
from io import BytesIO
from PIL import Image
import os
from dotenv import load_dotenv, find_dotenv

In [2]:
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Encontra o arquivo .env no diretório pai
dotenv_path = find_dotenv()

# Carrega as variáveis de ambiente do arquivo .env encontrado
load_dotenv(dotenv_path)

True

In [3]:
dataset_peoples = pd.read_csv('../data/peoples.csv')
dataset_pets = pd.read_csv('../data/dataset_union.csv')

In [4]:
uuids_peoples = dataset_peoples['ID'].to_list()
ids_pets = dataset_pets['ID'].to_list()

In [5]:
from collections import namedtuple

Pet = namedtuple('Pet', ['id', 'animal_name', 'animal_gender', 'breed_name', 'borough', 'zip_code', 'type', 'image'])

In [6]:
print(len(dataset_pets))

134291


In [7]:
dict_people_pet = {}

# garantindo que todo usuário terá um animal
for index, id in tqdm(enumerate(uuids_peoples), total=len(uuids_peoples)):
    pet = dataset_pets.loc[dataset_pets.index[index]]
    dict_people_pet[f"{id}"] = [Pet(id = pet['ID'], animal_name = pet['AnimalName'], animal_gender = pet['AnimalGender'], breed_name = pet['BreedName'], borough = pet['Borough'],  zip_code = str(pet['ZipCode']), type = pet['Type'], image = pet['Image'])]
    # if index in dataset_pets.index:
    #     dataset_pets = dataset_pets.drop(index)

100%|██████████| 24500/24500 [00:00<00:00, 30904.73it/s]


In [8]:
count_pets = len(ids_pets)
count_users = len(uuids_peoples)

count_pets_distributeds = 0

dict_people_pet_final = {}

with tqdm(total=count_pets, desc=f"Handing out pets...") as inner_tqdm:
    while count_pets_distributeds < count_pets and len(uuids_peoples) > 0:
        people_random = np.random.choice(uuids_peoples)
        quantity_pets = np.random.randint(1, 9)

        # Gera aleatoriamente uma lista de índices de animais
        pet_indices = np.random.choice(dataset_pets.index, size=quantity_pets, replace=False)

        for pet_index in pet_indices:
            pet = dataset_pets.loc[pet_index]
            dict_people_pet[people_random].append(Pet(id=pet['ID'], animal_name=pet['AnimalName'], animal_gender=pet['AnimalGender'], breed_name=pet['BreedName'], borough=pet['Borough'], zip_code=str(pet['ZipCode']), type=pet['Type'], image=pet['Image']))

        # Cria um novo dataset com os animais que ainda não foram distribuídos, excluindo os animais distribuídos
        dataset_pets = dataset_pets.drop(index=pet_indices)
        
        inner_tqdm.update(quantity_pets)
        count_pets_distributeds += quantity_pets
        dict_people_pet_final[people_random] = dict_people_pet.pop(people_random)
        uuids_peoples.remove(people_random)

Handing out pets...:  81%|████████▏ | 109347/134291 [03:03<00:41, 594.84it/s] 


In [9]:
session = boto3.Session(
    aws_access_key_id       = os.getenv('AWS_ACCESS_KEY_ID'),  
    aws_secret_access_key   = os.getenv('AWS_SECRET_ACCESS_KEY'),
    region_name             = os.getenv('AWS_DEFAULT_REGION'), 
    aws_session_token       = os.getenv('AWS_ACCESS_TOKEN')
)

In [10]:
s3 = session.resource('s3')

In [11]:
def directory_exists(bucket_name, directory_name):
    try:
        s3.head_object(Bucket=bucket_name, Key=directory_name)
        return True
    except:
        return False

In [14]:
import os

file_list = []
dir_atual = os.getcwd()
dir_destino = os.path.abspath(f'{os.pardir}/inteligencia_artificial/train')

os.chdir(dir_destino)

arquivos_jpg = [f for f in os.listdir() if f.endswith('.jpg')]
fotos = set(arquivos_jpg)

In [15]:
len(fotos)

10222

In [None]:
bucket_name = 'pilha-nuvem-tcc-sptech-bucket'

for user_id, pets in tqdm(dict_people_pet_final.items()):
    user_folder = f"train/{user_id}"
    if not directory_exists(bucket_name = bucket_name, directory_name = user_folder):
        s3.Bucket(bucket_name).put_object(Key = user_folder + "/")
        for pet in pets:
            if pet.image:
                name_image = pet.image
                img = Image.open(BytesIO(name_image))
                imagem_bytes = BytesIO()
                img.save(imagem_bytes, format='JPEG')
                imagem_bytes.seek(0)
                s3.Bucket(bucket_name).put_object(Key=f"{user_folder}/{pet.id}/{str(pet.animal_name).lower().strip()}_pet_image.jpg", Body=imagem_bytes)
