In [1]:
import pandas as pd
import os
import shutil

In [2]:
# Names of the entries found in the raw image directory, held as a set so that
# membership tests and removals are cheap.
all_images = {*os.listdir("raw_data/images")}

In [3]:
def write_folder(folder_name, df):
    """Copy the images referenced by ``df`` into a per-folder directory.

    Every distinct, non-null value of ``df['file_name']`` that is currently a
    member of the module-level ``all_images`` set is copied from
    ``raw_data/images`` to ``manipulated_data/images/<folder_name>`` and then
    removed from ``all_images``, so a later call will skip that file.

    Args:
        folder_name: Name of the sub-folder under ``manipulated_data/images``.
        df: DataFrame that has a ``file_name`` column.

    Returns:
        None. The function works through side effects only: files are copied
        on disk and ``all_images`` is mutated in place.
    """
    dst_dir = f'manipulated_data/images/{folder_name}'
    dst_dir_ready = False

    for file_name in df['file_name'].dropna().unique():
        if file_name not in all_images:
            continue

        # Create the destination lazily: shutil.copy2 fails when the folder is
        # missing, and an empty selection should not leave an empty folder.
        if not dst_dir_ready:
            os.makedirs(dst_dir, exist_ok=True)
            dst_dir_ready = True

        src_path = os.path.join('raw_data/images', file_name)
        dst_path = os.path.join(dst_dir, file_name)

        # Copy the image (with its metadata) into the destination folder.
        shutil.copy2(src_path, dst_path)

        # Drop the name from the shared set only after a successful copy, so
        # the same image is never copied a second time.
        all_images.remove(file_name)

In [4]:
# Load the CSV table whose 'Details' and 'file_name' columns drive the cells below.
df = pd.read_csv(filepath_or_buffer="manipulated_data/initial_filtered_clothes.csv")

In [5]:
# Rows whose 'Details' value is 'Animal print'; their images go to 'animal_print'.
animal_df = df.loc[df['Details'].eq('Animal print')]
write_folder(folder_name='animal_print', df=animal_df)

In [6]:
# Rows whose 'Details' value is 'Checkers'; their images go to 'checkers'.
checkers_df = df.loc[df['Details'].eq('Checkers')]
write_folder(folder_name='checkers', df=checkers_df)

In [7]:
# Rows whose 'Details' value is 'Stripes'; their images go to 'stripes'.
stripes_df = df.loc[df['Details'].eq('Stripes')]
write_folder(folder_name='stripes', df=stripes_df)

In [8]:
# Copy a reproducible random sample of images for each of these categories.
SAMPLE_SIZE = 1500  # upper bound on the number of images copied per category
SAMPLE_SEED = 42    # fixed seed so a re-run picks the same rows

for category in ["Pattern", "Floral", "Solid"]:
    subset = df[df['Details'] == category]

    # Keep rows whose image is still in all_images, one row per file name:
    # a file sampled twice would be copied twice and its second removal from
    # all_images would raise KeyError.
    valid_subset = (
        subset[subset['file_name'].isin(all_images)]
        .dropna(subset=['file_name'])
        .drop_duplicates(subset='file_name')
    )
    if valid_subset.empty:
        continue

    # DataFrame.sample raises ValueError when n exceeds the number of rows
    # (sampling without replacement), so cap n at what is available.
    sample_subset = valid_subset.sample(
        n=min(SAMPLE_SIZE, len(valid_subset)),
        random_state=SAMPLE_SEED,
    )

    # shutil.copy2 fails when the destination folder is missing.
    os.makedirs(f'manipulated_data/images/{category.lower()}', exist_ok=True)

    # Reuse the shared helper: it copies each file and removes it from all_images.
    write_folder(category.lower(), sample_subset)

In [9]:
# Build a DataFrame with only the rows whose file name is still in all_images,
# then write it to CSV without the index column.
remaining_df = df.loc[df['file_name'].isin(all_images)].copy()
remaining_df.to_csv(path_or_buf="remaining_clothes.csv", index=False)