### Download Dataset

In [1]:
# !kaggle datasets download -d paramaggarwal/fashion-product-images-small
# !unzip fashion-product-images-small.zip

### Importação das bibliotecas

In [4]:
import os
import itertools
from pathlib import Path
from shutil import move
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import tensorflow as tf
import tensorflow_hub as hub
from tqdm import tqdm

# Extensão do tqdm para DataFrames
tqdm.pandas()

print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

TF version: 2.18.0
Hub version: 0.16.1
GPU is available


### Preparação de diretórios e organização das imagens

In [None]:
# Criar pasta principal
data_dir = 'Fashion_data'
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

# Ler o CSV
df = pd.read_csv('styles.csv', usecols=['id', 'masterCategory'])
df['id'] = df['id'].astype(str)

# Mover imagens para subpastas de categoria
images_source = 'images'
categories_dir = os.path.join(data_dir, 'categories')

# Criar diretório de categorias se não existir
os.makedirs(categories_dir, exist_ok=True)

# Processar cada imagem
all_images = os.listdir(images_source)
moved_count = 0

for image in tqdm(all_images):
    image_id = image.split('.')[0]
    category = df.loc[df['id'] == image_id, 'masterCategory'].values
    
    if len(category) == 0:
        continue  # Pula se a categoria não for encontrada
    
    category = category[0]
    category_path = os.path.join(categories_dir, category)
    
    # Criar subpasta da categoria se necessário
    os.makedirs(category_path, exist_ok=True)
    
    # Mover a imagem
    src = os.path.join(images_source, image)
    dst = os.path.join(category_path, image)
    move(src, dst)
    moved_count += 1

print(f'Total de imagens movidas: {moved_count}')

### Definição de parâmetros e modelo

In [5]:
MODULE_HANDLE = "https://tfhub.dev/google/bit/m-r50x3/1"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
N_FEATURES = 256

# Classe de encapsulamento do hub.KerasLayer
class HubWrapper(tf.keras.layers.Layer):
    def __init__(self, module_handle, trainable=False, **kwargs):
        super().__init__(**kwargs)
        # Define uma camada do TF Hub
        self.hub_layer = hub.KerasLayer(module_handle, trainable=trainable)

    def call(self, inputs):
        # Encaminha os dados de entrada para a camada do Hub
        return self.hub_layer(inputs)

# Construção do modelo Sequential
model = tf.keras.Sequential([
    # Camada de entrada
    tf.keras.layers.Input(shape=IMAGE_SIZE + (3,)),
    
    # Envolve a camada do TensorFlow Hub no wrapper personalizado
    HubWrapper(MODULE_HANDLE, trainable=False),
    
    # Camada de Dropout para reduzir overfitting
    tf.keras.layers.Dropout(rate=0.2),
    
    # Camada densa intermediária
    tf.keras.layers.Dense(
        N_FEATURES,
        kernel_regularizer=tf.keras.regularizers.l2(0.0001)
    ),
    tf.keras.layers.Dropout(rate=0.2),
    
    # Camada de saída com 7 neurônios, pois temos 7 classes
    tf.keras.layers.Dense(
        7,  # Ajustado para 7 classes
        kernel_regularizer=tf.keras.regularizers.l2(0.0001)
    )
])

# Constrói formalmente o modelo (opcional, mas útil para summary)
model.build((None,) + IMAGE_SIZE + (3,))
model.summary()

I0000 00:00:1740083226.680205   16148 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2795 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5


### Preparação dos geradores de dados(treino/validação)

In [6]:
data_dir = 'Fashion_data/categories'

# Normalização e separação (20% para validação)
datagen_kwargs = dict(rescale=1./255, validation_split=0.20)

# Parâmetros para o fluxo de dados
dataflow_kwargs = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    interpolation="bilinear"
)

# Gerador de dados para o conjunto de validação
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    data_dir,
    subset="validation",
    shuffle=False,
    **dataflow_kwargs
)

# Gerador de dados para treinamento
# Define se vamos utilizar data augmentation
do_data_augmentation = False
if do_data_augmentation:
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=40,
        horizontal_flip=True,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        **datagen_kwargs
    )
else:
    train_datagen = valid_datagen

train_generator = train_datagen.flow_from_directory(
    data_dir,
    subset="training",
    shuffle=True,
    **dataflow_kwargs
)

Found 8886 images belonging to 7 classes.
Found 35555 images belonging to 7 classes.


### Compilação e treinamento do modelo

In [None]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

EPOCHS = 10

# Treina o modelo
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=valid_generator,
    validation_steps=valid_generator.samples // BATCH_SIZE,
    epochs=EPOCHS
)

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1740083277.711043   16908 service.cc:148] XLA service 0x7f004c00b9d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1740083277.711214   16908 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2025-02-20 17:27:58.194533: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1740083280.349265   16908 cuda_dnn.cc:529] Loaded cuDNN version 90600
2025-02-20 17:28:01.445832: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Constant folding an instruction is taking > 1s:

  %multiply.277 = f32[32,224,224,3]{3,2,1,0} multiply(f32[32,224,224,3]{3,2,1,0} %broadcast.276, f32[32,224,224,3]{3,2,1,0} %constant.275), metadata={op_type="Mul" op_name="sequential_1/hub_wrapper_1/keras_layer/StatefulPartitionedCall/StatefulPartitionedCall/

[1m   3/1111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:44:02[0m 35s/step - accuracy: 0.1059 - loss: 3.8303

In [2]:
print("GPUs disponíveis:", tf.config.list_physical_devices('GPU'))

GPUs disponíveis: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
