### Lista para lembrar

- Fazer um `pip freeze` para gerar o `requirements.txt`

- Usar a pasta `manipulated_data` criada no nosso repositório

- precisa baixar as imagens do *dataset* completo para criar a pasta com as imagens que serão usadas para teste

- precisa do arquivo `used_clothes.csv` do nosso repositório

### Importando bibliotecas

In [11]:
# Batch size passed to the DataLoaders built in the multi-worker training loop.
BATCH_SIZE = 64

In [2]:
# Import das bibliotecas (usando PyTorch)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchvision.datasets.folder import default_loader
from torchvision.datasets import DatasetFolder
from torchvision.datasets import ImageFolder
from PIL import Image
import pandas as pd
import torch.optim as optim
import os
import re
import json

import shutil

In [3]:
# Função equivalente ao create_model()

class CNNModel(nn.Module):
    """Small two-convolution CNN classifier for 64x64 RGB images.

    Layout: Conv(3->32, 3x3) + ReLU + MaxPool(2) -> Conv(32->32, 3x3) + ReLU
    + MaxPool(2) -> Linear(32*14*14 -> 128) + ReLU -> Linear(128 -> classes).

    Args:
        num_categories: Number of output classes (size of the logits vector).
    """

    def __init__(self, num_categories):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        # Spatial size for a 64x64 input: 62 (conv1) -> 31 (pool) -> 29 (conv2) -> 14 (pool).
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.fc2 = nn.Linear(128, num_categories)

    def forward(self, x):
        """Return the raw class logits, one row per input image.

        Args:
            x: Image batch of shape (N, 3, 64, 64); a single (3, 64, 64)
                image is treated as a batch of one.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                32*14*14 features that ``fc1`` expects.
        """
        if x.dim() == 3:
            # Keep accepting a single unbatched image.
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))   # Conv1 + ReLU + MaxPool1
        x = self.pool(F.relu(self.conv2(x)))   # Conv2 + ReLU + MaxPool2
        # Flatten per sample. A view(-1, 32*14*14) would silently regroup the
        # features of several wrongly sized images into fewer "samples".
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))                # Dense1 + ReLU
        return self.fc2(x)                     # Dense2 (logits)


In [4]:
# Função equivalente ao fitting()

# Shared preprocessing: resize every image to 64x64, then convert it to a tensor.
transform = transforms.Compose(
    [transforms.Resize((64, 64)), transforms.ToTensor()]
)

def get_dataloaders(train_dir, test_df, test_dir, batch_size=16):
    """Build the training and test DataLoaders.

    Args:
        train_dir: Root folder read with ``datasets.ImageFolder``.
        test_df: DataFrame whose first column holds the test image file names.
        test_dir: Folder that contains the test images.
        batch_size: Batch size used by both loaders.

    Returns:
        Tuple ``(train_loader, test_loader, class_to_idx)``; ``class_to_idx``
        comes from the training ``ImageFolder``.
    """

    class TestDataset(torch.utils.data.Dataset):
        """Unlabelled images listed in a DataFrame, opened as RGB."""

        def __init__(self, dataframe, img_dir, transform=None):
            self.dataframe = dataframe
            self.img_dir = img_dir
            self.transform = transform

        def __len__(self):
            return len(self.dataframe)

        def __getitem__(self, idx):
            # The file name lives in the first column of the DataFrame.
            file_name = self.dataframe.iloc[idx, 0]
            picture = Image.open(os.path.join(self.img_dir, file_name)).convert("RGB")
            if not self.transform:
                return picture
            return self.transform(picture)

    # The training loader is shuffled; the test loader keeps the DataFrame order.
    training_images = datasets.ImageFolder(train_dir, transform=transform)
    training_batches = DataLoader(training_images, batch_size=batch_size, shuffle=True)

    testing_images = TestDataset(test_df, test_dir, transform=transform)
    testing_batches = DataLoader(testing_images, batch_size=batch_size, shuffle=False)

    return training_batches, testing_batches, training_images.class_to_idx

In [6]:
# Função para treinamento do modelo

def train_model(model, train_loader, num_epochs=5, learning_rate=0.001, device="cuda" if torch.cuda.is_available() else "cpu"):
    """Train ``model`` with Adam and cross-entropy loss.

    Args:
        model: Module that maps an input batch to class logits.
        train_loader: Iterable of ``(inputs, labels)`` batches; it is iterated
            once per epoch.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate given to Adam.
        device: Device the model and the batches are moved to.

    Returns:
        Tuple ``(model, history)``. ``history`` holds, per epoch, the mean
        batch loss under ``"loss"`` and the fraction of correctly classified
        samples under ``"accuracy"``.

    Raises:
        ValueError: If ``train_loader`` yields no batches in an epoch.
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    history = {"loss": [], "accuracy": []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        # Counted by hand so loaders without __len__ work too.
        num_batches = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # detach() keeps the metric bookkeeping out of the autograd graph.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if num_batches == 0 or total == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("train_loader yielded no samples; nothing to train on")

        epoch_loss = running_loss / num_batches
        epoch_acc = correct / total

        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")
        history["loss"].append(epoch_loss)
        history["accuracy"].append(epoch_acc)

    return model, history

In [7]:
# Funções para salvar o modelo

def save_model(model, model_name="model.pth"):
    """Write the model's ``state_dict`` to ``model_name`` and report it on stdout."""
    weights = model.state_dict()
    torch.save(weights, model_name)
    print(f"-> Model saved as {model_name}")

def save_history(history, filename="history.json"):
    """Serialise ``history`` as JSON (4-space indent) into ``filename``."""
    with open(filename, "w") as handle:
        json.dump(history, fp=handle, indent=4)

In [8]:
# Funções em comum com a versão utilizando TensorFlow

def create_train_dict(csv_filename: str):
    """Load a training CSV and map each image file name to its category.

    Args:
        csv_filename: CSV file with at least the ``file_name`` and
            ``Details`` columns.

    Returns:
        Tuple ``(rel_dict, df_file1)``: ``rel_dict`` maps ``file_name`` to
        ``Details`` (a later row wins on duplicate names) and ``df_file1`` is
        the DataFrame restricted to those two columns.

    Raises:
        KeyError: If ``file_name`` or ``Details`` is missing from the CSV.
    """
    df_file = pd.read_csv(csv_filename)

    # Drop the stray index column when the CSV has one. A missing column is
    # simply ignored, without a blanket ``except`` hiding unrelated errors.
    df_file = df_file.drop(columns="Unnamed: 0", errors="ignore")

    df_file1 = df_file.loc[:, df_file.columns.isin(["file_name", "Details"])]

    rel_dict = dict(zip(df_file1["file_name"], df_file1["Details"]))

    return rel_dict, df_file1

def create_folder(folder_path):
    """Create ``folder_path`` (with parents) unless it exists; report the outcome on stdout."""
    if os.path.exists(folder_path):
        print(f"-> Folder {folder_path} already exists")
        return

    os.makedirs(folder_path)
    print(f"-> Folder {folder_path} created successfully")

def create_folder_train_dataset(rel_dict: dict, foldername: str, src_path: str = "images"):
    """Copy each training image into its category sub-folder of ``foldername``.

    The sub-folder name is the category lower-cased with ``-`` replaced by
    ``_``. Images missing from ``src_path`` are reported on stdout and skipped.

    Args:
        rel_dict: Mapping of image file name to category name.
        foldername: Root folder of the training dataset.
        src_path: Folder that holds the source images.
    """
    for img, category in rel_dict.items():
        category_dir = os.path.join(foldername, category.lower().replace("-", "_"))
        src = os.path.join(src_path, img)

        if not os.path.exists(src):
            print(f"-> File not found: {img}")
            continue

        # Create the category folder on demand so the copy does not depend on
        # the caller having prepared every sub-folder beforehand.
        os.makedirs(category_dir, exist_ok=True)
        shutil.copy(src, os.path.join(category_dir, img))

def create_test_dataset(csv_filename: str, default_csv_filename: str = "manipulated_data/initial_filtered_clothes.csv", images_path: str = "images") -> list:
    """Pick test images: in the default CSV, present on disk, and not used for training.

    Args:
        csv_filename: Training CSV; its ``file_name`` entries are excluded.
        default_csv_filename: CSV listing every eligible image in ``file_name``.
        images_path: Folder whose entries are the candidate images.

    Returns:
        At most ``len(training rows) // 2`` file names, in the order in which
        ``os.listdir`` returned them.
    """
    file_list = pd.read_csv(csv_filename)["file_name"].tolist()

    # Sets give O(1) membership tests; list lookups made the scan quadratic.
    train_names = set(file_list)
    default_names = set(pd.read_csv(default_csv_filename)["file_name"])

    test_clothes_list = [
        img
        for img in os.listdir(images_path)
        if img not in train_names and img in default_names
    ]

    # The cap is based on the number of training rows, duplicates included.
    return test_clothes_list[:len(file_list) // 2]

def create_folder_dataset(test_dataset: list, dest_path: str, src_path: str = "images") -> pd.DataFrame:
    """Copy the listed images into ``dest_path`` and return them as a DataFrame.

    ``dest_path`` is created when missing. Images absent from ``src_path`` are
    reported on stdout and skipped, but still appear in the returned frame.

    Args:
        test_dataset: Image file names to copy.
        dest_path: Destination folder.
        src_path: Folder that holds the source images.

    Returns:
        DataFrame with a single ``file_name`` column holding ``test_dataset``.
    """
    if os.path.exists(dest_path):
        print(f"-> Folder {dest_path} already exists")
    else:
        os.makedirs(dest_path)
        print(f"-> Folder {dest_path} created successfully")

    for file_name in test_dataset:
        origin = os.path.join(src_path, file_name)
        if not os.path.exists(origin):
            print(f"-> File not found: {file_name}")
            continue
        shutil.copy(origin, os.path.join(dest_path, file_name))

    return pd.DataFrame({"file_name": test_dataset})

In [9]:
# Names of the CSV files to be used

csv_list = []

csv_path = "csv_files"

if os.path.isdir(csv_path):
    # Sorted so the processing order does not depend on the arbitrary order
    # returned by os.listdir.
    for filename in sorted(os.listdir(csv_path)):
        # fullmatch: the whole name must follow the pattern, so leftovers
        # such as "2_details_categories.csv.bak" are rejected.
        if re.fullmatch(r"\d+_details_categories\.csv", filename):
            csv_list.append(f"{csv_path}/{filename}")
        else:
            print(f"Arquivo {filename} possui nome que não segue o padrão exigido (num_details_categories.csv)")
else:
    print("Caminho para arquivos não existe")

In [None]:
# Main loop (single-process data loading)

history_list = []

for csv_file in csv_list:
    # Entries of csv_list look like "csv_files/<N>_details_categories.csv".
    # Take <N> from the file name: csv_file[0] is the first character of the
    # folder prefix, not the number.
    model_id = re.match(r"\d+", os.path.basename(csv_file)).group()

    train_dict, df_file1 = create_train_dict(csv_file)
    categories_list = [cat.lower().replace("-", "_") for cat in df_file1["Details"].value_counts().keys()]
    num_classes = len(categories_list)

    dataset_train_path = f"dataset_train_{model_id}"
    dataset_test_path = f"dataset_test_{model_id}"
    test_dataset = create_test_dataset(csv_file)
    test_dataframe = create_folder_dataset(test_dataset, dataset_test_path)

    train_loader, test_loader, class_map = get_dataloaders(
        dataset_train_path, test_dataframe, dataset_test_path
    )

    model = CNNModel(num_categories=num_classes)
    model, history = train_model(model, train_loader)

    history_list.append(history)
    save_model(model, f"model_{model_id}.pth")
    save_history(history, f"model_{model_id}_summary.json")

In [12]:
# Main loop (multi-worker data loading)

history_list = []

for csv_file in csv_list:
    # Entries of csv_list look like "csv_files/<N>_details_categories.csv".
    # Parsing <N> from the file name also works for multi-digit numbers and
    # other folder prefixes, unlike the fixed position csv_file[10].
    model_id = re.match(r"\d+", os.path.basename(csv_file)).group()

    train_dict, df_file1 = create_train_dict(csv_file)
    categories_list = [cat.lower().replace("-", "_") for cat in df_file1["Details"].value_counts().keys()]
    num_classes = len(categories_list)

    dataset_train_path = f"dataset_train_{model_id}"
    dataset_test_path = f"dataset_test_{model_id}"
    test_dataset = create_test_dataset(csv_file)
    test_dataframe = create_folder_dataset(test_dataset, dataset_test_path)

    train_dataset = ImageFolder(root=dataset_train_path, transform=transform)
    class_map = train_dataset.class_to_idx

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
    # NOTE(review): test_dataset is the list of file names returned by
    # create_test_dataset, so this loader iterates over names, not decoded
    # images. It is not consumed in this cell; wrap the names in an image
    # Dataset before using it for evaluation.
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

    model = CNNModel(num_categories=num_classes)
    model, history = train_model(model, train_loader)

    history_list.append(history)
    save_model(model, f"model_{model_id}.pth")
    save_history(history, f"model_{model_id}_summary.json")

-> Folder dataset_test_2 already exists
Epoch 1/5 - Loss: 0.6699 - Accuracy: 0.5741
Epoch 2/5 - Loss: 0.6200 - Accuracy: 0.6468
Epoch 3/5 - Loss: 0.5769 - Accuracy: 0.6891
Epoch 4/5 - Loss: 0.5259 - Accuracy: 0.7300
Epoch 5/5 - Loss: 0.5111 - Accuracy: 0.7344
-> Model saved as model_2.pth
-> Folder dataset_test_3 already exists
Epoch 1/5 - Loss: 1.0186 - Accuracy: 0.4990
Epoch 2/5 - Loss: 0.9510 - Accuracy: 0.5470
Epoch 3/5 - Loss: 0.9143 - Accuracy: 0.5710
Epoch 4/5 - Loss: 0.8575 - Accuracy: 0.6168
Epoch 5/5 - Loss: 0.7950 - Accuracy: 0.6407
-> Model saved as model_3.pth
-> Folder dataset_test_6 already exists
Epoch 1/5 - Loss: 1.6940 - Accuracy: 0.2691
Epoch 2/5 - Loss: 1.5822 - Accuracy: 0.3419
Epoch 3/5 - Loss: 1.4569 - Accuracy: 0.4181
Epoch 4/5 - Loss: 1.3145 - Accuracy: 0.4871
Epoch 5/5 - Loss: 1.1961 - Accuracy: 0.5436
-> Model saved as model_6.pth


### Versão TensorFlow

In [None]:
# NOTE(review): create_model() and fitting() are not defined in this part of
# the notebook; presumably they come from the TensorFlow version - confirm.
history_list = []

for csv_file in csv_list:
    # Entries of csv_list look like "csv_files/<N>_details_categories.csv".
    # Take <N> from the file name: csv_file[0] is the first character of the
    # folder prefix, which also made int(csv_file[0]) fail.
    model_id = re.match(r"\d+", os.path.basename(csv_file)).group()
    dataset_train_path = f"dataset_train_{model_id}"
    dataset_test_path = f"dataset_test_{model_id}"

    # Training data
    train_dict, df_file1 = create_train_dict(csv_file)

    create_folder(dataset_train_path)

    categories_list = [cat.lower().replace("-", "_") for cat in df_file1["Details"].value_counts().keys()]

    for category in categories_list:
        create_folder(f"{dataset_train_path}/{category}")

    create_folder_train_dataset(train_dict, dataset_train_path)

    # Test data
    test_dataset = create_test_dataset(csv_file)
    test_dataframe = create_folder_dataset(test_dataset, dataset_test_path)

    # TODO: change create_model to receive the size of the Dense layer
    classifier = create_model(num_categories=int(model_id))

    training_set, test_set = fitting(test_dataframe, train_image_path=dataset_train_path, test_image_path=dataset_test_path)

    history = classifier.fit(training_set, steps_per_epoch=800, epochs=5)

    history_list.append(history)

    # NOTE(review): the save_model defined earlier in this notebook calls
    # model.state_dict(); confirm which save_model is meant to run here.
    save_model(classifier, dataset_test_path)

-> Folder dataset_train_2 already exists
-> Folder dataset_train_2/solid already exists
-> Folder dataset_train_2/non_solid already exists
-> Folder dataset_test_2 already exists
Found 5011 images belonging to 2 classes.
Found 2505 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
-> Model saved successfuly in file model_dataset_test_2.h5
-> Folder dataset_train_3 already exists
-> Folder dataset_train_3/pattern already exists
-> Folder dataset_train_3/solid already exists
-> Folder dataset_train_3/geometric already exists
-> Folder dataset_test_3 already exists
Found 4932 images belonging to 3 classes.
Found 2915 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
-> Model saved successfuly in file model_dataset_test_3.h5
-> Folder dataset_train_6 already exists
-> Folder dataset_train_6/pattern already exists
-> Folder dataset_train_6/floral already exists
-> Folder dataset_train_6/solid already exists
-> Folder dataset_train_6/stri

TypeError: write() argument must be str, not dict

In [None]:
# Save each model's training summary to a JSON file
for idx, h in enumerate(history_list):
    # Convert the metric values to plain floats (in place) before dumping.
    for metric in ("loss", "accuracy"):
        h.history[metric] = list(map(float, h.history[metric]))

    summary_name = f"model_{idx}_summary.json"
    with open(summary_name, "w") as json_file:
        json.dump(h.history, json_file, indent=4, ensure_ascii=True)

In [87]:
# For each trained model, classify every image of its test folder and group
# the file names by predicted class; one dict per model is collected.
predictions_list = []

for i in [2, 3, 6]:
    aux_dict = {}

    data_path = f"dataset_test_{i}"
    model_path = f"model_dataset_test_{i}.h5"
    train_path = f"dataset_train_{i}"

    classifier = create_model(num_categories=i)
    classifier.load_weights(model_path)

    # The generator is only used to recover the class-name -> index mapping
    # of the training folder.
    datagen = ImageDataGenerator(rescale=1./255)
    generator = datagen.flow_from_directory(
        train_path,
        target_size=(64, 64),
        batch_size=1,
        class_mode='categorical'
    )

    class_indices = generator.class_indices
    inv_class_indices = {v: k for k, v in class_indices.items()}

    # List the folder once so the progress total matches what is iterated.
    image_names = os.listdir(data_path)
    dset_size = len(image_names)

    for idx, img in enumerate(image_names):
        img_path = os.path.join(data_path, img)

        # NOTE(review): the image is fed without the 1/255 rescale used by the
        # generator above - confirm it matches the training preprocessing.
        test_image = image.load_img(img_path, target_size = (64, 64))
        test_image = image.img_to_array(test_image)
        test_image = np.expand_dims(test_image, axis = 0)

        result = classifier.predict(test_image)

        index = np.argmax(result[0])
        prediction = inv_class_indices[index]

        aux_dict.setdefault(prediction, []).append(img)

        print(f"Dataset: {data_path}; Imagem: {img} ({idx+1}/{dset_size}) -> {((idx+1)/dset_size) * 100:.2f}%", end="\r")

    # One separator per dataset: printing it inside the image loop would
    # break the single-line "\r" progress display.
    print("\n" + "-" * 100)

    predictions_list.append(aux_dict)

Found 5011 images belonging to 2 classes.
Found 4932 images belonging to 3 classes.82684c2d96fc771379fb354e.jpg (2505/2505) -> 100.00%
Found 6845 images belonging to 6 classes.26a44fa4b182af3f86968d89.jpg (2915/2915) -> 100.00%
Dataset: dataset_test_6; Imagem: 1d21c9f90e524b2dbf310d632b8659ab.jpg (3422/3422) -> 100.00%