### Lista para lembrar

- Fazer um `pip freeze` para gerar o `requirements.txt`

- usar a pasta `manipulated_data` criada no nosso repositório

- precisa baixar as imagens do *dataset* completo para criar a pasta com as imagens que serão usadas para teste

- precisa do arquivo `used_clothes.csv` do nosso repositório

### Importando bibliotecas

In [15]:
# Importing the Keras libraries and packages
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

import shutil

In [10]:
def create_train_dict(csv_filename: str):
    """Load a labelled CSV and map each image file name to its category.

    Args:
        csv_filename: Path to a CSV file that contains at least the columns
            ``file_name`` and ``Details``.

    Returns:
        A ``(rel_dict, df_file1)`` tuple. ``rel_dict`` maps every
        ``file_name`` value to the ``Details`` value of the same row (the
        last row wins when a file name is repeated). ``df_file1`` is the
        loaded data restricted to those two columns.

    Raises:
        KeyError: If ``file_name`` or ``Details`` is missing from the CSV.
    """
    df_file = pd.read_csv(csv_filename)

    # "Unnamed: 0" is presumably an index column written by an earlier export;
    # drop it only when present instead of swallowing every exception.
    df_file = df_file.drop(columns="Unnamed: 0", errors="ignore")

    df_file1 = df_file.loc[:, df_file.columns.isin(["file_name", "Details"])]

    # Pair the columns row by row; this does not depend on the index labels.
    rel_dict = dict(zip(df_file1["file_name"], df_file1["Details"]))

    return rel_dict, df_file1

def create_folder(folder_path):
    """Create ``folder_path`` (including parents) unless it already exists.

    Prints a one-line message telling whether the folder was created or was
    already there.
    """
    if os.path.exists(folder_path):
        print(f"-> Folder {folder_path} already exists")
        return

    os.makedirs(folder_path)
    print(f"-> Folder {folder_path} created successfully")

def create_folder_train_dataset(rel_dict: dict, foldername: str, src_path: str = "images"):
    """Copy every labelled image into ``foldername/<category>/``.

    The category name is lower-cased and its hyphens are replaced with
    underscores to form the sub-folder name.

    Args:
        rel_dict: Mapping of image file name to category label (labels must
            be strings).
        foldername: Root folder of the training dataset.
        src_path: Folder that holds the source images.

    Images missing from ``src_path`` are reported on stdout and skipped.
    """
    for img, category in rel_dict.items():
        category_dir = os.path.join(foldername, category.lower().replace("-", "_"))
        src = os.path.join(src_path, img)

        if not os.path.exists(src):
            print(f"-> File not found: {img}")
            continue

        # Create the category folder on demand so the copy does not depend on
        # the caller having prepared it beforehand.
        os.makedirs(category_dir, exist_ok=True)
        shutil.copy(src, os.path.join(category_dir, img))

def create_test_dataset(csv_filename: str, default_csv_filename: str = "manipulated_data/initial_filtered_clothes.csv", images_path: str = "images") -> list:
    """Pick images for testing that were not used for training.

    An image qualifies when it exists in ``images_path``, is listed in
    ``default_csv_filename`` and is *not* listed in ``csv_filename``.

    Args:
        csv_filename: CSV with a ``file_name`` column listing training images.
        default_csv_filename: CSV with a ``file_name`` column listing every
            image that may be used at all.
        images_path: Folder that holds the image files.

    Returns:
        At most ``len(training rows) // 2`` qualifying file names, in the
        order returned by ``os.listdir`` (which is not guaranteed to be
        stable across platforms).
    """
    train_names = pd.read_csv(csv_filename)["file_name"].tolist()

    # Sets give O(1) membership tests; the lists made this loop quadratic.
    used_for_training = set(train_names)
    allowed = set(pd.read_csv(default_csv_filename)["file_name"])

    test_clothes_list = [
        img
        for img in os.listdir(images_path)
        if img not in used_for_training and img in allowed
    ]

    return test_clothes_list[:len(train_names) // 2]

def create_folder_dataset(test_dataset: list, dest_path: str, src_path: str = "images") -> pd.DataFrame:
    """Copy the listed images into ``dest_path`` and describe them as a frame.

    Args:
        test_dataset: File names of the images to copy.
        dest_path: Destination folder; created when it does not exist.
        src_path: Folder that holds the source images.

    Returns:
        A DataFrame with a single ``file_name`` column containing
        ``test_dataset`` as given, including names whose file was not found.
    """
    if os.path.exists(dest_path):
        print(f"-> Folder {dest_path} already exists")
    else:
        os.makedirs(dest_path)
        print(f"-> Folder {dest_path} created successfully")

    for file_name in test_dataset:
        origin = os.path.join(src_path, file_name)

        if not os.path.exists(origin):
            print(f"-> File not found: {file_name}")
            continue

        shutil.copy(origin, os.path.join(dest_path, file_name))

    return pd.DataFrame({"file_name": test_dataset})

def create_model(num_categories: int, summary: bool = False, input_shape: tuple = (64, 64, 3), dense_units: int = 128) -> Sequential:
    """Build and compile a small CNN image classifier.

    The network is two Conv2D(32, 3x3, relu) + MaxPooling2D(2x2) stages, a
    Flatten layer, a hidden Dense layer and a softmax output layer. It is
    compiled with the Adam optimizer, categorical cross-entropy loss and the
    accuracy metric.

    Args:
        num_categories: Number of units in the softmax output layer.
        summary: When true, print the Keras model summary.
        input_shape: Shape of one input image; images fed to the model must
            be resized to this shape.
        dense_units: Number of units in the hidden Dense layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Build the classifier
    classifier = Sequential()

    # Step 1 - Convolution
    classifier.add(Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'))

    # Step 2 - Pooling
    classifier.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolution + pooling stage
    classifier.add(Conv2D(32, (3, 3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2)))

    # Step 3 - Flattening
    classifier.add(Flatten())

    # Step 4 - Full connection
    classifier.add(Dense(units=dense_units, activation='relu'))
    classifier.add(Dense(units=num_categories, activation='softmax'))

    # Compile the CNN
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    if summary:
        classifier.summary()

    return classifier

def fitting(test_dataframe: pd.DataFrame, train_image_path: str, test_image_path: str) -> tuple:
    """Create the Keras image iterators for training and testing.

    Both iterators rescale pixel values by 1/255, resize images to 64x64 and
    use batches of 16.

    Args:
        test_dataframe: Frame whose ``file_name`` column lists the test images.
        train_image_path: Training folder, read with ``flow_from_directory``
            in ``categorical`` class mode.
        test_image_path: Folder that the test file names are relative to.

    Returns:
        ``(training_set, test_set)``; the test iterator yields images only
        (``class_mode=None``) and is not shuffled.
    """
    image_size = (64, 64)
    images_per_batch = 16

    train_datagen = ImageDataGenerator(rescale=1. / 255)
    training_set = train_datagen.flow_from_directory(
        train_image_path,
        target_size=image_size,
        batch_size=images_per_batch,
        class_mode='categorical',
    )

    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_set = test_datagen.flow_from_dataframe(
        dataframe=test_dataframe,
        directory=test_image_path,
        x_col='file_name',
        class_mode=None,
        target_size=image_size,
        batch_size=images_per_batch,
        shuffle=False,
    )

    return training_set, test_set

def save_model(classifier: Sequential, model_name: str):
    """Persist a model as ``model_<model_name>.json`` plus ``model_<model_name>.h5``.

    The JSON file receives the output of ``classifier.to_json()`` and the
    ``.h5`` file receives the weights via ``classifier.save_weights``. Both
    are written relative to the current working directory.
    """
    architecture = classifier.to_json()

    json_file = open(f"model_{model_name}.json", "w")
    try:
        json_file.write(architecture)
    finally:
        json_file.close()

    classifier.save_weights(f"model_{model_name}.h5")
    print(f"-> Model saved successfuly in file model_{model_name}.h5")

def save_history(history_list: list):
    """Write each training history to ``model_<index>.json``.

    Args:
        history_list: Objects exposing a ``history`` attribute that maps a
            metric name to a sequence of per-epoch numeric values (for
            example the objects returned by ``classifier.fit``).

    Files are written as indented JSON relative to the current working
    directory.
    """
    for idx, history in enumerate(history_list):
        # ``file.write`` only accepts str, and the metric values may be NumPy
        # scalars, so cast to plain floats and let json do the serialisation.
        serializable = {
            metric: [float(value) for value in values]
            for metric, values in history.history.items()
        }

        with open(f"model_{idx}.json", "w") as file:
            json.dump(serializable, file, indent=4)

In [4]:
# Labelled CSV files, one per experiment; the leading number of each name is
# reused in the dataset folder and model file names.
csv_list = [
    "2_details_categories.csv",
    "3_details_categories.csv",
    "6_details_categories.csv",
]

In [None]:
history_list = []

for csv_file in csv_list:
    # The text before the first underscore tags this experiment's folders and
    # model files (e.g. "2" for "2_details_categories.csv"); taking the whole
    # prefix also works for multi-digit tags.
    tag = csv_file.split("_", 1)[0]
    train_dir = f"dataset_train_{tag}"
    test_dir = f"dataset_test_{tag}"

    # Training data: one sub-folder per normalised category name
    train_dict, df_file1 = create_train_dict(csv_file)

    create_folder(train_dir)

    categories_list = [cat.lower().replace("-", "_") for cat in df_file1["Details"].value_counts().keys()]

    for category in categories_list:
        create_folder(f"{train_dir}/{category}")

    create_folder_train_dataset(train_dict, train_dir)

    # Test data
    test_dataset = create_test_dataset(csv_file)
    test_dataframe = create_folder_dataset(test_dataset, test_dir)

    training_set, test_set = fitting(test_dataframe, train_image_path=train_dir, test_image_path=test_dir)

    # Size the softmax layer from the classes the generator actually found,
    # rather than trusting the first character of the CSV file name.
    classifier = create_model(num_categories=len(training_set.class_indices))

    # NOTE(review): 800 steps of 16 images is more than one pass over the
    # ~5000 training images reported by the generator; confirm this is intended.
    history = classifier.fit(training_set, steps_per_epoch=800, epochs=5)

    history_list.append(history)

    save_model(classifier, test_dir)

-> Folder dataset_train_2 already exists
-> Folder dataset_train_2/solid already exists
-> Folder dataset_train_2/non_solid already exists
-> Folder dataset_test_2 already exists
Found 5011 images belonging to 2 classes.
Found 2505 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
-> Model saved successfuly in file model_dataset_test_2.h5
-> Folder dataset_train_3 already exists
-> Folder dataset_train_3/pattern already exists
-> Folder dataset_train_3/solid already exists
-> Folder dataset_train_3/geometric already exists
-> Folder dataset_test_3 already exists
Found 4932 images belonging to 3 classes.
Found 2915 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
-> Model saved successfuly in file model_dataset_test_3.h5
-> Folder dataset_train_6 already exists
-> Folder dataset_train_6/pattern already exists
-> Folder dataset_train_6/floral already exists
-> Folder dataset_train_6/solid already exists
-> Folder dataset_train_6/stri

TypeError: write() argument must be str, not dict

In [29]:
# Scratch check: how json renders a float value with 4-space indentation
sample = {"a": 0.42345}
print(json.dumps(sample, indent=4))

{
    "a": 0.42345
}


In [None]:
for idx, h in enumerate(history_list):
    # Cast every recorded metric (not only loss/accuracy) to plain floats so
    # json can serialise it; the values may be NumPy scalars.
    for metric in list(h.history):
        h.history[metric] = [float(item) for item in h.history[metric]]

    with open(f"model_{idx}_summary.json", "w") as json_file:
        json.dump(h.history, json_file, indent=4, ensure_ascii=True)

In [87]:
predictions_list = []

for i in [2, 3, 6]:
    # Maps each predicted class name to the list of image files assigned to it
    aux_dict = {}

    data_path = f"dataset_test_{i}"
    model_path = f"model_dataset_test_{i}.h5"
    train_path = f"dataset_train_{i}"

    # Rebuild the architecture, then restore the trained weights
    classifier = create_model(num_categories=i)
    classifier.load_weights(model_path)

    # The training directory iterator is only used to recover the mapping
    # between class names and output indices.
    datagen = ImageDataGenerator(rescale=1./255)
    generator = datagen.flow_from_directory(
        train_path,
        target_size=(64, 64),
        batch_size=1,
        class_mode='categorical'
    )

    inv_class_indices = {v: k for k, v in generator.class_indices.items()}

    # List the folder once so the progress total and the loop agree
    image_names = os.listdir(data_path)
    dset_size = len(image_names)

    for idx, img in enumerate(image_names):
        img_path = os.path.join(data_path, img)

        test_image = image.load_img(img_path, target_size=(64, 64))
        # Apply the same 1/255 rescaling the training generator used;
        # otherwise the model sees inputs on a different scale.
        test_image = image.img_to_array(test_image) / 255.0
        test_image = np.expand_dims(test_image, axis=0)

        result = classifier.predict(test_image)

        index = np.argmax(result[0])
        prediction = inv_class_indices[index]

        aux_dict.setdefault(prediction, []).append(img)

        print(f"Dataset: {data_path}; Imagem: {img} ({idx+1}/{dset_size}) -> {((idx+1)/dset_size) * 100:.2f}%", end="\r")

    # Print the separator once per dataset so the "\r" progress line above
    # can overwrite itself in place.
    print("\n" + "-" * 100)

    predictions_list.append(aux_dict)

Found 5011 images belonging to 2 classes.
Found 4932 images belonging to 3 classes.82684c2d96fc771379fb354e.jpg (2505/2505) -> 100.00%
Found 6845 images belonging to 6 classes.26a44fa4b182af3f86968d89.jpg (2915/2915) -> 100.00%
Dataset: dataset_test_6; Imagem: 1d21c9f90e524b2dbf310d632b8659ab.jpg (3422/3422) -> 100.00%