# Thrift Hunter Machine Learning Model

## Download and prepare the dataset

In [None]:
# Restrict the Kaggle credentials file to owner read/write only (mode 600)
# before the Kaggle CLI is used to download the dataset
! chmod 600 /content/kaggle.json

In [None]:
# Download the dqmonn/zalando-store-crawl dataset with the Kaggle CLI,
# using /content/ as its config directory (where kaggle.json lives)
!KAGGLE_CONFIG_DIR=/content/ kaggle datasets download -d dqmonn/zalando-store-crawl

In [None]:
# Extract the dataset
import zipfile

# The context manager closes the archive handle once extraction finishes,
# even if extractall() raises part-way through.
with zipfile.ZipFile('/content/zalando-store-crawl.zip') as zip_file:
    zip_file.extractall('/content/')

In [None]:
# Check one of the images inside the dataset
import cv2

sample_path = "/content/zalando/longsleeve/1VJ22O059-Q11@7.jpg"
img = cv2.imread(sample_path)

# cv2.imread signals a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than an
# AttributeError on img.shape below.
if img is None:
    raise FileNotFoundError(f"Could not read image: {sample_path}")

# Displayed as the cell output: (height, width, channels)
img.shape

In [None]:
import matplotlib.pyplot as plt

# cv2.imread returns pixels in BGR channel order, while imshow interprets a
# 3-channel array as RGB. Reverse the channel axis so the colors are shown
# correctly instead of with red and blue swapped.
plt.imshow(img[:, :, ::-1])

In [None]:
# Remove the nested zalando/zalando directory so it is not inside the folder
# that gets split below (presumably a duplicate copy shipped in the
# archive — TODO confirm)
!rm -rf /content/zalando/zalando

In [None]:
# Install split-folders, which provides the `splitfolders` module used to
# create the train/val directories
!pip install split-folders

In [None]:
# Split the class folders into training and validation subsets
import splitfolders
from sklearn.model_selection import train_test_split

input_folder = '/content/zalando'

# A two-element ratio yields only train and val (80% / 20%), with no test
# split; the fixed seed keeps the split reproducible between runs.
splitfolders.ratio(
    input_folder,
    output="/content/zalando2",
    seed=42,
    ratio=(.8, .2),
    group_prefix=None,
)

## Prepare the ImageDataGenerator

In [None]:
# Create the image generators: augmented for training, rescale-only for validation
from keras_preprocessing.image import ImageDataGenerator

# Training images are multiplied by 1/255 and randomly rotated, shifted,
# zoomed and horizontally flipped on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Stream images from the split directories, resizing each one to 150x150
TRAINING_DIR = '/content/zalando2/train'
VALIDATION_DIR = '/content/zalando2/val'

# Both generators read images the same way; only the source directory and
# the ImageDataGenerator (augmenting vs. plain) differ.
_flow_kwargs = {
    'target_size': (150, 150),
    'class_mode': 'categorical',
    'batch_size': 126,
}

train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **_flow_kwargs)
val_generator = val_datagen.flow_from_directory(VALIDATION_DIR, **_flow_kwargs)

In [None]:
# class_indices is a dict keyed by class name; iterating it yields the
# names in the dict's own order.
labels = list(train_generator.class_indices)

# Show the class names as the cell output
labels

## Build the model

In [None]:
# Build the model

import tensorflow as tf

# Size the output layer from the classes the training generator found on
# disk rather than hard-coding it (the original used a fixed 6), so the
# model stays in sync with the dataset's folder layout.
num_classes = train_generator.num_classes

model = tf.keras.models.Sequential([
    # The input shape matches the generators' target size: 150x150 pixels
    # with 3 color channels
    # This is the first convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The third convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fourth convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # Dropout on the flattened features to reduce overfitting
    tf.keras.layers.Dropout(0.5),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # One softmax output per class
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Print the model summary
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss (matching the
# generators' 'categorical' class_mode), RMSprop optimizer, accuracy metric
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Train the model and evaluate the results

In [None]:
# Train the model
# Step counts come from the generators instead of being hard-coded:
# len() of a directory iterator is its number of batches per pass, so every
# training and validation batch is used even if the dataset size or the
# batch size changes.
history = model.fit(
    train_generator,
    epochs=25,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
    verbose=1,
)

In [None]:
import matplotlib.pyplot as plt

# Plot the results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# First figure: accuracy per epoch
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Second figure: loss per epoch. The loss histories were collected above
# but never drawn, which left this figure empty.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()