# Sorting images to men / women folders

In [1]:
import os
import csv
import os.path

In [2]:
images_path = "myntradataset/images/"
men_training_path = "train/man/"
women_training_path = "train/woman/"
men_validation_path = "validation/man/"
women_validation_path = "validation/woman/"
men_testing_path = "testing/man/"
women_testing_path = "testing/woman/"

count = 0

with open('styles.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for rowDict in reader:
        if os.path.isfile(images_path + rowDict[0]):
            if rowDict[1] == "Men":
                if count <= 8:
                    os.rename(images_path + rowDict[0], men_training_path + rowDict[0])
                elif count == 9:
                    os.rename(images_path + rowDict[0], men_validation_path + rowDict[0])
                elif count == 10:
                    os.rename(images_path + rowDict[0], men_testing_path + rowDict[0])
                    count = 0
                count += 1
            elif rowDict[1] == "Women":
                if count <= 8:
                    os.rename(images_path + rowDict[0], women_training_path + rowDict[0])
                elif count == 9:
                    os.rename(images_path + rowDict[0], women_validation_path + rowDict[0])
                elif count == 10:
                    os.rename(images_path + rowDict[0], women_testing_path + rowDict[0])
                    count = 0
                count += 1

# Train the model

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np


In [4]:
train = ImageDataGenerator(rescale =1/255)
validation = ImageDataGenerator(rescale =1/255)

In [5]:
train_dataset = train.flow_from_directory("train/", 
                                          target_size = (60,60),
                                         batch_size = 1000,
                                         class_mode = 'binary')

validation_dataset = train.flow_from_directory("validation/", 
                                          target_size = (60,60),
                                         batch_size = 20,
                                         class_mode = 'binary')

Found 33823 images belonging to 2 classes.
Found 4227 images belonging to 2 classes.


In [6]:
validation_dataset.class_indices

{'man': 0, 'woman': 1}

In [7]:
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16,(3,3),activation = 'relu', input_shape= (60,60,3)),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(32,(3,3),activation = 'relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(512,activation= 'relu'),
                                    tf.keras.layers.Dense(1,activation='sigmoid')                                
                                    
])

In [8]:
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
              metrics=['accuracy'])

In [9]:
model_fit = model.fit(train_dataset,
                     steps_per_epoch = 33,
                     epochs = 10,
                     validation_data = validation_dataset)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Testing using testing set from the same set as the training dataset (unique images though)
Link to set: https://www.kaggle.com/paramaggarwal/fashion-product-images-small

In [10]:
men_testing_path = 'testing/man/'
women_testing_path = 'testing/woman/'

total_images = 0

total_men_found = 0
total_men_found_in_men_folder = 0
total_men_found_in_women_folder = 0
total_men_images = 0

total_women_found = 0
total_women_found_in_women_folder = 0
total_women_found_in_men_folder = 0
total_women_images = 0



testing_paths = ['testing/man/', 'testing/woman/']

for testing_path in testing_paths:

    for i in os.listdir(testing_path):

        img = image.load_img(testing_path + "/" + i, target_size = (60,60))
        X = image.img_to_array(img)
        X = np.expand_dims(X,axis = 0)
        images = np.vstack([X])
        val = model.predict(images)
        if val == 0:
            pass
        elif val == 1:
            pass
        else :
            val = 0
        
        total_images += 1
        
        if testing_path == 'testing/man/':
            total_men_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_men_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_men_folder += 1

                
                
        elif testing_path == 'testing/woman/':
            total_women_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_women_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_women_folder += 1




In [11]:
import pandas as pd

In [12]:
overall_accuracy = "{:.2f}".format(((total_men_found_in_men_folder + total_women_found_in_women_folder)/ total_images)*100)+ "%"
men_accuracy = "{:.2f}".format(((total_men_found_in_men_folder)/ total_men_images)*100)+ "%"
women_accuracy = "{:.2f}".format(((total_women_found_in_women_folder)/ total_women_images)*100)+ "%"
men_in_women_folder = "{:.2f}".format(((total_women_found_in_men_folder)/ total_men_images)*100)+ "%"
women_in_men_folder = "{:.2f}".format(((total_men_found_in_women_folder)/ total_women_images)*100)+ "%"





In [13]:
print("Overall")
print("Total Images:", total_images)
print("Overall Accuracy:", overall_accuracy)
print("")
print("")
print("Men folder")
print("Total Men Images:", total_men_images)
print("Total Men Found:", total_men_found)
print("Total Men Correctly Identified:", total_men_found_in_men_folder, "/", total_men_images)
print("Total Women found in Men Folder:", total_women_found_in_men_folder, "/", total_men_images)
print("Men Accuracy:", men_accuracy)
print("Women in men folder %:", men_in_women_folder)
print("")
print("")
print("Women folder")
print("Total Women Images:", total_women_images)
print("Total Women Found:", total_women_found)
print("Total Women Correctly Identified:", total_women_found_in_women_folder, "/", total_women_images)
print("Total Men found in Women Folder:", total_men_found_in_women_folder, "/", total_women_images)
print("Women Accuracy", women_accuracy)
print("Men in women folder %:", women_in_men_folder)



Overall
Total Images: 4227
Overall Accuracy: 91.25%


Men folder
Total Men Images: 2290
Total Men Found: 2344
Total Men Correctly Identified: 2132 / 2290
Total Women found in Men Folder: 158 / 2290
Men Accuracy: 93.10%
Women in men folder %: 6.90%


Women folder
Total Women Images: 1937
Total Women Found: 1883
Total Women Correctly Identified: 1725 / 1937
Total Men found in Women Folder: 212 / 1937
Women Accuracy 89.06%
Men in women folder %: 10.94%


# Testing using completely different test set than was used to train, mostly google image bulk downloads

In [14]:
total_images = 0

total_men_found = 0
total_men_found_in_men_folder = 0
total_men_found_in_women_folder = 0
total_men_images = 0

total_women_found = 0
total_women_found_in_women_folder = 0
total_women_found_in_men_folder = 0
total_women_images = 0



testing_paths = ['testing/different-set/man/', 'testing/different-set/woman/']

for testing_path in testing_paths:

    for i in os.listdir(testing_path):

        img = image.load_img(testing_path + "/" + i, target_size = (60,60))
        X = image.img_to_array(img)
        X = np.expand_dims(X,axis = 0)
        images = np.vstack([X])
        val = model.predict(images)
        if val == 0:
            pass
        elif val == 1:
            pass
        else :
            val = 0
        
        total_images += 1
        
        if testing_path == 'testing/different-set/man/':
            total_men_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_men_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_men_folder += 1

                
                
        elif testing_path == 'testing/different-set/woman/':
            total_women_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_women_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_women_folder += 1


In [15]:
overall_accuracy = "{:.2f}".format(((total_men_found_in_men_folder + total_women_found_in_women_folder)/ total_images)*100)+ "%"
men_accuracy = "{:.2f}".format(((total_men_found_in_men_folder)/ total_men_images)*100)+ "%"
women_accuracy = "{:.2f}".format(((total_women_found_in_women_folder)/ total_women_images)*100)+ "%"
men_in_women_folder = "{:.2f}".format(((total_women_found_in_men_folder)/ total_men_images)*100)+ "%"
women_in_men_folder = "{:.2f}".format(((total_men_found_in_women_folder)/ total_women_images)*100)+ "%"


In [16]:
print("Overall")
print("Total Images:", total_images)
print("Overall Accuracy:", overall_accuracy)
print("")
print("")
print("Men folder")
print("Total Men Images:", total_men_images)
print("Total Men Found:", total_men_found)
print("Total Men Correctly Identified:", total_men_found_in_men_folder, "/", total_men_images)
print("Total Women found in Men Folder:", total_women_found_in_men_folder, "/", total_men_images)
print("Men Accuracy:", men_accuracy)
print("Women in men folder %:", men_in_women_folder)
print("")
print("")
print("Women folder")
print("Total Women Images:", total_women_images)
print("Total Women Found:", total_women_found)
print("Total Women Correctly Identified:", total_women_found_in_women_folder, "/", total_women_images)
print("Total Men found in Women Folder:", total_men_found_in_women_folder, "/", total_women_images)
print("Women Accuracy", women_accuracy)
print("Men in women folder %:", women_in_men_folder)


Overall
Total Images: 901
Overall Accuracy: 63.82%


Men folder
Total Men Images: 466
Total Men Found: 408
Total Men Correctly Identified: 274 / 466
Total Women found in Men Folder: 192 / 466
Men Accuracy: 58.80%
Women in men folder %: 41.20%


Women folder
Total Women Images: 435
Total Women Found: 493
Total Women Correctly Identified: 301 / 435
Total Men found in Women Folder: 134 / 435
Women Accuracy 69.20%
Men in women folder %: 30.80%


# Another set from kaggle (its pretty bad, dont take these results as anything... unless they are good)

In [17]:
total_images = 0

total_men_found = 0
total_men_found_in_men_folder = 0
total_men_found_in_women_folder = 0
total_men_images = 0

total_women_found = 0
total_women_found_in_women_folder = 0
total_women_found_in_men_folder = 0
total_women_images = 0



testing_paths = ['testing/another-set/man/', 'testing/another-set/woman/']

for testing_path in testing_paths:

    for i in os.listdir(testing_path):

        img = image.load_img(testing_path + "/" + i, target_size = (60,60))
        X = image.img_to_array(img)
        X = np.expand_dims(X,axis = 0)
        images = np.vstack([X])
        val = model.predict(images)
        if val == 0:
            pass
        elif val == 1:
            pass
        else :
            val = 0
        
        total_images += 1
        
        if testing_path == 'testing/another-set/man/':
            total_men_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_men_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_men_folder += 1

                
                
        elif testing_path == 'testing/another-set/woman/':
            total_women_images += 1
            if val == 0:
                total_men_found +=1
                total_men_found_in_women_folder += 1
            elif val == 1:
                total_women_found +=1
                total_women_found_in_women_folder += 1


In [18]:
overall_accuracy = "{:.2f}".format(((total_men_found_in_men_folder + total_women_found_in_women_folder)/ total_images)*100)+ "%"
men_accuracy = "{:.2f}".format(((total_men_found_in_men_folder)/ total_men_images)*100)+ "%"
women_accuracy = "{:.2f}".format(((total_women_found_in_women_folder)/ total_women_images)*100)+ "%"
men_in_women_folder = "{:.2f}".format(((total_women_found_in_men_folder)/ total_men_images)*100)+ "%"
women_in_men_folder = "{:.2f}".format(((total_men_found_in_women_folder)/ total_women_images)*100)+ "%"

In [19]:
print("Overall")
print("Total Images:", total_images)
print("Overall Accuracy:", overall_accuracy)
print("")
print("")
print("Men folder")
print("Total Men Images:", total_men_images)
print("Total Men Found:", total_men_found)
print("Total Men Correctly Identified:", total_men_found_in_men_folder, "/", total_men_images)
print("Total Women found in Men Folder:", total_women_found_in_men_folder, "/", total_men_images)
print("Men Accuracy:", men_accuracy)
print("Women in men folder %:", men_in_women_folder)
print("")
print("")
print("Women folder")
print("Total Women Images:", total_women_images)
print("Total Women Found:", total_women_found)
print("Total Women Correctly Identified:", total_women_found_in_women_folder, "/", total_women_images)
print("Total Men found in Women Folder:", total_men_found_in_women_folder, "/", total_women_images)
print("Women Accuracy", women_accuracy)
print("Men in women folder %:", women_in_men_folder)

Overall
Total Images: 299
Overall Accuracy: 41.14%


Men folder
Total Men Images: 92
Total Men Found: 228
Total Men Correctly Identified: 72 / 92
Total Women found in Men Folder: 20 / 92
Men Accuracy: 78.26%
Women in men folder %: 21.74%


Women folder
Total Women Images: 207
Total Women Found: 71
Total Women Correctly Identified: 51 / 207
Total Men found in Women Folder: 156 / 207
Women Accuracy 24.64%
Men in women folder %: 75.36%
