# Tagging Fashion Photos

### A multi class binary classification problem. Predicting tags for a set of fashion photos.

## Importing data

In [1]:
#import libraries
import json
import pandas as pd
import numpy as np

In [2]:
from tqdm import tqdm

In [3]:
#get data from the json file
with open('C:/Users/mtrig/Documents/data science/tashion photos/data/train.json') as data_file:
    data = json.load(data_file)

##### Looking at data...

In [4]:
list(data.keys())

['info', 'images', 'annotations', 'license']

In [5]:
data['images'][0]

{'imageId': '1',
 'url': 'https://contestimg.wish.com/api/webimage/570f35feb2f4b95d223aa9b1-large'}

In [6]:
len(data['images'])

1014544

##### Data to Pandas dataframe

In [7]:
img_urls = data['images']

In [8]:
img_urls = pd.DataFrame(img_urls)

In [9]:
annos = data['annotations']

In [10]:
annos = pd.DataFrame(annos)

In [11]:
#create the dataframe
train=pd.merge(img_urls, annos, on='imageId', how='inner')

In [12]:
#check out the new dataframe
train.head()

Unnamed: 0,imageId,url,labelId
0,1,https://contestimg.wish.com/api/webimage/570f3...,"[95, 66, 137, 70, 20]"
1,2,https://contestimg.wish.com/api/webimage/5468f...,"[36, 66, 44, 214, 105, 133]"
2,3,https://contestimg.wish.com/api/webimage/54641...,"[170, 66, 97, 153, 105, 138]"
3,4,https://contestimg.wish.com/api/webimage/550b9...,"[18, 66, 44, 153, 164]"
4,5,https://contestimg.wish.com/api/webimage/54451...,"[189, 66, 176, 137, 153, 74, 181, 171, 138]"


##### How many unique labels do we have?

In [13]:
#counter function in another notebook
%run uniqueCounter.ipynb

In [14]:
#call counter function on labels column
uniqueCount(train, train.labelId[0:20])

100%|██████████| 20/20 [00:00<00:00, 1666.13it/s]


52

## Data preprocessing

##### Getting right format for y labels

In [15]:
#numpy array
y = np.array(train['labelId'][0:20])

In [16]:
#y

##### We will use multilabel binarizer to one hot encode y labels

In [17]:
from sklearn.preprocessing import MultiLabelBinarizer

In [18]:
mlb = MultiLabelBinarizer(sparse_output=False)

In [19]:
oh_labelId = mlb.fit_transform(y)

In [20]:
#oh_labelId

In [21]:
oh_labelId.shape

(20, 52)

##### Getting pictures

In [22]:
from urllib.request import urlretrieve
import os

In [23]:
#Download pictjres to file
os.makedirs('C:/Users/mtrig/Documents/data science/tashion photos/photos_test/')
for i in tqdm(range(0,20)):
    urlretrieve(train['url'][i], 'C:/Users/mtrig/Documents/data science/tashion photos/photos_test/photo' +
                str(i+1) + '.png')

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:/Users/mtrig/Documents/data science/tashion photos/photos_test/'

## Image preprocessing

In [None]:
from PIL import Image, ImageOps
import os

In [None]:
import imageio

In [None]:
from scipy import ndimage

In [None]:
img_dir = r"C:\Users\mtrig\Documents\data science\tashion photos\photos\\" 
data_path = os.listdir(img_dir)

for item in data_path:
    im = Image.open(img_dir+item)
    
    im = im.resize((128,128), Image.ANTIALIAS)
    im.save(img_dir+item)

In [None]:
dir = r'C:\Users\mtrig\Documents\data science\tashion photos\photos'
imList = []
for imageName in sorted(os.listdir(dir)):
    imList.append(imageio.imread(dir + os.sep + imageName).transpose((2,0,1)))

In [None]:
imList[0].shape

In [None]:
imTensor = np.stack(imList).astype('uint8')

In [None]:
imTensor.shape

## Validation data...

In [None]:
#get data from the json file
with open('C:/Users/mtrig/Documents/data science/tashion photos/data/train.json') as data_file:
    data = json.load(data_file)

## Build the neural network!

In [None]:
from keras.models import Sequential

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten

In [None]:
from keras.layers import Conv2D, MaxPooling2D

In [None]:
model = Sequential()

In [None]:
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(3,128,128)))

In [None]:
model.add(Conv2D(32, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

In [None]:
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(oh_labelId.shape[1], activation='sigmoid'))

In [None]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
model.fit(imTensor, oh_labelId, batch_size=32, epochs=10, verbose=1)