In [None]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import pandas as pd
import seaborn as sns
import tqdm
from skimage import feature
from skimage.feature import hog

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, 
                             classification_report, confusion_matrix, roc_auc_score)
from sklearn.model_selection import (StratifiedKFold, train_test_split, 
                                     GridSearchCV, cross_val_score)
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn.multiclass import OneVsRestClassifier

### Preprocessing

In [None]:
# Read the product metadata (first 10,000 rows only), skipping malformed lines.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0, where
# it raises TypeError; `on_bad_lines='skip'` is its replacement. The fallback
# keeps the cell working on pandas versions that predate `on_bad_lines`.
STYLES_CSV = '../input/fashion-product-images-small/styles.csv'
try:
    style = pd.read_csv(STYLES_CSV, nrows=10000, on_bad_lines='skip')
except TypeError:
    # Older pandas: `on_bad_lines` is not a recognised keyword.
    style = pd.read_csv(STYLES_CSV, nrows=10000, error_bad_lines=False)

# Drop the metadata columns that the later cells do not use; they only rely on
# `id` (merge key) and `masterCategory` (classification target).
style = style.drop(['year', 'season', 'gender', 'subCategory', 'articleType',
                    'baseColour', 'usage', 'productDisplayName'], axis=1)

# Remove rows with any missing value and renumber the index from 0.
style = style.dropna(how='any').reset_index(drop=True)
style.tail()

In [None]:
# Horizontal bar chart of how many rows fall into each master category.
fig, ax = plt.subplots()
sns.countplot(y=style['masterCategory'], ax=ax)
plt.show()

In [None]:
# Load every file in the image directory as a grayscale array resized to
# 60x80 (width x height, so each array has shape (80, 60)), paired with the
# part of its file name before the first dot (later cast to an integer id).
# Names are chosen so the builtins `dir`, `id` and `file` are not shadowed
# for the rest of the kernel session.
image_dir = '../input/fashion-product-images-small/images/'
file_images = []
for file_name in os.listdir(image_dir):
    image_id = file_name.split('.')[0]
    img = cv.imread(os.path.join(image_dir, file_name), cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread signals an unreadable / non-image file by returning None
        # instead of raising; cv.resize would then fail, so skip the file.
        continue
    img_resize = cv.resize(img, (60, 80), interpolation=cv.INTER_AREA)
    file_images.append([image_id, img_resize])

print(len(file_images))

In [None]:
# Convert to DataFrame
labels_df = pd.DataFrame(file_images, columns=['id','image'])
labels_df['id'] = labels_df['id'].astype(int)    # Make sure id columns dtype is int
labels_df.head()

In [None]:
# Attach each metadata row to its image via `id`. The left join keeps every
# metadata row; rows left with a missing value (e.g. no matching image) are
# then dropped and the index is renumbered from 0.
data = (
    pd.merge(style, labels_df, how='left', on=['id'])
    .dropna(how='any')
    .reset_index(drop=True)
)
data.tail()

In [None]:
# Encode the master-category strings as integer class labels.
# `le` is kept so the integer labels can be mapped back to category names.
le = LabelEncoder()
le.fit(data['masterCategory'])
data['label'] = le.transform(data['masterCategory'])
data.head()

### Feature Extraction

In [None]:
# Stack the per-row image arrays into one array along a new leading axis.
train_images = np.stack(data['image'].tolist(), axis=0)
print(train_images.shape)

In [None]:
# Show one randomly chosen image together with its master category.
# The upper bound is the actual number of images: rows are dropped by the
# earlier dropna/merge steps, so a fixed bound of 9999 can index past the end.
random_sample = np.random.randint(0, len(train_images))
plt.imshow(train_images[random_sample], cmap='gray')
# Positional lookup so the title always matches the row used for the image.
plt.title(data['masterCategory'].iloc[random_sample])
plt.show()

In [None]:
# HOG: compute a Histogram-of-Oriented-Gradients descriptor for every image.
# With visualize=True, `hog` returns a (feature vector, visualisation) pair.
hog_results = [
    hog(image, orientations=9,
        pixels_per_cell=(8, 8), cells_per_block=(2, 2),
        block_norm='L2', visualize=True)
    for image in train_images
]

# Split the pairs: visualisations stay a list, descriptors become one array.
hog_images = [visualisation for _, visualisation in hog_results]
hog_features = np.array([descriptor for descriptor, _ in hog_results])
hog_features.shape

### Model

In [None]:
# Stratified 80/20 split of the HOG descriptors and their integer labels.
# A fixed random_state makes the shuffle, and therefore every metric reported
# below, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(hog_features, data.label,
                                                    stratify=data.label,
                                                    test_size=.2, shuffle=True,
                                                    random_state=42)
print(x_train.shape)
print(x_test.shape)

In [None]:
# SVM: cross-validate on the training split, then fit a final model and
# predict on both splits. The whole cell is timed.
start = time.perf_counter()

# Validation: 10-fold stratified cross-validation accuracy on the training set.
# The splitter is named `skf`, not `cv`, because `cv` is the alias of the
# OpenCV module (`import cv2 as cv`); rebinding it would break the
# image-loading cell if it is run again in the same kernel.
svm = SVC()
skf = StratifiedKFold(n_splits=10)
validation = cross_val_score(svm, x_train, y_train, cv=skf, scoring='accuracy', n_jobs=-1)

# Training: fit a fresh classifier with probability estimates enabled.
svm = SVC(probability=True)
svm.fit(x_train, y_train)
y_pred_train = svm.predict(x_train)
y_pred_test = svm.predict(x_test)    # Testing

elapsed = time.perf_counter() - start
print('Elapsed %.3f seconds.' % elapsed)

In [None]:
# Mean cross-validation accuracy from the training split.
print('Accuracy_Val : {:.4f}\n'.format(validation.mean()))

# Held-out test metrics; precision, recall and F1 are macro-averaged, i.e.
# every class counts equally regardless of how many samples it has.
test_accuracy = accuracy_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test, average='macro')
test_recall = recall_score(y_test, y_pred_test, average='macro')
test_f1 = f1_score(y_test, y_pred_test, average='macro')
print('Accuracy_test : {:.4f}'.format(test_accuracy),
      'Precision_test : {:.4f}'.format(test_precision),
      'Recall_test : {:.4f}'.format(test_recall),
      'F1-Score : {:.4f}'.format(test_f1))

# Per-class breakdown (classes appear as their integer labels).
test_report = classification_report(y_test, y_pred_test)
print('\nclassification report testing : \n', test_report)

# Repeat the timing measured in the training cell.
print('Elapsed %.3f seconds.' % elapsed)