# Importing the Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt

#To retrieve image from numpy array
from PIL import Image 
from matplotlib import cm

%matplotlib inline

# Showing Directories with List

In [None]:
# Root folder of the Kaggle fashion dataset. The trailing slash is kept because
# paths are built from this value by plain string concatenation.
DATASET_PATH = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/fashion-dataset/"

# Show the top-level entries of the dataset folder
dataset_entries = os.listdir(DATASET_PATH)
print(dataset_entries)

# What is inside the image directory
## Observing some sample image

In [None]:
# Inspect the dataset root and preview the first few product images.
list_directory = os.listdir(DATASET_PATH)
print(list_directory)

# os.listdir() gives no ordering guarantee, so the "images" folder is located by
# name. `list_directory` and `index` are kept as module-level names in case
# other cells read them.
if "images" not in list_directory:
    raise FileNotFoundError('No "images" folder found in ' + DATASET_PATH)
index = list_directory.index("images")

# File names inside the image directory
imageIndex = os.listdir(DATASET_PATH + list_directory[index])
head = 10

# Keep the first `head` file names as samples (slicing tolerates a folder with
# fewer than `head` files) and print them.
sampleImages = imageIndex[:head]
for sample_name in sampleImages:
    print(sample_name)

# Lay the samples out on a grid. add_subplot() needs integer row/column counts,
# so integer arithmetic is used: at most 5 columns, as many rows as required.
fig = plt.figure()
fig.set_figheight(15)
fig.set_figwidth(15)
axis = []
col = max(1, min(5, len(sampleImages)))
row = (len(sampleImages) + col - 1) // col  # ceiling division

for i, sample_name in enumerate(sampleImages):
    Image_path = DATASET_PATH + list_directory[index] + "/" + sample_name
    src = cv2.imread(Image_path)
    if src is None:
        # cv2.imread() returns None instead of raising when a file cannot be decoded
        print("Could not read image: " + sample_name)
        continue
    # OpenCV loads channels as BGR; convert so matplotlib shows true colours
    image = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
    axis.append(fig.add_subplot(row, col, i + 1))
    axis[-1].set_title(sample_name)
    # Draw on the subplot that was just created rather than on "the current axes"
    axis[-1].imshow(image)

fig.tight_layout()
plt.show()

# Problem1->We have no labeled data. Exploring styles.csv for more info. 

In [None]:
# Load the first 10,000 rows of the style metadata.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# on_bad_lines="warn" is its replacement for error_bad_lines=False with the
# default warning behaviour: malformed rows are skipped and a warning is issued.
df = pd.read_csv(DATASET_PATH + "styles.csv", nrows=10000, on_bad_lines="warn")
df.head(10)

# Image id matches with styles.csv file Id column. 
## For ease of access, adding another column named 'image' that holds '<id>.jpg'

In [None]:
# Build the image file name for every row: "<id>.jpg"
image_names = df['id'].astype(str) + ".jpg"
df['image'] = image_names
df.head(10)

# Building another dataframe with columns- image and article type

In [None]:
# Two-column frame: image file name and its article type
augmentedDataframe = (
    df[['image', 'articleType']]
    .rename(columns={'image': 'filename', 'articleType': 'type'})
)

# Number of entries in the frame
total_row = augmentedDataframe.shape[0]
print('total row count: ', total_row)

augmentedDataframe.head(10)

In [None]:
# Number of rows per article type, most frequent first
type_counts = augmentedDataframe['type'].value_counts()
type_counts

# The value counts above contain 108 entries, i.e. there are 108 distinct article-type classes.

In [None]:
# Bar chart of how many rows each article type has
count_fig, count_ax = plt.subplots(figsize=(20, 5))
per_type = augmentedDataframe['type'].value_counts()
per_type.plot(kind='bar', ax=count_ax)
plt.show()

# Starting with train and test data
## Inserting Image and Labels in numpy array

# Utility function to load images and insert them into an image array
## The start and stop parameters are used for the train/test separation

## Here all X=image_data, Y=label_data

In [None]:
def getData(start, stop, size=(80, 80)):
    """Load grayscale images and labels for dataframe rows ``start`` .. ``stop - 1``.

    Rows are looked up in the module-level ``augmentedDataframe`` by index label;
    the image file is read from the "images" folder under ``DATASET_PATH``.

    Parameters
    ----------
    start : int
        First row label to load (inclusive).
    stop : int
        Row label at which to stop (exclusive).
    size : tuple of int, optional
        Target ``(width, height)`` passed to ``cv2.resize``. Defaults to
        ``(80, 80)``, the size the rest of the notebook reshapes to.

    Returns
    -------
    x_data : list
        Resized grayscale images, one array per successfully read row.
    y_data : list
        The ``'type'`` value of each successfully read row, in the same order.

    Notes
    -----
    Rows whose image cannot be read are reported with a "Rejected Image"
    message and skipped, so the returned lists may be shorter than
    ``stop - start``.
    """
    # Resolve the folder by name instead of through the global `index`, which
    # other cells rebind to a dataframe row number.
    image_dir = os.path.join(DATASET_PATH, "images")
    x_data = []
    y_data = []

    for i in range(start, stop):
        filename = augmentedDataframe.loc[i, 'filename']
        image = cv2.imread(os.path.join(image_dir, filename), cv2.IMREAD_GRAYSCALE)
        # cv2.imread() signals a missing/corrupt file by returning None; check
        # that directly rather than probing with a resize inside a bare except.
        if image is None:
            print("Rejected Image: " + filename)
            continue
        x_data.append(cv2.resize(image, dsize=size))
        y_data.append(augmentedDataframe.loc[i, 'type'])

    return x_data, y_data

# Utility function to extract image from dataframe according to index

In [None]:
def getImage(it, size=(80, 80)):
    """Return the resized grayscale image for dataframe row ``it``.

    Parameters
    ----------
    it : int
        Index label of the row in the module-level ``augmentedDataframe``.
    size : tuple of int, optional
        Target ``(width, height)`` passed to ``cv2.resize``; defaults to
        ``(80, 80)``.

    Returns
    -------
    numpy.ndarray
        The image read with ``cv2.IMREAD_GRAYSCALE`` and resized to ``size``.

    Raises
    ------
    FileNotFoundError
        If the image file cannot be read.
    """
    filename = augmentedDataframe.loc[it, 'filename']
    # The folder name is fixed, so there is no need to list the dataset
    # directory on every call just to find it.
    image_path = os.path.join(DATASET_PATH, "images", filename)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread() returns None for unreadable files; fail with a clear message
        raise FileNotFoundError("Could not read image: " + image_path)
    return cv2.resize(image, dsize=size)

# Test of the function

In [None]:
# Smoke test of getData(): load only the first 0.1% of the rows.
# total_row is the number of rows in augmentedDataframe.
start = 0
stop = int(0.001 * total_row)
X, Y = getData(start, stop)

In [None]:
# Look at the first loaded sample: container type, raw pixels, label and picture
first_image, first_label = X[0], Y[0]
print(type(X))
print(first_image)
plt.title(first_label)
plt.imshow(first_image)

# Successful insertion of images in the array. Also we got the labels required.

# Change into numpy array

In [None]:
# Stack the images into one array with an explicit single-channel axis
X = np.reshape(np.array(X), (-1, 80, 80, 1))
# Labels as a flat array
Y = np.array(Y)

# Normalizing by 255

In [None]:
# Divide every pixel value by 255
X = np.divide(X, 255.0)

# Processing labels for dimension matching

In [None]:
# One label per row: shape (number of images, 1)
Y = np.reshape(Y, (len(X), 1))

# Sample printing

In [None]:
# Print container type, array shapes and the first image/label pair
for item in (type(X), X.shape, Y.shape, X[0], Y[0]):
    print(item)

# Now our function is working. Next we split the data into train and test sets.

## Splitting off 80% of the images for training

In [None]:
# Training split: dataframe rows from 0 up to 80% of total_row
start = 0
stop = int(0.8 * total_row)
X_train, Y_train = getData(start, stop)

# Lists -> numpy arrays; images get an explicit single-channel axis and every
# pixel value is divided by 255
X_train = np.reshape(np.array(X_train), (-1, 80, 80, 1)) / 255

# Labels as a column so the first dimension matches X_train
Y_train = np.reshape(np.array(Y_train), (len(X_train), 1))

In [None]:
# Report the shapes of the training arrays
train_report = (
    ('Train Image Data Shape: ', X_train.shape),
    ('Train Label Data Shape: ', Y_train.shape),
    ('Dimension of Train Data: ', X_train.ndim),
)
for caption, value in train_report:
    print(caption, value)

## The remaining 20% of the images are used for testing

In [None]:
# Test split: dataframe rows from 80% of total_row up to the last row
start = int(0.8 * total_row)
stop = total_row
X_test, Y_test = getData(start, stop)

# Lists -> numpy arrays; images get an explicit single-channel axis and every
# pixel value is divided by 255
X_test = np.reshape(np.array(X_test), (-1, 80, 80, 1)) / 255

# Labels as a column so the first dimension matches X_test
Y_test = np.reshape(np.array(Y_test), (len(X_test), 1))

In [None]:
# Report the shapes of the test arrays
for caption, value in (('Test Image Data Shape: ', X_test.shape),
                       ('Test Label Data Shape: ', Y_test.shape)):
    print(caption, value)

> ## Splitting done with train and test set.

# Starting with decision tree and support vector machine

> # According to the sklearn documentation, each model we train below only accepts 2-D input arrays, so preprocessing is required.

> # As the function takes only 2-Dimensional matrix.

In [None]:
# Flatten every 80x80x1 image into one row of nx*ny features
nx = 80
ny = 80
number_of_train_samples = X_train.shape[0]
number_of_test_samples = X_test.shape[0]

flat_shape_train = (number_of_train_samples, nx * ny)
flat_shape_test = (number_of_test_samples, nx * ny)

X_train_reshaped = np.reshape(X_train, flat_shape_train)
print(X_train_reshaped.shape)
X_test_reshaped = np.reshape(X_test, flat_shape_test)
print(X_test_reshaped.shape)

# Starting with the decision tree classifier algorithm

In [None]:
from sklearn.tree import DecisionTreeClassifier 

> # Creating model->Decision Tree

In [None]:
# Fit a depth-limited decision tree on the flattened training images.
# random_state is fixed so the fitted tree, and therefore the accuracy reported
# below, is the same on every "Restart & Run All"; without it sklearn may pick
# differently between equally good splits from run to run.
dtree_model = DecisionTreeClassifier(max_depth=8, random_state=42).fit(X_train_reshaped, Y_train)

> # Model accuracy

In [None]:
# Score of the decision tree on the held-out test split
accuracy = dtree_model.score(X=X_test_reshaped, y=Y_test)
print(accuracy)

In [None]:
# Predict a class for every test image, then inspect the result and its input
dtree_predictions = dtree_model.predict(X_test_reshaped)
for item in (type(dtree_predictions),
             dtree_predictions.shape,
             X_test_reshaped.shape,
             X_test_reshaped[0]):
    print(item)

> # Problem2->How to convert image from numpy array
> ## solve-getting image from index of dataframe

> # Showing image and label from predicted test_set

In [None]:
nsample = 0  # index of the sample within the test split

# The test split starts at dataframe row int(0.8 * total_row). Offsetting by the
# number of loaded training images instead would point at the wrong row whenever
# getData() rejected a training image. A dedicated name is used so the
# module-level `index` (position of the image folder) is not overwritten.
# NOTE: this still assumes no test image before `nsample` was rejected.
test_start_row = int(0.8 * total_row)
row_index = test_start_row + nsample
image = getImage(row_index)

# PC = predicted class, TC = true class
plt.title(f"PC = {dtree_predictions[nsample]}\n TC = {Y_test[nsample]}")
plt.imshow(image)

In [None]:
# PC = predicted class, TC = true class
L = 5
W = 5
fig, axes = plt.subplots(L, W, figsize=(12, 12))
axes = axes.ravel()

# One panel per test sample, for the first L*W samples
for i, panel in enumerate(axes):
    src = X_test[i].reshape(80, 80)
    # Map the pixel values through the gist_earth colormap and scale to 0..255
    colored = cm.gist_earth(src) * 255
    im = Image.fromarray(np.uint8(colored))
    panel.imshow(im)
    panel.set_title(f"PC = {dtree_predictions[i]}\n TC = {Y_test[i]}")
    panel.axis('off')
plt.subplots_adjust(wspace=0.6)

> # Done with decision tree model

# Testing with support vector machine

In [None]:
from sklearn.svm import SVC

In [None]:
# Training labels are stored as a column; flatten them to one dimension
print(Y_train.shape)
flat_Y_train = Y_train.ravel()

In [None]:
# Flattened labels and their shape
for item in (flat_Y_train, flat_Y_train.shape):
    print(item)

> # Creating model->Support Vector Machine

In [None]:
# Linear-kernel SVM with C = 1, fitted on the flattened images and labels
svm_model_linear = SVC(kernel='linear', C=1)
svm_model_linear = svm_model_linear.fit(X_train_reshaped, flat_Y_train)

> # Prediction of test data

In [None]:
# Predicted class for every flattened test image
svm_predictions = svm_model_linear.predict(X=X_test_reshaped)

> # Accuracy of the model

In [None]:
# Score of the SVM on the held-out test split
accuracy = svm_model_linear.score(X=X_test_reshaped, y=Y_test)
print(accuracy)

> # Showing image and label from predicted data set

In [None]:
nsample = 0  # index of the sample within the test split

# The test split starts at dataframe row int(0.8 * total_row). Offsetting by the
# number of loaded training images instead would point at the wrong row whenever
# getData() rejected a training image. A dedicated name is used so the
# module-level `index` (position of the image folder) is not overwritten.
# NOTE: this still assumes no test image before `nsample` was rejected.
test_start_row = int(0.8 * total_row)
row_index = test_start_row + nsample
image = getImage(row_index)

# PC = predicted class, TC = true class
plt.title(f"PC = {svm_predictions[nsample]}\n TC = {Y_test[nsample]}")
plt.imshow(image)

In [None]:
# PC = predicted class, TC = true class
L = 5
W = 5
fig, axes = plt.subplots(L, W, figsize=(12, 12))
axes = axes.ravel()

# One panel per test sample, for the first L*W samples
for i, panel in enumerate(axes):
    src = X_test[i].reshape(80, 80)
    # Map the pixel values through the gist_earth colormap and scale to 0..255
    colored = cm.gist_earth(src) * 255
    im = Image.fromarray(np.uint8(colored))
    panel.imshow(im)
    panel.set_title(f"PC = {svm_predictions[i]}\n TC = {Y_test[i]}")
    panel.axis('off')
plt.subplots_adjust(wspace=0.6)

# Single Image Prediction

In [None]:
nsample = 511  # index of the sample within the test split

# The test split starts at dataframe row int(0.8 * total_row). Offsetting by the
# number of loaded training images instead would point at the wrong row whenever
# getData() rejected a training image. A dedicated name is used so the
# module-level `index` (position of the image folder) is not overwritten.
# NOTE: this still assumes no test image before `nsample` was rejected.
test_start_row = int(0.8 * total_row)
row_index = test_start_row + nsample
image = getImage(row_index)

plt.imshow(image)

In [None]:
nx = 80
ny = 80

# Flatten the image to a single row of nx*ny features and divide by 255, the
# same scaling applied to the training images
single_reshaped = image.reshape((1, nx * ny))
single_reshaped = single_reshaped / 255
print(single_reshaped)
print(single_reshaped.shape)

# Keep the single result under its own name: rebinding `svm_predictions` would
# replace the test-set predictions with a one-element array and break the
# prediction-grid cell if it is run again afterwards.
single_prediction = svm_model_linear.predict(single_reshaped)
print(type(single_prediction))
print(single_prediction[0])