# Importing Libraries 

In [None]:
import pandas as pd
import numpy as np

from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import GridSearchCV

from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

# Loading Training and Test Data 

In [None]:
# Read the training and test CSV files from the working directory
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('fashion-mnist_train.csv', 'fashion-mnist_test.csv')
)

In [None]:
# Column 0 holds the target values; every remaining column is a feature.
y_train, X_train = train.iloc[:, 0], train.iloc[:, 1:]
y_test, X_test = test.iloc[:, 0], test.iloc[:, 1:]

# Reducing the Number of Dimensions of the Training Data

In [None]:
# Scaling the Data
# The scaler is fitted on the training set only and the very same mean/variance
# is then applied to the test set. Fitting a second scaler on the test set would
# standardise the two sets with different statistics (and leak test-set
# information into preprocessing), so the features would no longer be comparable.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fit a 150-component truncated SVD (fixed seed) on the scaled training data
svd = TruncatedSVD(n_components=150, random_state=42).fit(X_train_scaled)
svd  # fit() returns the estimator itself; keep it as the cell's displayed value

In [None]:
# Fraction of the total variance retained by the 150 selected components
retained_variance = svd.explained_variance_ratio_.sum()
print(retained_variance)

In [None]:
# Explained-variance ratio of each SVD component, labelled C1, C2, ...
evr = svd.explained_variance_ratio_
component_labels = [f'C{k}' for k in range(1, len(evr) + 1)]
evr_df = pd.DataFrame(evr, index=component_labels)
evr_df

In [None]:
# Project the scaled training data onto the fitted SVD components
X_train_svd = svd.transform(X_train_scaled)
X_train_svd  # shown as the cell output

In [None]:
# Projecting the test set onto the components learned from the training data.
# The SVD must NOT be refitted on the test set: a second fit yields a different
# set of components, so the columns of X_test_svd would no longer mean the same
# thing as the columns of X_train_svd that the classifiers are trained on.
# `svd` here is the estimator fitted on X_train_scaled above; its explained
# variance has already been reported, so only the projection is needed.
X_test_svd = svd.transform(X_test_scaled)
X_test_svd

# Training and Testing Naive Bayes 

## Original Dataset

In [None]:
# Gaussian Naive Bayes trained and evaluated on the original feature columns
gNB = GaussianNB().fit(X_train, y_train)
nb_predict = gNB.predict(X_test)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, nb_predict))
average_accuracy = (y_test == nb_predict).mean() * 100
print(
    "The average test accuracy of Gaussian Naive Bayes Classifier is {0:.1f}%.".format(average_accuracy)
)

## Processed Dataset 

In [None]:
# Gaussian Naive Bayes trained and evaluated on the SVD-transformed features
gNB = GaussianNB().fit(X_train_svd, y_train)
nb_predict = gNB.predict(X_test_svd)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, nb_predict))
average_accuracy = (y_test == nb_predict).mean() * 100
print(
    "The average test accuracy of Gaussian Naive Bayes Classifier is {0:.1f}%.".format(average_accuracy)
)

# Training and Testing KNN

## Default Parameters

### Original Dataset

In [None]:
# K-nearest-neighbours with default parameters on the original feature columns
knn = KNeighborsClassifier().fit(X_train, y_train)
knn_predict = knn.predict(X_test)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, knn_predict))
average_accuracy = (y_test == knn_predict).mean() * 100
print(
    "The average test accuracy of the KNeighbors Classifier is {0:.1f}%.".format(average_accuracy)
)

### Processed Dataset 

In [None]:
# K-nearest-neighbours with default parameters on the SVD-transformed features
knn = KNeighborsClassifier().fit(X_train_svd, y_train)
knn_predict = knn.predict(X_test_svd)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, knn_predict))
average_accuracy = (y_test == knn_predict).mean() * 100
print(
    "The average test accuracy of the KNeighbors Classifier is {0:.1f}%.".format(average_accuracy)
)

## Hypertuning KNN 

In [None]:
# Grid search over the number of neighbours (5 through 9) for KNN,
# fitted on the SVD-transformed training data
knn_hypertune_params = {'n_neighbors': list(range(5, 10))}
knn_hypertune = KNeighborsClassifier()

knn_grid = GridSearchCV(
    estimator=knn_hypertune,
    param_grid=knn_hypertune_params,
    n_jobs=6,
    verbose=5,
)

# fit() hands back the fitted search object, which exposes the best estimator
knn_hypertuned = knn_grid.fit(X_train_svd, y_train)
best_knn_model = knn_hypertuned.best_estimator_

print(f'Best estimator: {best_knn_model}')

In [None]:
# Evaluate the best estimator found by the grid search on the SVD-transformed test set
knn_predict = best_knn_model.predict(X_test_svd)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, knn_predict))
average_accuracy = (y_test == knn_predict).mean() * 100
print(
    "The average test accuracy of the KNeighbors Classifier is {0:.1f}%.".format(average_accuracy)
)

# Training and Testing MLR

## Default Parameters

### Original Dataset

In [None]:
# Logistic regression with default parameters on the original feature columns
# NOTE(review): warnings are silenced at the top of the notebook, so a
# convergence warning from the default solver settings would go unnoticed
# here - worth confirming that the fit actually converges.
mlr = LogisticRegression().fit(X_train, y_train)
mlr_predict = mlr.predict(X_test)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, mlr_predict))
average_accuracy = (y_test == mlr_predict).mean() * 100
print(
    "The average test accuracy of the Multinomial Logistic Regression is {0:.1f}%.".format(average_accuracy)
)

### Processed Dataset 

In [None]:
# Logistic regression with default parameters on the SVD-transformed features
mlr = LogisticRegression().fit(X_train_svd, y_train)
mlr_predict = mlr.predict(X_test_svd)

# Classification report, then the share of correct predictions in percent
print(metrics.classification_report(y_test, mlr_predict))
average_accuracy = (y_test == mlr_predict).mean() * 100
print(
    "The average test accuracy of the Multinomial Logistic Regression is {0:.1f}%.".format(average_accuracy)
)

# Conclusion

**Naive Bayes Classifier**
- Original Dataset: 
    - Accuracy: 0.591
    
- Processed Dataset:
    - Accuracy: 0.552
    
**K Nearest Neighbors Classifier**
- Original Dataset:
    - Accuracy: 0.86
    
- Processed Dataset:
    - Accuracy: 0.722
    
- Hypertuning:
    - Accuracy: 0.724
   
**Multinomial Logistic Regression**
- Original Dataset:
    - Accuracy: 0.854

- Processed Dataset:
    - Accuracy: 0.666