In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn import metrics
from math import sqrt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the Fashion MNIST CSV files into DataFrames (training file first, then test)
training_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('fashion-mnist_train.csv', 'fashion-mnist_test.csv')
)

# Data Splitting and Visualization 

In [None]:
# Carve the loaded training frame into two positional slices:
# the first 50000 rows are used for fitting, the last 10000 rows for validation
train_data = training_data.iloc[:50000]
valid_data = training_data.iloc[-10000:]

In [None]:
# Preview the first nine validation rows as 28x28 grayscale images in a 3x3 grid,
# each titled with the value stored in column 0 of that row
plt.figure(figsize=(10, 10))
for slot in range(9):
    title_value = valid_data.iloc[slot, 0]
    pixels = valid_data.iloc[slot, 1:].values
    plt.subplot(3, 3, slot + 1)
    plt.imshow(pixels.reshape((28, 28)), cmap='gray')
    plt.title(title_value)
    # Hide the axis ticks; they carry no information for an image preview
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
# For every split: column 0 becomes the target vector, and all remaining columns
# become the feature matrix after division by 255.0
X_train, y_train = train_data.iloc[:, 1:].values / 255.0, train_data.iloc[:, 0].values
X_valid, y_valid = valid_data.iloc[:, 1:].values / 255.0, valid_data.iloc[:, 0].values
X_test, y_test = test_data.iloc[:, 1:].values / 255.0, test_data.iloc[:, 0].values

In [None]:
# Inspect the scaled training feature matrix
X_train

In [None]:
# Inspect the training targets (column 0 of train_data)
y_train

In [None]:
# Dimensions of the training feature matrix
X_train.shape

Dictionary to store all the **Models** and their **Accuracy** and **Precision** values 

In [None]:
# Running record of evaluated models: three parallel lists, one entry per model
accuracy_dic = dict(model=[], accuracy=[], precision=[])

In [None]:
# Plotting performance
def plot_perf(train_accuracy, valid_accuracy, test_accuracy, model):
    """Draw and show a line plot of a model's accuracy on the three data splits.

    Parameters
    ----------
    train_accuracy, valid_accuracy, test_accuracy
        Accuracy values plotted, in this order, at the x positions labelled
        'Training', 'Validation' and 'Test'.
    model : str
        Model name; concatenated onto 'Performance of ' to form the title.
    """
    split_names = ['Training', 'Validation', 'Test']
    scores = [train_accuracy, valid_accuracy, test_accuracy]

    plt.plot(scores, label='Accuracy')
    plt.legend()
    # One tick per split, in the same order as the plotted scores
    plt.xticks(range(len(split_names)), split_names)
    plt.title('Performance of ' + model)
    plt.show()

# Multinomial Logistic Regression (Softmax Regression)

Trying different values of `C` (the inverse regularization strength) to find the best-fitting model

In [None]:
# Train the logistic regression model
# Trying out different hyperparameter values to find the best accuracy model
%%time
C = [0.01, 0.1, 1, 10, 100]
accuracy = {'C':[], 'Accuracy':[]}
logistic_models = {'C':[], 'Model':[]}
for c in C:
    # solver=saga for using large dataset and run faster
    logistic_reg = LogisticRegression(multi_class='multinomial', C=c ,random_state=43, solver='saga')
    logistic_models['Model'].append(logistic_reg.fit(X_train, y_train))
    # Finding accuracy of the model using the validation data
    accuracy['C'].append(c)
    logistic_models['C'].append(c)
    accuracy['Accuracy'].append(accuracy_score(y_valid,logistic_reg.predict(X_valid)))

In [None]:
# Tabulate validation accuracy against C (one column per dictionary key)
accuracy_dataframe = pd.DataFrame(accuracy)
accuracy_dataframe

In [None]:
# Tabulate the fitted estimators against the C value each was trained with
model_dataframe = pd.DataFrame(logistic_models)
model_dataframe

In [None]:
c_values, accuracy_values = accuracy['C'], accuracy['Accuracy']

# Pick the first C (in search order) whose validation accuracy equals the maximum
max_accuracy = max(accuracy_values)
best_c = c_values[accuracy_values.index(max_accuracy)]
print('Best C for Logistic Model:')
best_c

In [None]:
# Look up the fitted estimator stored at the same position as best_c
best_index = logistic_models['C'].index(best_c)
best_logistic_model = logistic_models['Model'][best_index]
print('Best Logistic Model:')
best_logistic_model

__Training Performance__

In [None]:
# Predict on the training set with the selected model and print the
# classification report under a heading
train_pred = best_logistic_model.predict(X_train)
print("Training performance:", classification_report(y_train, train_pred), sep="\n")

__Validation Performance__

In [None]:
# Predict on the validation set with the selected model and print the
# classification report under a heading
valid_pred = best_logistic_model.predict(X_valid)
print("Validation performance:", classification_report(y_valid, valid_pred), sep="\n")

__Testing Performance__

In [None]:
# Predict on the test set with the selected model and print the
# classification report under a heading
test_pred = best_logistic_model.predict(X_test)
print("Testing performance:", classification_report(y_test, test_pred), sep="\n")

In [None]:
# Overall accuracy of the selected model on each split
train_accuracy = accuracy_score(y_train, train_pred)
valid_accuracy = accuracy_score(y_valid, valid_pred)
test_accuracy = accuracy_score(y_test, test_pred)

# Call (not re-define) the plotting helper, reusing the values computed above
plot_perf(train_accuracy, valid_accuracy, test_accuracy, 'Multinomial Logistic Regression')

Printing accuracy and precision

In [None]:
# Validation-set metrics for the selected logistic model, computed from the
# validation predictions made earlier (valid_pred).
# Fashion MNIST is a multiclass problem, so precision_score needs an explicit
# averaging strategy (its default, 'binary', raises on multiclass targets);
# 'macro' is the unweighted mean of the per-class precisions.
log_accuracy = metrics.accuracy_score(y_valid, valid_pred)
log_precision = metrics.precision_score(y_valid, valid_pred, average='macro')

# Record the fitted estimator together with its rounded scores
accuracy_dic['model'].append(best_logistic_model)
accuracy_dic['accuracy'].append(round(log_accuracy, 3))
accuracy_dic['precision'].append(round(log_precision, 3))

print('Accuracy:', round(log_accuracy, 3))
print('Precision:', round(log_precision, 3))

__Plotting Confusion Matrix__

In [None]:
# Confusion matrix of the selected model on the validation set.
# The class labels come from the fitted estimator so that the matrix rows/columns
# and the tick labels are guaranteed to use the same ordering.
class_labels = best_logistic_model.classes_
log_confusion_matrix = metrics.confusion_matrix(y_valid, valid_pred, labels=class_labels)
graph = metrics.ConfusionMatrixDisplay(log_confusion_matrix, display_labels=class_labels)
graph.plot(cmap=plt.cm.Blues)
plt.show()