In [None]:
import pandas as pd
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

from sklearn import metrics

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

In [None]:
# Load Fashion-MNIST through Keras; load_data() hands back a
# (features, labels) pair for the training split and another for the test split.
train_split, test_split = fashion_mnist.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split

In [None]:
# Flatten every sample into a single feature row (first axis kept, all
# remaining axes collapsed) and cast to float32 for the classifiers below.
train_x = train_data.reshape(len(train_data), -1).astype(np.float32)
test_x = test_data.reshape(len(test_data), -1).astype(np.float32)

# LightGBM

In [39]:
from lightgbm import LGBMClassifier

In [40]:
# Multiclass LightGBM classifier; path_smooth=0.2 is the smoothing value
# currently in use (other values are explored in the tuning cell further down).
lgb_model = LGBMClassifier(
    objective='multiclass',
    path_smooth=0.2,
)

In [47]:
# Fit on the flattened training images.
# Every column of train_x is a pixel intensity, i.e. an ordered numeric value,
# so no column is declared categorical (columns 0 and 3 were previously passed
# as categorical_feature, which makes LightGBM ignore their ordering).
# NOTE(review): re-run the evaluation cells after this change; the stored
# outputs were produced with the old categorical setting.
lgb_model.fit(train_x, train_labels)



LGBMClassifier(objective='multiclass', path_smooth=0.2)

In [107]:
# Score the LightGBM model on the test split.
# The predictions are also kept under a model-specific name so they survive
# the reuse of expected_y / predicted_y by the other models.
lightGBM_pred_y = lgb_model.predict(test_x)
expected_y, predicted_y = test_labels, lightGBM_pred_y
print(metrics.classification_report(y_true=expected_y, y_pred=predicted_y))

              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1000
           1       1.00      0.97      0.98      1000
           2       0.80      0.83      0.81      1000
           3       0.89      0.90      0.90      1000
           4       0.80      0.83      0.82      1000
           5       0.99      0.97      0.98      1000
           6       0.72      0.66      0.69      1000
           7       0.95      0.97      0.96      1000
           8       0.98      0.97      0.98      1000
           9       0.97      0.96      0.96      1000

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



In [None]:
# Work in progress -- don't run this part yet, path_smooth is still being tuned.
# Sweep path_smooth over 0.0, 0.1, ..., 0.9: fit a fresh LightGBM model for
# each value and record its misclassification rate.
# Uses the arrays this notebook actually defines (train_x / train_labels /
# test_x / test_labels); the previous X_train / y_train / X_test / y_test
# names do not exist here and raised NameError on a fresh kernel.
# Pixel columns are numeric, so none are passed as categorical_feature.
# NOTE(review): candidates are scored on the test split, so the value picked
# from err_list is optimistically biased -- prefer a held-out validation split.
err_list = []
for i in np.arange(0.0, 1.0, 0.1):
    model = LGBMClassifier(objective='multiclass', path_smooth=i)
    model.fit(train_x, train_labels)
    predictions = model.predict(test_x)
    # Fraction of test samples whose predicted label differs from the truth.
    error = np.mean(predictions != test_labels)
    err_list.append(error)
err_list

# LDA

In [50]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [83]:
# Build and fit the LDA classifier on the flattened training images.
# The evaluation cell calls lda_model.predict(...), so the fitted estimator
# must be bound to `lda_model`; fit() returns the estimator itself, which
# means `lda` and `lda_model` refer to the same fitted object.
# n_components only affects transform(), not predict().
lda = LDA(n_components=9)
lda_model = lda.fit(train_x, train_labels)

In [108]:
# Score the LDA model on the test split.
# The predictions are also kept under a model-specific name so they survive
# the reuse of expected_y / predicted_y by the other models.
lda_pred_y = lda_model.predict(test_x)
expected_y, predicted_y = test_labels, lda_pred_y
print(metrics.classification_report(y_true=expected_y, y_pred=predicted_y))

              precision    recall  f1-score   support

           0       0.82      0.78      0.80      1000
           1       1.00      0.93      0.96      1000
           2       0.71      0.70      0.71      1000
           3       0.81      0.86      0.83      1000
           4       0.71      0.74      0.73      1000
           5       0.89      0.90      0.89      1000
           6       0.56      0.57      0.57      1000
           7       0.88      0.89      0.89      1000
           8       0.94      0.93      0.93      1000
           9       0.92      0.91      0.91      1000

    accuracy                           0.82     10000
   macro avg       0.82      0.82      0.82     10000
weighted avg       0.82      0.82      0.82     10000





In [85]:
# Same report as a nested dict, shown as a table with one row per report
# entry and one column per metric.
report = metrics.classification_report(
    y_true=expected_y,
    y_pred=predicted_y,
    output_dict=True,
)
pd.DataFrame(report).T

Unnamed: 0,precision,recall,f1-score,support
0,0.817801,0.781,0.798977,1000.0
1,0.998929,0.933,0.96484,1000.0
2,0.713415,0.702,0.707661,1000.0
3,0.807547,0.856,0.831068,1000.0
4,0.710878,0.745,0.727539,1000.0
5,0.890765,0.897,0.893871,1000.0
6,0.561765,0.573,0.567327,1000.0
7,0.884273,0.894,0.88911,1000.0
8,0.936299,0.926,0.931121,1000.0
9,0.916331,0.909,0.912651,1000.0


# SVM

In [95]:
from sklearn import svm
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [98]:
# Support vector classifier with a degree-3 polynomial kernel (C=1),
# fitted on the flattened training images.
svmpoly = svm.SVC(
    kernel='poly',
    degree=3,
    C=1,
).fit(train_x, train_labels)

In [113]:
# Score the polynomial-kernel SVM on the test split.
# The predictions are also kept under a model-specific name so they survive
# the reuse of expected_y / predicted_y by the other models.
SVM_pred_y = svmpoly.predict(test_x)
expected_y, predicted_y = test_labels, SVM_pred_y
print(metrics.classification_report(y_true=expected_y, y_pred=predicted_y))

              precision    recall  f1-score   support

           0       0.79      0.84      0.81      1000
           1       0.99      0.95      0.97      1000
           2       0.80      0.78      0.79      1000
           3       0.88      0.86      0.87      1000
           4       0.83      0.76      0.79      1000
           5       0.84      0.96      0.89      1000
           6       0.66      0.69      0.68      1000
           7       0.94      0.91      0.92      1000
           8       0.97      0.94      0.95      1000
           9       0.96      0.94      0.95      1000

    accuracy                           0.86     10000
   macro avg       0.87      0.86      0.86     10000
weighted avg       0.87      0.86      0.86     10000



In [101]:
# Same report as a nested dict, shown as a table with one row per report
# entry and one column per metric.
report = metrics.classification_report(
    y_true=expected_y,
    y_pred=predicted_y,
    output_dict=True,
)
pd.DataFrame(report).T

Unnamed: 0,precision,recall,f1-score,support
0,0.790019,0.839,0.813773,1000.0
1,0.991693,0.955,0.973001,1000.0
2,0.804145,0.776,0.789822,1000.0
3,0.878695,0.862,0.870268,1000.0
4,0.825708,0.758,0.790407,1000.0
5,0.835366,0.959,0.892924,1000.0
6,0.6625,0.689,0.67549,1000.0
7,0.93641,0.913,0.924557,1000.0
8,0.966084,0.94,0.952864,1000.0
9,0.963077,0.939,0.950886,1000.0


In [None]:
# Per-class precision lookup: integer class id (0-9) -> precision rounded to
# two decimals. Reads from whichever `report` dict was computed last; the
# report keys are the class ids as strings.
out = {}
for i in range(10):
    out[i] = round(report[str(i)]['precision'], 2)

In [128]:
# Side-by-side table of test-set predictions: one column per model,
# one row per test sample.
y_pred_test = pd.concat(
    [
        pd.Series(lightGBM_pred_y, name='LightGBM'),
        pd.Series(lda_pred_y, name='LDA'),
        pd.Series(SVM_pred_y, name='SVM'),
    ],
    axis=1,
)

In [130]:
# Persist the prediction table next to the notebook. index=True is the pandas
# default, spelled out here: the row index is written as an unnamed first column.
y_pred_test.to_csv('y_pred_test.csv', index=True)