### CODING

In [1]:
#tensorflow and tf.keras
import tensorflow as tf 

#Library 
import numpy as np 
import matplotlib.pyplot as plt 

from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

import time

fashion_mnist = tf.keras.datasets.fashion_mnist

(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

X_train, X_test = X_train / 255.0, X_test / 255.0 




In [2]:
print(X_train[0].shape)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
print(X_train[0].shape)

(28, 28)
(784,)


In [17]:
C_values = [0.1, 1, 10, 100]  # thay đổi giá trị này theo nhu cầu của bạn

for C in C_values:
    start_time = time.time()
    clf = svm.SVC(kernel='linear', C=C)
    clf.fit(X_train, y_train)
    end_time = time.time()

    train_preds = clf.predict(X_train)
    val_preds = clf.predict(X_test)

    train_error = mean_squared_error(y_train, train_preds)
    val_error = mean_squared_error(y_test, val_preds)
    training_time = end_time - start_time

    print(f'For C={C}:')
    print(f'Training error: {train_error}')
    print(f'Validation error: {val_error}')
    print(f'Training time: {training_time}s')
    print('----------------')

For C=0.1:
Training error: 1.4967166666666667
Validation error: 1.9247
Training time: 199.95183658599854s
----------------
For C=1:
Training error: 1.3110333333333333
Validation error: 2.1206
Training time: 272.2981026172638s
----------------
For C=10:
Training error: 1.1960666666666666
Validation error: 2.1991
Training time: 697.199474811554s
----------------
For C=100:
Training error: 1.1581833333333333
Validation error: 2.2193
Training time: 5997.492079257965s
----------------


- The SVM which has the smallest error on Validation dataset has C=0.1. 
- Now we evaluate the SVM and compare it to other models.

### Compare with other model
1. Logistic Regression 
2. Neural Network 
3. SVM 

In [3]:
#Support library
from sklearn.metrics import accuracy_score, classification_report

#### 1. Logistic Regression 

In [48]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

train_preds = model.predict(X_train)
val_preds = model.predict(X_test)

train_error = mean_squared_error(y_train, train_preds)
val_error = mean_squared_error(y_test, val_preds)

print(f'Training error: {train_error}')
print(f'Validation error: {val_error}')

train_accuracy = accuracy_score(y_train, train_preds)
test_accuracy = accuracy_score(y_test, val_preds)

print("Train Accuracy score: {}".format(train_accuracy))
print("Test Accuracy score: {}".format(test_accuracy))
print(classification_report(y_test, val_preds))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training error: 1.7360666666666666
Validation error: 1.9616
Train Accuracy score: 0.86625
Test Accuracy score: 0.8439
              precision    recall  f1-score   support

           0       0.81      0.81      0.81      1000
           1       0.97      0.96      0.97      1000
           2       0.73      0.73      0.73      1000
           3       0.83      0.87      0.85      1000
           4       0.73      0.76      0.74      1000
           5       0.94      0.93      0.93      1000
           6       0.62      0.57      0.59      1000
           7       0.90      0.94      0.92      1000
           8       0.94      0.94      0.94      1000
           9       0.95      0.94      0.94      1000

    accuracy                           0.84     10000
   macro avg       0.84      0.84      0.84     10000
weighted avg       0.84      0.84      0.84     10000



#### 2. Neural Network 

In [53]:
model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=100)
model.fit(X_train, y_train)

train_preds = model.predict(X_train)
val_preds = model.predict(X_test)

train_error = mean_squared_error(y_train, train_preds)
val_error = mean_squared_error(y_test, val_preds)

print(f'Training error: {train_error}')
print(f'Validation error: {val_error}')

train_accuracy = accuracy_score(y_train, train_preds)
test_accuracy = accuracy_score(y_test, val_preds)

print("Train Accuracy score: {}".format(train_accuracy))
print("Test Accuracy score: {}".format(test_accuracy))
print(classification_report(y_test, val_preds))



Training error: 0.3310166666666667
Validation error: 1.5678
Train Accuracy score: 0.9739666666666666
Test Accuracy score: 0.8846
              precision    recall  f1-score   support

           0       0.82      0.85      0.83      1000
           1       0.98      0.98      0.98      1000
           2       0.75      0.84      0.79      1000
           3       0.89      0.89      0.89      1000
           4       0.79      0.82      0.81      1000
           5       0.97      0.95      0.96      1000
           6       0.76      0.62      0.69      1000
           7       0.95      0.97      0.96      1000
           8       0.97      0.97      0.97      1000
           9       0.96      0.96      0.96      1000

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.88     10000
weighted avg       0.88      0.88      0.88     10000



#### 3. SVM 

In [4]:
model = svm.SVC(kernel='linear', C=0.1)
model.fit(X_train, y_train)

train_preds = model.predict(X_train)
val_preds = model.predict(X_test)

train_error = mean_squared_error(y_train, train_preds)
val_error = mean_squared_error(y_test, val_preds)

print(f'Training error: {train_error}')
print(f'Validation error: {val_error}')

train_accuracy = accuracy_score(y_train, train_preds)
test_accuracy = accuracy_score(y_test, val_preds)

print("Train Accuracy score: {}".format(train_accuracy))
print("Test Accuracy score: {}".format(test_accuracy))
print(classification_report(y_test, val_preds))

Training error: 1.4967166666666667
Validation error: 1.9247
Train Accuracy score: 0.8888166666666667
Test Accuracy score: 0.8561
              precision    recall  f1-score   support

           0       0.78      0.83      0.80      1000
           1       0.98      0.96      0.97      1000
           2       0.75      0.77      0.76      1000
           3       0.85      0.87      0.86      1000
           4       0.77      0.78      0.77      1000
           5       0.95      0.94      0.94      1000
           6       0.65      0.57      0.61      1000
           7       0.92      0.94      0.93      1000
           8       0.95      0.95      0.95      1000
           9       0.96      0.95      0.95      1000

    accuracy                           0.86     10000
   macro avg       0.85      0.86      0.86     10000
weighted avg       0.85      0.86      0.86     10000



### Research about C parameter in Support Vector Machine 

One of the most important parameters in the SVM is the C parameter, which plays a crucial role in determining the balance between achieving a `low training error` and `allowing for misclassifications`.

### Bình luận kết quả