In [1]:
from tensorflow import keras
import numpy as np

f_mnist = keras.datasets.fashion_mnist
(x_train_full, y_train_full), (x_test, y_test) = f_mnist.load_data()

x_train_full = np.reshape(x_train_full, (x_train_full.shape[0], x_train_full.shape[1] * x_train_full.shape[2]))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1] * x_test.shape[2]))

print(f"Training dataset: {x_train_full.shape}, {x_train_full.dtype}")
print(f"Testing dataset: {x_test.shape}, {x_test.dtype}")

x_train = x_train_full / 255.0
x_test = x_test / 255.0

y_train = y_train_full

2024-04-29 18:30:56.098036: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-29 18:30:56.099830: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 18:30:56.131321: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 18:30:56.131348: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 18:30:56.132310: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Training dataset: (60000, 784), uint8
Testing dataset: (10000, 784), uint8


In [2]:
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [3]:
class_names = [
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot'
]

class_names[y_train[0]]

'Ankle boot'

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

from sklearn.decomposition import PCA
from sklearn.random_projection import GaussianRandomProjection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


classifying_models = {
    'Decision Tree': DecisionTreeClassifier(),
    'K-Neighbors': KNeighborsClassifier(),
    'Linear SVC': LinearSVC(dual=True),
    'Logistic Regression': LogisticRegression(solver='lbfgs')
}

dimensionality_methods = {
    'Original Dataset': lambda X, y: (x_train_scaled, y_train),
    'PCA': PCA(),
    'Random Projection': GaussianRandomProjection(n_components=100),
    'LDA': LinearDiscriminantAnalysis()
}

classifying_analyze = {method: [] for method in dimensionality_methods}
classifying_analyze_time = {method: [] for method in dimensionality_methods}

In [5]:
import time


for model_name, model in classifying_models.items():
    for method_name, dimensionality_reducer in dimensionality_methods.items():
        model_instance = type(model).__name__
        if model_instance == 'LogisticRegression':
            model = LogisticRegression(solver='lbfgs', fit_intercept=True)

        print(f"Running {model_instance} with {method_name}\n")

        if method_name == "Original Dataset":
            start = time.time()
            model.fit(x_train_scaled, y_train)
            end = time.time()

            accuracy = model.score(x_test_scaled, y_test)

            classifying_analyze_time[method_name].append(end - start)
            classifying_analyze[method_name].append(accuracy)
            
            continue
            
        # Навчання моделі з використанням методу розмірності даних
        start = time.time()
        x_train_reduced = dimensionality_reducer.fit_transform(x_train_scaled, y_train)
        model.fit(x_train_reduced, y_train)
        end = time.time()

        # Оцінка результату
        x_test_reduced = dimensionality_reducer.transform(x_test_scaled)
        accuracy = model.score(x_test_reduced, y_test)

        # Збереження результатів
        classifying_analyze_time[method_name].append(end - start)
        classifying_analyze[method_name].append(accuracy)

Running DecisionTreeClassifier with Original Dataset

Running DecisionTreeClassifier with PCA

Running DecisionTreeClassifier with Random Projection

Running DecisionTreeClassifier with LDA

Running KNeighborsClassifier with Original Dataset

Running KNeighborsClassifier with PCA

Running KNeighborsClassifier with Random Projection

Running KNeighborsClassifier with LDA

Running LinearSVC with Original Dataset





Running LinearSVC with PCA





Running LinearSVC with Random Projection





Running LinearSVC with LDA





Running LogisticRegression with Original Dataset



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Running LogisticRegression with PCA



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Running LogisticRegression with Random Projection



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Running LogisticRegression with LDA



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [6]:
classifying_analyze, classifying_analyze_time

({'Original Dataset': [0.7889, 0.8533, 0.8284, 0.8439],
  'PCA': [0.7486, 0.8533, 0.8283, 0.8439],
  'Random Projection': [0.7026, 0.838, 0.7704, 0.8103],
  'LDA': [0.77, 0.8221, 0.8169, 0.8241]},
 {'Original Dataset': [50.70191478729248,
   0.08204102516174316,
   1059.1568534374237,
   18.521929502487183],
  'PCA': [278.8157603740692,
   14.723002672195435,
   754.4843339920044,
   30.00542664527893],
  'Random Projection': [56.044333696365356,
   0.35141491889953613,
   248.42104721069336,
   6.380276441574097],
  'LDA': [30.990924835205078,
   16.129300832748413,
   44.38417983055115,
   18.47638750076294]})

In [7]:
classifying_analyze['Model'] = ['Decision Tree', 'K-Neighbors', 'Linear SVC', 'Logistic Regression']
classifying_analyze_time['Model'] = ['Decision Tree', 'K-Neighbors', 'Linear SVC', 'Logistic Regression']

In [8]:
import pandas as pd


data = classifying_analyze
df = pd.DataFrame.from_dict(data)

df.set_index('Model')

Unnamed: 0_level_0,Original Dataset,PCA,Random Projection,LDA
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Decision Tree,0.7889,0.7486,0.7026,0.77
K-Neighbors,0.8533,0.8533,0.838,0.8221
Linear SVC,0.8284,0.8283,0.7704,0.8169
Logistic Regression,0.8439,0.8439,0.8103,0.8241


In [9]:
data = classifying_analyze_time
df = pd.DataFrame.from_dict(data)

df.set_index('Model')

Unnamed: 0_level_0,Original Dataset,PCA,Random Projection,LDA
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Decision Tree,50.701915,278.81576,56.044334,30.990925
K-Neighbors,0.082041,14.723003,0.351415,16.129301
Linear SVC,1059.156853,754.484334,248.421047,44.38418
Logistic Regression,18.52193,30.005427,6.380276,18.476388
