In [1]:
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import fashion_mnist





In [3]:
# Load the Fashion-MNIST dataset through Keras. load_data() yields a
# (train, test) pair, each of which unpacks into (images, labels).
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [4]:
# Report the array dimensions of both splits exactly as loaded, i.e. before
# any reshaping for the SVM.
for caption, images in (
    ("Ursprungs Shape des Trainingsdatensatzes:", x_train),
    ("Ursprungs Shape des Testdatensatzes:", x_test),
):
    print(caption, images.shape)

Ursprungs Shape des Trainingsdatensatzes: (60000, 28, 28)
Ursprungs Shape des Testdatensatzes: (10000, 28, 28)


In [5]:
# Prepare the image data for the SVM.

# Flatten every image into a single feature row (28x28 -> 784). The first
# axis (number of samples) is kept; -1 lets NumPy infer the row length.
n_train, n_test = len(x_train), len(x_test)
flat_train = x_train.reshape(n_train, -1)
flat_test = x_test.reshape(n_test, -1)

# Standardize the feature values. The scaler is fitted on the training rows
# only; the test rows are transformed with those same fitted statistics.
scaler = StandardScaler()
x_train_svm = scaler.fit_transform(flat_train)
x_test_svm = scaler.transform(flat_test)

In [6]:
# Report the dimensions of both splits after flattening and scaling, to
# confirm each sample is now a single feature row.
for caption, features in (
    ("SVM Shape des Trainingsdatensatzes:", x_train_svm),
    ("SVM Shape des Testdatensatzes:", x_test_svm),
):
    print(caption, features.shape)

SVM Shape des Trainingsdatensatzes: (60000, 784)
SVM Shape des Testdatensatzes: (10000, 784)


In [7]:
# Hyperparameter tuning with GridSearchCV.
#
# NOTE(review): x_train_svm was standardized on the complete training set
# before this cross-validation, so every validation fold contributed to the
# scaling statistics. Confirm this is acceptable for the reported scores.
# NOTE(review): this cell is expensive; the recorded output shows mean fit
# times of roughly 600-4500 s per candidate.

# Search space: three C values crossed with three gamma settings, RBF kernel
# only (9 candidates).
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.01, 'scale', 0.0001],
    'kernel': ['rbf']
}

# Configure the exhaustive search: 3-fold cross-validation scored by
# accuracy, run on 6 parallel workers with per-fit progress logging.
grid_search = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=6,
    verbose=2,
)

# Run the search on the standardized training features.
grid_search.fit(x_train_svm, y_train)

# Show the winning parameter combination.
print("Beste Parameter-Kombination:", grid_search.best_params_)



Fitting 3 folds for each of 9 candidates, totalling 27 fits
Beste Parameter-Kombination: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [10]:
# Persist the full cross-validation report of the grid search so the scores
# can be inspected later without repeating the long-running search.
# `pd` comes from the notebook's top import cell, so this cell no longer
# carries its own import.
cv_results_df = pd.DataFrame(grid_search.cv_results_)

# NOTE(review): the file name mentions C=100, but the grid searched in this
# notebook only covers C in {0.1, 1, 10} -- confirm the name is still intended.
cv_results_df.to_csv('svm_v2_opt_gridcv_results_c0.1_1_10_100.csv', index=False)

# Bare last expression: the notebook renders the frame as a table instead of
# the wrapped plain-text dump that print() produces.
cv_results_df

   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0    4472.785324      5.999194      1638.966583        4.068859     0.1   
1    1250.977011      1.411624      1254.975400        2.121311     0.1   
2    1898.269149      3.110241      1330.855350        2.776589     0.1   
3    3730.482477     17.826703      1582.329841        4.659099       1   
4     789.476683      1.398656       956.037579        3.935774       1   
5     939.437677      3.857407      1061.316651        1.405752       1   
6    4128.293020     16.841308       814.261444       18.284112      10   
7     743.236215      3.843901       935.673332        4.929686      10   
8     608.647060      3.951744       844.255648       12.480740      10   

  param_gamma param_kernel                                         params  \
0        0.01          rbf     {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}   
1       scale          rbf  {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}   
2      0.0001     