In [1]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn import svm
import matplotlib.pyplot as plt
import seaborn as sns
import joblib  





In [2]:
# Load the Fashion-MNIST dataset that ships with Keras.
# `load_data()` returns two (images, labels) pairs: the training split
# and the test split.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [3]:
# Report the array shape of each raw image split.
for caption, images in (
    ("Ursprungs Shape des Trainingsdatensatzes:", x_train),
    ("Ursprungs Shape des Testdatensatzes:", x_test),
):
    print(caption, images.shape)

Ursprungs Shape des Trainingsdatensatzes: (60000, 28, 28)
Ursprungs Shape des Testdatensatzes: (10000, 28, 28)


In [4]:
# Prepare the image data for an SVM.

# The scaler is fit on the training split only; the test split is
# transformed with the statistics learned from the training data.
scaler = StandardScaler()

# Flatten each image to a single feature row (first axis = sample index,
# all remaining axes collapsed), then standardize the features.
x_train_svm = scaler.fit_transform(x_train.reshape(len(x_train), -1))
x_test_svm = scaler.transform(x_test.reshape(len(x_test), -1))

In [5]:
# Report the shape of each flattened, standardized split.
svm_shapes = {
    "SVM Shape des Trainingsdatensatzes:": x_train_svm.shape,
    "SVM Shape des Testdatensatzes:": x_test_svm.shape,
}
for caption, shape in svm_shapes.items():
    print(caption, shape)

SVM Shape des Trainingsdatensatzes: (60000, 784)
SVM Shape des Testdatensatzes: (10000, 784)


In [7]:
# Hyper-parameter optimisation with GridSearchCV.

# Search space: 3 values of C x 3 values of gamma x 1 kernel = 9 candidates.
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.01, 'scale', 0.0001],
    'kernel': ['rbf'],
}

# 3-fold cross-validation scored by accuracy, run on 6 parallel workers;
# verbose=2 makes scikit-learn log progress while fitting.
grid_search = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=6,
    cv=3,
    verbose=2,
)

# Run the search on the standardized training features.
grid_search.fit(x_train_svm, y_train)

# Show the winning parameter combination.
print("Beste Parameter-Kombination:", grid_search.best_params_)



Fitting 3 folds for each of 9 candidates, totalling 27 fits
Beste Parameter-Kombination: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [8]:
import pandas as pd

# Tabulate the per-candidate cross-validation results of the grid search.
cv_results_df = pd.DataFrame(data=grid_search.cv_results_)

# Persist the table as CSV (row index omitted), then echo it.
# NOTE(review): the file name mentions C=100, but the visible parameter grid
# only contains C in {0.1, 1, 10} — confirm the name is still intended.
cv_results_df.to_csv(
    path_or_buf='svm_v2_opt_gridcv_results_c0.1_1_10_100.csv',
    index=False,
)
print(cv_results_df)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0    6623.743119     11.904728      1987.636552        4.399732     0.1   
1    2295.752257      3.718161      1835.309420        6.630185     0.1   
2    3234.733570      5.802447      1786.281460        6.109205     0.1   
3    6169.913058     76.293615      2004.906676        7.209797       1   
4     947.552331      0.557573      1090.537819        2.064171       1   
5    1526.446014      3.589145      1495.457438        9.935033       1   
6    5642.066207      9.674817       783.809635        7.905184      10   
7     873.757574      6.526336       797.520130        3.552567      10   
8     572.837677      2.091722       704.425038       24.596976      10   

  param_gamma param_kernel                                         params  \
0        0.01          rbf     {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}   
1       scale          rbf  {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}   
2      0.0001     