In [1]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn import svm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import joblib





In [2]:
# Load the MNIST dataset shipped with Keras and split it into images and labels
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Report the dimensions of the image arrays exactly as they were loaded
raw_shapes = {
    "Ursprungs Shape des Trainingsdatensatzes:": x_train.shape,
    "Ursprungs Shape des Testdatensatzes:": x_test.shape,
}
for caption, dims in raw_shapes.items():
    print(caption, dims)

Ursprungs Shape des Trainingsdatensatzes: (60000, 28, 28)
Ursprungs Shape des Testdatensatzes: (10000, 28, 28)


In [4]:
# Prepare the dataset for the SVM

# Flatten every 28x28 image matrix into a single row of 784 features
x_train_svm = x_train.reshape(len(x_train), -1)
x_test_svm = x_test.reshape(len(x_test), -1)

# Standardize the feature values: the scaler is fitted on the training
# split only and then applied unchanged to both splits
scaler = StandardScaler().fit(x_train_svm)
x_train_svm = scaler.transform(x_train_svm)
x_test_svm = scaler.transform(x_test_svm)

In [5]:
# Report the dimensions of the flattened, standardized feature matrices
for caption, features in (
    ("SVM Shape des Trainingsdatensatzes:", x_train_svm),
    ("SVM Shape des Testdatensatzes:", x_test_svm),
):
    print(caption, features.shape)

SVM Shape des Trainingsdatensatzes: (60000, 784)
SVM Shape des Testdatensatzes: (10000, 784)


In [6]:
# Hyperparameter tuning with GridSearchCV

# Candidate values to try: 3 values of C x 3 values of gamma, RBF kernel only
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.01, 'scale', 0.0001],
    'kernel': ['rbf'],
}

# Set up the search: 3-fold cross-validation, candidates ranked by weighted F1,
# fits distributed over 6 parallel jobs
grid_search = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=3,
    n_jobs=6,
    verbose=2,
)

# Run the search on the standardized training data
grid_search.fit(x_train_svm, y_train)

# Show the best-scoring parameter combination
print("Beste Parameter-Kombination:", grid_search.best_params_)



Fitting 3 folds for each of 9 candidates, totalling 27 fits
Beste Parameter-Kombination: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [7]:
# Collect the per-candidate cross-validation results (timings, parameters,
# scores) from the fitted grid search into a DataFrame.
# pandas (`pd`) is imported in the notebook's import cell at the top.
cv_results_df = pd.DataFrame(grid_search.cv_results_)

# Persist the full table so the search does not have to be re-run to inspect
# it later; the positional index carries no information, so it is omitted.
cv_results_df.to_csv('svm_v2_mnist_gridsearch_f1.csv', index=False)

# Print the full table as plain text
print(cv_results_df)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0    5255.046189     14.508383      1812.844063        2.089039     0.1   
1    1880.618012      3.653981      1253.910846        7.249694     0.1   
2    2806.960721      6.635479      1645.594951       16.905628     0.1   
3    4758.235481     62.515159      1745.232111        5.100744       1   
4     883.975430      1.390957       777.033884        3.321231       1   
5    1091.747019      3.332990      1095.322025       11.691308       1   
6    4962.590820     14.823364       726.900626       11.566964      10   
7     802.242517      7.715798       733.316953        2.958721      10   
8     532.510066      2.452323       649.816525       23.065792      10   

  param_gamma param_kernel                                         params  \
0        0.01          rbf     {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}   
1       scale          rbf  {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}   
2      0.0001     