Hier:
- GridSearchCV mit gamma='scale' und C = 0.1, 1, 10, 100 testen

In [1]:
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import mnist





In [2]:
# Load the MNIST dataset shipped with Keras; load_data() returns a train pair
# and a test pair, which are unpacked into the four arrays used below.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Report the shapes of the raw training and test arrays as loaded
for label, images in (
    ("Ursprungs Shape des Trainingsdatensatzes:", x_train),
    ("Ursprungs Shape des Testdatensatzes:", x_test),
):
    print(label, images.shape)

Ursprungs Shape des Trainingsdatensatzes: (60000, 28, 28)
Ursprungs Shape des Testdatensatzes: (10000, 28, 28)


In [4]:
# Prepare the dataset for an SVM

# Flatten every image matrix into a single feature vector (28x28 -> 784);
# the first axis (number of samples) is kept, the rest is collapsed.
n_train, n_test = x_train.shape[0], x_test.shape[0]
x_train_svm = x_train.reshape(n_train, -1)
x_test_svm = x_test.reshape(n_test, -1)

# Standardise the feature values. The scaler is fitted on the training data
# only and the same fitted transformation is then applied to both splits.
scaler = StandardScaler().fit(x_train_svm)
x_train_svm = scaler.transform(x_train_svm)
x_test_svm = scaler.transform(x_test_svm)

In [5]:
# Report the shapes of the flattened, standardised arrays fed to the SVM
for label, features in (
    ("SVM Shape des Trainingsdatensatzes:", x_train_svm),
    ("SVM Shape des Testdatensatzes:", x_test_svm),
):
    print(label, features.shape)

SVM Shape des Trainingsdatensatzes: (60000, 784)
SVM Shape des Testdatensatzes: (10000, 784)


In [15]:
# Hyperparameter optimisation with GridSearchCV

# Search space: only C is varied; gamma and kernel are each pinned to a
# single value, so the grid has four candidates.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale'],
    kernel=['rbf'],
)

# Set up the search: 3-fold cross-validation scored by accuracy,
# run with 6 parallel jobs and basic progress output.
grid_search = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=6,
    verbose=1,
)

# Fit on the (standardised) training data
grid_search.fit(x_train_svm, y_train)

# Print the best parameter combination found
best_params = grid_search.best_params_
print("Beste Parameter-Kombination:", best_params)



Fitting 3 folds for each of 4 candidates, totalling 12 fits
Beste Parameter-Kombination: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [17]:
# Collect the full cross-validation results of the grid search in a DataFrame
# and persist them as CSV, so the (long-running) search does not have to be
# repeated just to inspect the scores and timings.
# `pd` is imported in the import cell at the top of the notebook.
cv_results_df = pd.DataFrame(grid_search.cv_results_)
cv_results_df.to_csv('svm_v2_opt_gridcv_results_c0.1_1_10_100.csv', index=False)
print(cv_results_df)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0    1583.854495      3.659028      1220.538789        4.992894     0.1   
1     975.133727      0.777928       784.015747        0.158493       1   
2     724.637140      2.638047       699.712555        4.212903      10   
3     587.853033      4.259290       325.795134        3.726903     100   

  param_gamma param_kernel                                         params  \
0       scale          rbf  {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}   
1       scale          rbf    {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}   
2       scale          rbf   {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}   
3       scale          rbf  {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}   

   split0_test_score  split1_test_score  split2_test_score  mean_test_score  \
0            0.92070            0.92300            0.92515         0.922950   
1            0.95960            0.96020            0.96100         0.960267   
2