In [4]:
import joblib

from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.svm import SVC


# --- Load MNIST -----------------------------------------------------------------------------------
mnist = fetch_openml("mnist_784", version=1, cache=True, as_frame=False)

# Features are cast to float32 and labels to int.
X = mnist.data.astype("float32")
y = mnist.target.astype(int)


# --- Hold-out splits ------------------------------------------------------------------------------
# Two successive stratified splits: 10 000 samples are set aside for testing first,
# then another 10 000 for validation out of what remains. Stratifying means every
# digit (0-9) gets roughly the same share in each subset.
split_options = dict(test_size=10000, random_state=42)

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    stratify=y,
    **split_options,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    stratify=y_train_val,
    **split_options,
)

# Unfitted scaler instance; it is fitted later as the first step of the model pipeline.
preprocessor = StandardScaler()


In [5]:
# Hyperparameters of the final model.
# NOTE(review): presumably selected by an earlier search that is not shown in this cell - confirm.
BEST_PCA = 75
BEST_C = 14
BEST_GAMMA = "scale"

# Output artefacts: the fitted pipeline and a small dict describing it.
MODEL_PATH = "svc_mnist.joblib"
META_PATH = "svc_mnist_meta.joblib"

# Pipeline: standardise -> reduce to BEST_PCA components -> RBF-kernel SVC.
final_model_svc = Pipeline([
    ("scaler", preprocessor),
    ("pca", PCA(n_components=BEST_PCA, random_state=42, svd_solver="randomized")),
    ("model", SVC(kernel="rbf", C=BEST_C, gamma=BEST_GAMMA, cache_size=500)),
])

# Train on the entire dataset (X, y), not only on the training split, so the
# saved model has also seen the validation and test rows.
final_model_svc.fit(X, y)

# Persist the fitted pipeline.
joblib.dump(final_model_svc, MODEL_PATH)

# Metadata is built from the same constants that configured the pipeline, so it
# cannot drift from the model that was actually saved.
meta = {
    "pca": BEST_PCA,
    "C": BEST_C,
    "gamma": BEST_GAMMA,
    "input_shape": (784,),
    # NOTE(review): assumes callers pass raw, unscaled pixel values - confirm.
    "pixel_range": "0-255",
}

joblib.dump(meta, META_PATH)

print(f"Sparat: {MODEL_PATH} och {META_PATH}")


Sparat: svc_mnist.joblib och svc_mnist_meta.joblib
