In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score

# Fetch the MNIST digits from OpenML (may be slow on the first call)
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data / 255.           # divide the raw pixel values by 255
y = mnist.target.astype(int)    # cast the labels to integers

# Hold out 20% of all samples as the final test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Carve a validation set (20% of what is left) out of the training portion
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Standardize the features: the scaler is fit on the training split only and
# the same fitted scaler is then applied to every split.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)
# Base RBF-kernel SVM; GridSearchCV fits clones of it for every candidate.
svm_clf = SVC(kernel='rbf', decision_function_shape='ovr', random_state=42)

# Exhaustive search over C and gamma, scored by 3-fold cross-validated
# accuracy on the training split.
# NOTE: X_train_scaled was standardized with a scaler fit on the whole training
# split, so each CV fold's held-out part contributed to the scaling statistics.
# The CV scores are therefore slightly optimistic; wrapping the scaler and the
# SVC in a Pipeline would remove that.
param_grid = {'C': [1, 10, 100], 'gamma': [0.01, 0.1, 1.0]}
grid_search = GridSearchCV(
    svm_clf,
    param_grid,
    cv=3,
    scoring='accuracy',
    refit=True,  # explicit: best_estimator_ below relies on the final refit
    verbose=2,
)
grid_search.fit(X_train_scaled, y_train)

# Best hyperparameter combination found by the search
best_params = grid_search.best_params_

# With refit=True the search has already retrained an SVC with `best_params`
# on all of X_train_scaled, so reuse that fitted model instead of paying for a
# second, identical fit.
best_svm_clf = grid_search.best_estimator_
# Accuracy of the tuned classifier on the validation split
y_val_pred = best_svm_clf.predict(X_val_scaled)
val_accuracy = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Accuracy on the held-out test split
y_test_pred = best_svm_clf.predict(X_test_scaled)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report for the test split
print(classification_report(y_true=y_test, y_pred=y_test_pred))




In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California housing data and pull out features and target
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: fit the scaler on the training split only, then
# apply the same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a support vector regressor with an RBF kernel; every other setting is
# left at the library default.
svm_regressor = SVR(kernel='rbf').fit(X_train_scaled, y_train)

# Predictions for the held-out test split
y_pred = svm_regressor.predict(X_test_scaled)

# Report test-set error and goodness of fit
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R-squared Score: {r2:.4f}")

# Optional hyperparameter tuning sketch (left disabled; uncomment to run)
# param_grid = {'C': [1, 10, 100], 'gamma': [0.01, 0.1, 1.0]}
# grid_search = GridSearchCV(svm_regressor, param_grid, cv=5, scoring='neg_mean_squared_error')
# grid_search.fit(X_train_scaled, y_train)
# best_params = grid_search.best_params_
# best_svm_regressor = SVR(kernel='rbf', **best_params)
# best_svm_regressor.fit(X_train_scaled, y_train)
# y_pred_tuned = best_svm_regressor.predict(X_test_scaled)
# mse_tuned = mean_squared_error(y_test, y_pred_tuned)
# r2_tuned = r2_score(y_test, y_pred_tuned)
# print(f"Tuned Model - Mean Squared Error (MSE): {mse_tuned:.4f}")
# print(f"Tuned Model - R-squared Score: {r2_tuned:.4f}")


Mean Squared Error (MSE): 0.3570
R-squared Score: 0.7276
