In [58]:
import openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import matplotlib.pyplot as plt

In [59]:
# OpenML dataset id used throughout this notebook.
DATASET_ID = 45919

# Pass the download flags explicitly instead of relying on get_dataset's
# implicit defaults. All three are True so the data, qualities and feature
# metadata are fetched eagerly regardless of what the library's default is.
# NOTE(review): the recorded output of this cell echoes the get_dataset call
# the way a Python warning does -- presumably OpenML's FutureWarning about
# these defaults changing; confirm against the installed openml version.
dataset = openml.datasets.get_dataset(
    DATASET_ID,
    download_data=True,
    download_qualities=True,
    download_features_meta_data=True,
)

# Split the table into features X and the dataset's default target y;
# the two remaining return values are not used in this notebook.
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

  dataset = openml.datasets.get_dataset(45919)


In [60]:
# Encode the target as integer class ids whenever it is not already numeric.
# Testing only for dtype 'object' skips pandas 'category' and string dtypes,
# so such targets would silently bypass the encoder.
if not pd.api.types.is_numeric_dtype(y):
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

# Replace missing feature values with the per-column mean.
# NOTE(review): the imputer is fitted on the full dataset, before any
# train/test split, so held-out rows contribute to the fill values.
# NOTE(review): strategy='mean' assumes every column of X is numeric -- confirm
# for this dataset.
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

In [61]:
# Hyper-parameter grid explored by the SVM grid search:
#   kernel -- kernel function used by the classifier
#   C      -- regularization parameter
parameters = dict(
    kernel=['linear', 'rbf', 'poly'],
    C=[0.1, 1, 10],
)

In [62]:
# Running record of the best grid-search result: the highest score seen so far
# and the parameter combination that produced it (none yet).
best_accuracy, best_params = 0, None

In [63]:
# Try ten different 70/30 train/test splits (seeded 0..9). For each split, run
# a 5-fold cross-validated grid search over `parameters` and remember the split
# whose best cross-validation score is the highest seen so far.
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(
        X_imputed,
        y,
        test_size=0.3,
        random_state=i,
    )

    # Fresh, default-configured classifier for this split's search.
    svc = SVC()
    clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=5, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)

    # Only a strictly better cross-validation score replaces the current best.
    improved = clf.best_score_ > best_accuracy
    if improved:
        best_accuracy, best_params, best_sample_index = clf.best_score_, clf.best_params_, i
        best_X_train, best_y_train = X_train, y_train
        best_X_test, best_y_test = X_test, y_test
 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [None]:
# Rebuild a classifier from the winning hyper-parameters found by the search.
best_svc = SVC(**best_params)

# Fit it on the training portion of the best-scoring split.
best_svc.fit(X=best_X_train, y=best_y_train)

In [None]:
# Parallel lists filled by the iteration sweep: one test accuracy per
# iteration cap, and the cap that produced it.
accuracies, iterations = [], []

In [None]:
# Accuracy-vs-iterations sweep: train the best configuration with the solver
# capped at 1..MAX_ITERATIONS iterations and record test accuracy for each cap.
MAX_ITERATIONS = 100

# Start from empty lists so re-running this cell does not append a second
# copy of the sweep to results left over from a previous run.
accuracies = []
iterations = []

for iteration in range(1, MAX_ITERATIONS + 1):
    # Fit a separate, iteration-capped estimator each time. Calling
    # set_params(max_iter=...) on best_svc and refitting it would leave
    # best_svc as a model truncated at the last cap once the loop ends.
    capped_svc = SVC(**{**best_params, 'max_iter': iteration})
    capped_svc.fit(best_X_train, best_y_train)

    # Score on the held-out part of the best split.
    accuracy = capped_svc.score(best_X_test, best_y_test)
    accuracies.append(accuracy)
    iterations.append(iteration)

In [None]:
# Plot test accuracy against the solver iteration cap; the title reports the
# best cross-validation score found by the grid search.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(iterations, accuracies, marker='o')
ax.set_title(f"Fitness (Best Accuracy: {best_accuracy})")
ax.set_xlabel("Iteration")
ax.set_ylabel("Accuracy")
ax.grid(True)
plt.show()