In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import NuSVC
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os


In [2]:
# Load the Wine Quality dataset from UCI repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, delimiter=';')

# Prepare the data
X = data.drop('quality', axis=1)
y = data['quality']

# Define the parameters for SVM optimization
parameters = {'kernel': ['linear', 'poly', 'rbf'], 
              'nu': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9], 
              'coef0': np.linspace(0.0, 10.0, 11)}


In [3]:
# Initialize variables to store results across all samples
all_best_params_list = []
all_accuracy_list = []
all_max_accuracy = 0
all_max_accuracy_sample = None

In [7]:
from sklearn.metrics import accuracy_score
# Perform optimization for 10 samples
for sample in range(10):
    # Initialize variables for this sample
    best_accuracy = 0
    best_params = None
    max_accuracy = 0
    max_accuracy_sample = None
    accuracy_list = []
    
    # Split the data into training and testing sets for this sample
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=sample)

    # Perform optimization with 100 iterations for this sample
    for _ in range(100):
        try:
            # Randomly choose parameters
            params = {key: np.random.choice(value) for key, value in parameters.items()}
            
            # Train NuSVC with the chosen parameters
            svm = NuSVC(**params)
            svm.fit(X_train, y_train)
            
            # Evaluate accuracy on the test set
            accuracy = accuracy_score(y_test, svm.predict(X_test))
            
            # Update best accuracy and parameters if current accuracy is higher
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = params
            if accuracy > max_accuracy:
                max_accuracy = accuracy
        except ValueError:
            # Skip the current iteration if ValueError occurs
            continue
        
        # Append the accuracy for this iteration to the list
        accuracy_list.append(accuracy)
    
    # Append the best parameters and accuracy for this sample to the lists
    all_best_params_list.append(best_params)
    all_accuracy_list.append(accuracy_list)
    
    # Update the overall maximum accuracy and its corresponding sample
    if max_accuracy > all_max_accuracy:
        all_max_accuracy = max_accuracy
        all_max_accuracy_sample = sample


In [10]:
# Save the table of results to a CSV file
results_df = pd.DataFrame({
    "Sample#": range(1, 11),
    "Best Accuracy": [max(accuracy_list) for accuracy_list in all_accuracy_list],
    "Best SVM Parameters": [f"{params['kernel']}, {params['nu']:.2f}, {params['coef0']:.2f}" for params in all_best_params_list]
})
results_df.to_csv("results.csv", index=False)

# Save the coverage graph to a PNG file for the sample with the highest accuracy
max_accuracy_sample_index = np.argmax([max(accuracy_list) for accuracy_list in all_accuracy_list])
max_accuracy_sample_accuracy_list = all_accuracy_list[max_accuracy_sample_index]

plt.figure(figsize=(10, 6))
plt.plot(range(1, 101), max_accuracy_sample_accuracy_list)
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.title(f"Coverage Graph for Sample {max_accuracy_sample_index + 1} with Highest Accuracy (Max Accuracy: {max(max_accuracy_sample_accuracy_list):.4f})")
plt.grid(True)
plt.savefig("coverage_graph.png")
plt.show()

ValueError: All arrays must be of the same length