In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Load the dataset
dataset = pd.read_excel('Dry_Bean_Dataset\Dry_Bean_Dataset.xlsx')

# Ensure dataset size between 5k and 30k
if len(dataset) < 5000 or len(dataset) > 30000:
    print("Dataset size is not within the specified range.")
    exit()

# Split the dataset into training and testing sets with 10 different samples
samples = []
for i in range(10):
    train_data, test_data = train_test_split(dataset, test_size=0.3, random_state=i)
    X_train, X_test = train_data.drop(columns=['Class']), test_data.drop(columns=['Class'])
    y_train, y_test = train_data['Class'], test_data['Class']
    samples.append((X_train, X_test, y_train, y_test))

# Define parameter grid for GridSearchCV
param_grid = {'C': [0.1, 1, 10],
              'gamma': [0.001, 0.01, 0.1, 1],
              'kernel': ['linear', 'rbf', 'poly']}

best_accuracies = []
best_params = []

# Iterate over samples
for i, (X_train, X_test, y_train, y_test) in enumerate(samples):
    print(f"Processing sample {i+1}...")
    
    # Initialize SVM classifier
    svm = SVC()
    
    # Perform grid search
    grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)
    
    # Get best accuracy and parameters
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    
    best_accuracies.append(best_accuracy)
    best_params.append(best_param)
    
    print(f"Best accuracy for sample {i+1}: {best_accuracy:.4f}")
    print(f"Best parameters: {best_param}")

# Find the sample with maximum accuracy
max_accuracy_index = np.argmax(best_accuracies)
max_accuracy_sample = max_accuracy_index + 1
max_accuracy = best_accuracies[max_accuracy_index]
max_accuracy_params = best_params[max_accuracy_index]

print(f"\nSample with maximum accuracy (Sample {max_accuracy_sample}):")
print(f"Best accuracy: {max_accuracy:.4f}")
print(f"Best parameters: {max_accuracy_params}")

# Plot convergence graph for the sample with maximum accuracy
X_train, X_test, y_train, y_test = samples[max_accuracy_index]
svm = SVC(**max_accuracy_params)
svm.fit(X_train, y_train)

# Get convergence history
convergence_history = svm.decision_function(X_test)

# Plot convergence graph
plt.plot(convergence_history)
plt.xlabel('Iterations')
plt.ylabel('Accuracy')
plt.title('Fitness')
plt.show()


Processing sample 1...


In [10]:

# Importing data
data = pd.read_excel('Dry_Bean_Dataset\Dry_Bean_Dataset.xlsx')
data = data.sample(frac=1).reset_index(drop=True)  # Shuffle the data
data['Class'] = pd.Categorical(data['Class'])

iteration = 1000
kernel_list = ['rbfdot', 'polydot', 'vanilladot', 'tanhdot', 'laplacedot', 'anovadot']
# Accuracy dataframe
df = pd.DataFrame(columns=['Sample', 'Best Accuracy', 'Kernel', 'Nu', 'Epsilon'])

final_acc = 0



In [11]:
# Generating samples
my_data = [data.sample(n=1000, replace=False) for i in range(10)]


In [21]:
from sklearn.svm import NuSVC
from sklearn.preprocessing import MinMaxScaler
import random

kernel_list = ['linear', 'poly', 'rbf', 'sigmoid'] 

for i in range(10):
    current = my_data[i]
    train_data, test_data = train_test_split(current, test_size=0.3, random_state=i)
  
    best_accuracy = 0
    best_kernel = ""
    best_epsilon = 0
    acc = []
    iter_ = []

    for j in range(1, iteration + 1):
        k = random.choice(kernel_list)
        e = random.random()

        X_train = train_data.iloc[:, :-1]
        y_train = train_data.iloc[:, -1]
        X_test = test_data.iloc[:, :-1]
        y_test = test_data.iloc[:, -1]

        # Scale the input features using Min-Max scaling
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the NuSVC model using the scaled data
        model = NuSVC(kernel=k, gamma='auto')
        model.fit(X_train_scaled, y_train)
        
        predicted = model.predict(X_test)
        accuracy = round(np.mean(y_test == predicted) * 100, 2)
        
        if j % 50 == 0:
            acc.append(accuracy)
            iter_.append(j)
        
        if accuracy > best_accuracy:
            best_kernel = k
            best_epsilon = e
            best_accuracy = accuracy
    
    if best_accuracy > final_acc:
        final_acc = best_accuracy
        plt.figure()
        plt.plot(iter_, acc)
        plt.xlabel('Iterations')
        plt.ylabel('Accuracy')
        plt.title('Fitness(BestAcc)')
        plt.xticks(np.arange(0, iteration+1, step=100))
        plt.yticks(np.arange(0, 101, step=10))
        plt.grid(True)
        plt.show()
    
    df = df.append({'Sample': i+1, 'Best Accuracy': best_accuracy, 
                    'Kernel': best_kernel, 'Epsilon': round(best_epsilon, 2)}, ignore_index=True)


ValueError: specified nu is infeasible

In [4]:
# Split dataset into training and testing sets with 10 different samples
samples = []
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)
    samples.append((X_train, X_test, y_train, y_test))


In [5]:
# Define parameter grid for GridSearchCV
param_grid = {'kernel': ['linear', 'rbf', 'poly'],
              'nu': [0.1, 0.3, 0.5],
              'epsilon': [0.1, 0.2, 0.3]}

best_accuracies = []
best_params = []


In [6]:

# Iterate over samples
for i, (X_train, X_test, y_train, y_test) in enumerate(samples):
    print(f"Processing sample {i+1}...")
    
    # Initialize SVM classifier
    svm = SVC()
    
    # Perform grid search
    grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)
    
    # Get best accuracy and parameters
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    
    best_accuracies.append(best_accuracy)
    best_params.append(best_param)
    
    print(f"Best accuracy for sample {i+1}: {best_accuracy:.4f}")
    print(f"Best parameters: {best_param}")


Processing sample 1...


ValueError: Invalid parameter 'epsilon' for estimator SVC(). Valid parameters are: ['C', 'break_ties', 'cache_size', 'class_weight', 'coef0', 'decision_function_shape', 'degree', 'gamma', 'kernel', 'max_iter', 'probability', 'random_state', 'shrinking', 'tol', 'verbose'].

In [None]:
# Find the sample with maximum accuracy
max_accuracy_index = np.argmax(best_accuracies)
max_accuracy_sample = max_accuracy_index + 1
max_accuracy = best_accuracies[max_accuracy_index]
max_accuracy_params = best_params[max_accuracy_index]

print(f"\nSample with maximum accuracy (Sample {max_accuracy_sample}):")
print(f"Best accuracy: {max_accuracy:.4f}")
print(f"Best parameters: {max_accuracy_params}")



In [None]:

# Plot convergence graph for the sample with maximum accuracy
X_train, X_test, y_train, y_test = samples[max_accuracy_index]
svm = SVC(**max_accuracy_params)
svm.fit(X_train, y_train)

# Get convergence history
convergence_history = svm.decision_function(X_test)

# Plot convergence graph
plt.plot(convergence_history)
plt.xlabel('Iterations')
plt.ylabel('Accuracy')
plt.title('Fitness')
plt.show()
