<a href="https://colab.research.google.com/github/praneykalra16/Parameter_Optimisation/blob/main/Parameter_Optimisation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.6-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.6


In [10]:
from ucimlrepo import fetch_ucirepo

# Download the dataset registered under UCI id 602 (reported as 'Dry Bean' in the metadata below)
dry_bean = fetch_ucirepo(id=602)

# Split the payload into predictors and target (pandas DataFrames per the ucimlrepo client)
dataset_payload = dry_bean.data
X, y = dataset_payload.features, dataset_payload.targets

# Show the dataset-level metadata first, then the per-variable description table
for dataset_info in (dry_bean.metadata, dry_bean.variables):
    print(dataset_info)


{'uci_id': 602, 'name': 'Dry Bean', 'repository_url': 'https://archive.ics.uci.edu/dataset/602/dry+bean+dataset', 'data_url': 'https://archive.ics.uci.edu/static/public/602/data.csv', 'abstract': 'Images of 13,611 grains of 7 different registered dry beans were taken with a high-resolution camera. A total of 16 features; 12 dimensions and 4 shape forms, were obtained from the grains.', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 13611, 'num_features': 16, 'feature_types': ['Integer', 'Real'], 'demographics': [], 'target_col': ['Class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2020, 'last_updated': 'Thu Mar 28 2024', 'dataset_doi': '10.24432/C50S4B', 'creators': [], 'intro_paper': {'title': 'Multiclass classification of dry beans using computer vision and machine learning techniques', 'authors': 'M. Koklu, Ilker Ali Özkan', 'published_in': 'Computers and Electronic

In [13]:
import pandas as pd

# Direct CSV endpoint of the dataset on the UCI archive
data_path = "https://archive.ics.uci.edu/static/public/602/data.csv"

# Read the whole CSV into a single DataFrame (features and the 'Class' target together)
bean_data = pd.read_csv(filepath_or_buffer=data_path)

# Preview the leading five rows to get a feel for the columns and value ranges
bean_data.head(n=5)

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRatio,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,Roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.272751,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [14]:
from sklearn.model_selection import train_test_split

# How many independent random train/test partitions to draw
num_samples = 10

# One list per partition role; position k in each list belongs to the k-th partition
X_train_samples, X_test_samples = [], []
y_train_samples, y_test_samples = [], []

# Separate predictors from the 'Class' target once, instead of on every iteration
feature_frame = bean_data.drop(columns=['Class'])
class_labels = bean_data['Class']

for i in range(num_samples):
    # 70/30 split; using the loop index as the seed makes each partition
    # different from the others yet reproducible across runs
    X_train, X_test, y_train, y_test = train_test_split(
        feature_frame,
        class_labels,
        test_size=0.3,
        random_state=i,
    )

    # Stash this partition's four pieces in their respective lists
    X_train_samples.append(X_train)
    X_test_samples.append(X_test)
    y_train_samples.append(y_train)
    y_test_samples.append(y_test)

# Sanity check: report the dimensions of the first partition only
shape_report = (
    ("Shape of X_train for sample 1:", X_train_samples),
    ("Shape of X_test for sample 1:", X_test_samples),
    ("Shape of y_train for sample 1:", y_train_samples),
    ("Shape of y_test for sample 1:", y_test_samples),
)
for message, partitions in shape_report:
    print(message, partitions[0].shape)


Shape of X_train for sample 1: (9527, 16)
Shape of X_test for sample 1: (4084, 16)
Shape of y_train for sample 1: (9527,)
Shape of y_test for sample 1: (4084,)


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Hyperparameter search space for the SVM.
# `gamma` is a kernel coefficient that the linear kernel ignores, so a single flat grid
# would refit the *same* linear model once per gamma value. Using one sub-grid per kernel
# searches exactly the same distinct models (3 linear + 9 RBF) with a third fewer fits.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 0.01, 0.001]},
]

# NOTE(review): the features shown by bean_data.head() span very different magnitudes
# (e.g. Area in the tens of thousands vs. ShapeFactor columns below 0.01). SVMs are
# sensitive to feature scale; consider standardising the features inside a Pipeline.

# Per-sample results: the winning hyperparameters and their held-out test accuracy
best_params_list = []
best_accuracy_list = []

# Run an independent grid search on each of the train/test partitions
for i in range(num_samples):
    # Fresh, unfitted classifier for every partition
    svm = SVC(random_state=42)

    # 5-fold cross-validated search over the grid, parallelised across all cores
    grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1, verbose=1)
    grid_search.fit(X_train_samples[i], y_train_samples[i])

    # Score the refitted best estimator on this partition's held-out test set
    best_params_list.append(grid_search.best_params_)
    best_accuracy = grid_search.best_estimator_.score(X_test_samples[i], y_test_samples[i])
    best_accuracy_list.append(best_accuracy)

    # Print the results
    print(f"Sample {i+1}: Best parameters - {grid_search.best_params_}, Best accuracy - {best_accuracy}")

# Index of the partition whose best model achieved the highest test accuracy
# (ties resolve to the earliest partition)
max_accuracy_index = best_accuracy_list.index(max(best_accuracy_list))
print(f"\nSample with maximum accuracy: Sample {max_accuracy_index+1}")

# Hyperparameters belonging to that partition; they can be passed straight to SVC(**...)
best_params_max_accuracy = best_params_list[max_accuracy_index]
print("Best parameters for sample with maximum accuracy:", best_params_max_accuracy)


In [None]:
import warnings

import matplotlib.pyplot as plt
from sklearn.exceptions import ConvergenceWarning

# Train/test data of the partition that achieved the highest test accuracy
X_train_best = X_train_samples[max_accuracy_index]
y_train_best = y_train_samples[max_accuracy_index]
X_test_best = X_test_samples[max_accuracy_index]
y_test_best = y_test_samples[max_accuracy_index]

# A fitted SVC exposes only its final state: `n_iter_` is an array with one entry per
# pairwise sub-classifier and `score()` is a single number, so the two cannot be plotted
# against each other. To obtain an actual convergence curve, refit the model with a
# growing cap on solver iterations and record the accuracy reached at each cap.
iteration_caps = [100, 250, 500, 1000, 2500, 5000, 10000]
train_accuracy_curve = []
test_accuracy_curve = []

for iteration_cap in iteration_caps:
    svm_max_accuracy = SVC(**best_params_max_accuracy, random_state=42, max_iter=iteration_cap)

    # Hitting the iteration cap before convergence is expected here, so silence
    # only that specific warning while fitting.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        svm_max_accuracy.fit(X_train_best, y_train_best)

    train_accuracy_curve.append(svm_max_accuracy.score(X_train_best, y_train_best))
    test_accuracy_curve.append(svm_max_accuracy.score(X_test_best, y_test_best))

# After the loop, `svm_max_accuracy` holds the model fitted with the largest iteration cap.

# Plot the convergence graph: accuracy as a function of the allowed solver iterations
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(iteration_caps, train_accuracy_curve, marker='o', label='Training Accuracy')
ax.plot(iteration_caps, test_accuracy_curve, marker='o', label='Testing Accuracy')
ax.set_xscale('log')  # the caps are spaced roughly geometrically
ax.set_title('Convergence Graph for SVM (Sample with Maximum Accuracy)')
ax.set_xlabel('Number of Iterations')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()
