## In this notebook, we test LibSVM and LinearSVM on the same datasets to compare them with NSSVM.

# a5a dataset

# libsvm on a5a dataset

In [2]:
import time

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Benchmark: RBF-kernel SVC on a5a.
# NOTE(review): 'a5a.t' is presumably the LIBSVM test partition of a5a -- confirm
# that splitting this file (rather than the 'a5a' training file) is intended.
X, y = load_svmlight_file('./data/a5a.t')

# load_svmlight_file returns a sparse matrix; densify it because StandardScaler's
# default mean-centering does not accept sparse input.
X_dense = X.toarray()

# 80/20 hold-out split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_dense, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

# Only the fit call is timed; prediction cost is excluded.
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Score the fitted model on each split.
y_pred_train = svm_classifier.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_pred_train)

y_pred_test = svm_classifier.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.64%
Testing Accuracy: 84.72%
Training time: 31.50749897956848 seconds


# LinearSVM on a5a dataset

In [3]:
import time

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Benchmark: linear SVM (LinearSVC) on a5a.
X, y = load_svmlight_file('./data/a5a.t')

# load_svmlight_file returns a sparse matrix; densify it because StandardScaler's
# default mean-centering does not accept sparse input.
X_dense = X.toarray()

# 80/20 hold-out split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_dense, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# dual=False asks LinearSVC to solve the primal problem.
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

# Only the fit call is timed; prediction cost is excluded.
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Score the fitted model on each split.
y_pred_train = svm_classifier.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_pred_train)

y_pred_test = svm_classifier.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 84.83%
Testing Accuracy: 85.41%
Training time: 1.4267833232879639 seconds


# a6a dataset

# libsvm on a6a dataset

In [4]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a6a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.94%
Testing Accuracy: 84.68%
Training time: 20.131935834884644 seconds


# linear svm on a6a

In [5]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a6a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.06%
Testing Accuracy: 85.06%
Training time: 0.3125319480895996 seconds


# a7a dataset

# libsvm on a7a dataset

In [6]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a7a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 87.32%
Testing Accuracy: 83.09%
Training time: 10.666053533554077 seconds


# linear svm on a7a

In [7]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a7a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.51%
Testing Accuracy: 84.06%
Training time: 0.574347734451294 seconds


# a8a dataset

# libsvm on a8a

In [8]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a8a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 87.65%
Testing Accuracy: 84.74%
Training time: 3.4562928676605225 seconds


# linear svm on a8a data

In [9]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a8a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.77%
Testing Accuracy: 85.81%
Training time: 0.200056791305542 seconds


# a9a dataset

# libsvm on a9a

In [10]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a9a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.99%
Testing Accuracy: 84.56%
Training time: 10.658567905426025 seconds


# linear svm on a9a

In [11]:
import time  # used for the fit timing below; imported here so the cell runs on a fresh kernel

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file (returned as a sparse matrix)
X, y = load_svmlight_file('./data/a9a.t')

# Densify: StandardScaler's default mean-centering does not accept sparse input
X_dense = X.toarray()

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

# Time only the fit call
start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.42%
Testing Accuracy: 84.77%
Training time: 0.3708939552307129 seconds


# sonar, rocks vs mines dataset, UCI

# libsvm on sonar, rocks vs mines dataset

In [12]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Benchmark: RBF-kernel SVC on the UCI sonar (rocks vs. mines) data.
# The file is read as CSV straight from the UCI repository and has no header row.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
data = pd.read_csv(url, header=None)

# Every column except the last is a feature.
X = data.iloc[:, :-1].values

# The last column holds the class letter; encode 'R' as -1 and everything else as 1.
y = np.where(data.iloc[:, -1].values == 'R', -1, 1)

# 80/20 hold-out split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Score the fitted model on each split.
y_pred_train = svm_classifier.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_pred_train)

y_pred_test = svm_classifier.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 98.19%
Testing Accuracy: 88.10%


# linearSVM on sonar, rocks vs mines dataset

In [13]:
import numpy as np
import pandas as pd  # pd.read_csv is used below; imported here so the cell runs on a fresh kernel
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the sonar dataset as CSV from the UCI repository (the file has no header row)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
data = pd.read_csv(url, header=None)

# The last column contains labels: 'R' for rock and 'M' for mine
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Convert class labels 'R' and 'M' to -1 and 1
y = np.where(y == 'R', -1, 1)

# Split data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 98.80%
Testing Accuracy: 80.95%


## Dhrb data

## LibSVM on dhrb data

In [14]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.io import loadmat
import matplotlib.pyplot as plt
import time

# Load the feature matrix and the label vector from their separate .mat files
data_path = "./data/dhrb.mat"
class_path = "./data/dhrbclass.mat"
data = loadmat(data_path)
data_class = loadmat(class_path)

X = data['X']
y = data_class['y'].flatten()

# Split BEFORE scaling so the held-out rows never influence the scaling statistics
# (fitting the scaler on the full dataset leaks test information into training)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only;
# X_train / X_test hold the scaled features used by everything below
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# np.bincount rejects negative values, so shift the labels by +1 when any are negative
# (labels -1/1 become 0/2, which leaves the middle bin empty in the printed counts)
y_train_bincount = y_train + 1 if np.any(y_train < 0) else y_train
y_test_bincount = y_test + 1 if np.any(y_test < 0) else y_test

# Check label distribution
print("Training labels distribution:", np.bincount(y_train_bincount))
print("Testing labels distribution:", np.bincount(y_test_bincount))

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier.fit(X_train, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# Plotting support vectors (only meaningful when there are exactly two features)
if X_train.shape[1] == 2:
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='winter', marker='o', alpha=0.5)
    plt.scatter(svm_classifier.support_vectors_[:, 0], svm_classifier.support_vectors_[:, 1], c='red')
    plt.title("SVM Support Vectors")
    plt.show()


Training labels distribution: [39473     0  8172]
Testing labels distribution: [9823    0 2089]
Training Accuracy: 83.95%
Testing Accuracy: 83.19%


## Linear SVM on dhrb data

In [15]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Load the feature matrix and the label vector from their separate .mat files
data_path = "./data/dhrb.mat"
class_path = "./data/dhrbclass.mat"
data = loadmat(data_path)
data_class = loadmat(class_path)

X = data['X']
y = data_class['y'].flatten()

# Split BEFORE scaling so the held-out rows never influence the scaling statistics
# (fitting the scaler on the full dataset leaks test information into training)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics learned from the training split only;
# X_train / X_test hold the scaled features used by everything below
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# np.bincount rejects negative values, so shift the labels by +1 when any are negative
# (labels -1/1 become 0/2, which leaves the middle bin empty in the printed counts)
y_train_bincount = y_train + 1 if np.any(y_train < 0) else y_train
y_test_bincount = y_test + 1 if np.any(y_test < 0) else y_test

# Check label distribution
print("Training labels distribution:", np.bincount(y_train_bincount))
print("Testing labels distribution:", np.bincount(y_test_bincount))

# Initialize and train the Linear SVM (dual=False solves the primal problem)
svm_classifier = LinearSVC(C=1.0, dual=False)
svm_classifier.fit(X_train, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# Plotting support vectors (only meaningful when there are exactly two features)
if X_train.shape[1] == 2:
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='winter', marker='o', alpha=0.5)
    # LinearSVC has no support_vectors_ attribute, so recover the margin-defining rows
    # directly: those whose signed decision score times their +/-1 target is <= 1.
    # Assumes a binary problem (decision_function is then 1-D, positive for classes_[1]).
    signed_target = np.where(y_train == svm_classifier.classes_[1], 1, -1)
    in_margin = signed_target * svm_classifier.decision_function(X_train) <= 1
    plt.scatter(X_train[in_margin, 0], X_train[in_margin, 1], c='red')
    plt.title("SVM Support Vectors")
    plt.show()


Training labels distribution: [39473     0  8172]
Testing labels distribution: [9823    0 2089]
Training Accuracy: 83.37%
Testing Accuracy: 83.17%


## Iris dataset

## libsvm on iris dataset

In [17]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

# Benchmark: RBF-kernel SVC on the Iris dataset bundled with scikit-learn.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 hold-out split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Score the fitted model on each split.
y_pred_train = svm_classifier.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_pred_train)

y_pred_test = svm_classifier.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 96.67%
Testing Accuracy: 100.00%


## Linear SVC on iris dataset

In [19]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

# Benchmark: linear SVM (LinearSVC) on the Iris dataset bundled with scikit-learn.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 hold-out split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear (not RBF) SVM.
# NOTE(review): `dual` is left at the library default here, whereas the other
# LinearSVC cells in this notebook pass dual=False -- confirm this is intended.
svm_classifier = LinearSVC(C=1.0, random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Score the fitted model on each split.
y_pred_train = svm_classifier.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_pred_train)

y_pred_test = svm_classifier.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 95.00%
Testing Accuracy: 100.00%
