In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import time

## libsvm

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Load the data
data_path = "/content/drive/MyDrive/ml_data/dhrb.mat"
class_path = "/content/drive/MyDrive/ml_data/dhrbclass.mat"
data = loadmat(data_path)
data_class = loadmat(class_path)

X = data['X']
y = data_class['y'].flatten()

# Adjust labels if they include negative values
if np.any(y < 0):
    # Assuming binary classification with labels -1 and 1
    y_bincount = y + 1  # Shifts -1 to 0 and 1 to 2
else:
    y_bincount = y

# Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Adjusting y_train and y_test for bincount
y_train_bincount = y_train + 1 if np.any(y_train < 0) else y_train
y_test_bincount = y_test + 1 if np.any(y_test < 0) else y_test

# Check label distribution
print("Training labels distribution:", np.bincount(y_train_bincount))
print("Testing labels distribution:", np.bincount(y_test_bincount))

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier.fit(X_train, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# Plotting support vectors (optional, if dimensions allow)
if X_train.shape[1] == 2:
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='winter', marker='o', alpha=0.5)
    plt.scatter(svm_classifier.support_vectors_[:, 0], svm_classifier.support_vectors_[:, 1], c='red')
    plt.title("SVM Support Vectors")
    plt.show()


Training labels distribution: [39473     0  8172]
Testing labels distribution: [9823    0 2089]


## grid search for best params

In [None]:
# from sklearn.model_selection import GridSearchCV

# # Define the parameter grid
# param_grid = {
#     'C': [0.1, 1, 10],
#     'gamma': ['scale', 'auto', 0.1, 0.01],
#     'kernel': ['rbf']
# }

# # Initialize the SVM classifier
# svm_classifier = SVC()

# # Initialize GridSearchCV
# grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5)

# # Perform grid search
# grid_search.fit(X_train, y_train)

# # Get the best parameters and best score
# best_params = grid_search.best_params_
# best_score = grid_search.best_score_

# print("Best Parameters:", best_params)
# print("Best Cross-Validation Score:", best_score)

# # Predictions and evaluations with best parameters
# best_svm_classifier = grid_search.best_estimator_
# y_pred_train_best = best_svm_classifier.predict(X_train)
# y_pred_test_best = best_svm_classifier.predict(X_test)

# train_accuracy_best = accuracy_score(y_train, y_pred_train_best)
# test_accuracy_best = accuracy_score(y_test, y_pred_test_best)

# print(f"Training Accuracy with Best Parameters: {train_accuracy_best * 100:.2f}%")
# print(f"Testing Accuracy with Best Parameters: {test_accuracy_best * 100:.2f}%")


## linear svm

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Load the data
data_path = "/content/drive/MyDrive/ml_data/dhrb.mat"
class_path = "/content/drive/MyDrive/ml_data/dhrbclass.mat"
data = loadmat(data_path)
data_class = loadmat(class_path)

X = data['X']
y = data_class['y'].flatten()

# Adjust labels if they include negative values
if np.any(y < 0):
    # Assuming binary classification with labels -1 and 1
    y_bincount = y + 1  # Shifts -1 to 0 and 1 to 2
else:
    y_bincount = y

# Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Adjusting y_train and y_test for bincount
y_train_bincount = y_train + 1 if np.any(y_train < 0) else y_train
y_test_bincount = y_test + 1 if np.any(y_test < 0) else y_test

# Check label distribution
print("Training labels distribution:", np.bincount(y_train_bincount))
print("Testing labels distribution:", np.bincount(y_test_bincount))

# Initialize and train the SVM with RBF kernel
svm_classifier = LinearSVC(C=1.0, dual=False)
svm_classifier.fit(X_train, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# Plotting support vectors (optional, if dimensions allow)
if X_train.shape[1] == 2:
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='winter', marker='o', alpha=0.5)
    plt.scatter(svm_classifier.support_vectors_[:, 0], svm_classifier.support_vectors_[:, 1], c='red')
    plt.title("SVM Support Vectors")
    plt.show()


# a5a.t dataset

# libsvm on a5a dataset

In [3]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a5a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.64%
Testing Accuracy: 84.72%
Training time: 39.91389226913452 seconds


# LinearSVM on a5a dataset

In [4]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a5a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 84.83%
Testing Accuracy: 85.41%
Training time: 2.8468470573425293 seconds


# a6a dataset

# libsvm on a6a dataset

In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a6a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.94%
Testing Accuracy: 84.68%
Training time: 36.4747428894043 seconds


# linear svm on a6a

In [6]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a6a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.06%
Testing Accuracy: 85.06%
Training time: 0.6994783878326416 seconds


# a7a dataset

# libsvm on a7a dataset

In [7]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a7a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 87.32%
Testing Accuracy: 83.09%
Training time: 21.541465044021606 seconds


# linear svm on a7a

In [8]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a7a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.51%
Testing Accuracy: 84.06%
Training time: 1.7233028411865234 seconds


# a8a dataset

# libsvm on a8a

In [14]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a8a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 87.65%
Testing Accuracy: 84.74%
Training time: 2.7495572566986084 seconds


# linear svm on a8a data

In [10]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a8a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.77%
Testing Accuracy: 85.81%
Training time: 0.6738629341125488 seconds


# a9a dataset

# libsvm on a9a

In [12]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a9a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 86.99%
Testing Accuracy: 84.56%
Training time: 19.59955334663391 seconds


# linear svm on a9a

In [13]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a9a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)

start_time = time.time()
svm_classifier.fit(X_train_scaled, y_train)
elapsed_time = time.time() - start_time

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")
print("Training time:", elapsed_time, "seconds")


Training Accuracy: 85.42%
Testing Accuracy: 84.77%
Training time: 0.563878059387207 seconds


# sonar, rocks vs mines dataset, UCI

# libsvm on sonar, rocks vs mines dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
data = pd.read_csv(url, header=None)

# The last column contains labels: 'R' for rock and 'M' for mine
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Convert class labels 'R' and 'M' to -1 and 1
y = np.where(y == 'R', -1, 1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM with RBF kernel
svm_classifier = SVC(C=1.0, kernel='rbf', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 98.19%
Testing Accuracy: 88.10%


# linearSVM on sonar, rocks vs mines dataset

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file

# Load the dataset from the LIBSVM format file
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
data = pd.read_csv(url, header=None)

# The last column contains labels: 'R' for rock and 'M' for mine
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Convert class labels 'R' and 'M' to -1 and 1
y = np.where(y == 'R', -1, 1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear SVM
svm_classifier = LinearSVC(C=1.0, dual=False, random_state=42)
svm_classifier.fit(X_train_scaled, y_train)

# Predictions and evaluations
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 98.80%
Testing Accuracy: 80.95%
