In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv('hdddata.csv')

# Select two classes for binary classification
class_1 = 0  # Replace with the desired class
class_2 = 1  # Replace with the desired class

# Keep only the rows whose 'failure' label is one of the two selected classes
binary_data = data[data['failure'].isin([class_1, class_2])]

# Separate features and target variable
X = binary_data.drop(columns='failure')  # Features
y = binary_data['failure']  # Target variable

# Split BEFORE imputing (80% train, 20% test). The split depends only on the
# number of rows and random_state, so the same rows land in each partition as
# when splitting already-imputed data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Impute missing values (NaN) with column means learned from the TRAINING rows
# only. Fitting the imputer on the full dataset would let statistics of the
# test rows influence the training features (data leakage) and make the
# reported test accuracy optimistic.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

# Full feature matrix imputed with the train-only statistics; kept so that any
# code still referring to X_imputed continues to work.
X_imputed = imputer.transform(X)

# Initialise the SVM classifier with scikit-learn's default settings
clf = svm.SVC()

# Train the SVM classifier on the imputed training split
clf.fit(X_train, y_train)

# A1. Get the support vectors and study them
# (the training samples that the fitted model retained as support vectors)
support_vectors = clf.support_vectors_


In [None]:
# A2. Evaluate the fitted SVM on the held-out test split.
# `score` reports the mean accuracy of the classifier's predictions for
# X_test against the true labels y_test.
accuracy = clf.score(X=X_test, y=y_test)
print("Accuracy:", accuracy)


In [None]:
# A3. Ask the fitted classifier for a class label for every test sample and
# show the resulting array so the raw output values can be inspected.
predicted_classes = clf.predict(X=X_test)
print("Predicted classes:", predicted_classes)

In [None]:
# Testing accuracy using own logic of class determination and comparing against class labels.
# A prediction is correct when it equals the true label; accuracy is the
# fraction of correct predictions. This is computed by hand (instead of calling
# a library metric) so it acts as an independent check on the library-reported
# score.
correct_predictions = predicted_classes == y_test.to_numpy()  # element-wise, by position
own_accuracy = float(correct_predictions.mean())
print("Accuracy using own logic:", own_accuracy)


In [None]:
# A4. Experimenting with various kernel functions
# Each kernel gets its own classifier object and its score is stored in
# `kernel_accuracies`. The loop deliberately does NOT rebind `clf` or
# `accuracy`: those names belong to the baseline model fitted earlier in the
# notebook, and overwriting them would make earlier cells report results for
# the last kernel tried if they are re-run.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
kernel_accuracies = {}  # kernel name -> mean accuracy on the test split
for kernel in kernels:
    kernel_clf = svm.SVC(kernel=kernel)
    kernel_clf.fit(X_train, y_train)
    kernel_accuracies[kernel] = kernel_clf.score(X_test, y_test)
    print(f"Accuracy with {kernel} kernel:", kernel_accuracies[kernel])
