#### SVM Classifier

In [39]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [40]:
class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_epochs=1000):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_epochs = n_epochs
        self.w = None
        self.b = None


    def load_data(self, file_path):
        columns = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
                  'deg-malig', 'breast', 'breast-quad', 'irradiat', 'class']
        
        
        data_rows = []
        with open(file_path, 'r') as file:
            for line in file:
                values = [val.strip().strip("'") for val in line.strip().split(',')]
                data_rows.append(values)
        
        data = pd.DataFrame(data_rows, columns=columns)
        data = data.replace('?', np.nan).dropna()
        
        
        def convert_age(age_str):
            if '-' in age_str:
                start, end = map(int, age_str.split('-'))
                return (start + end) / 2
        
        data['age'] = data['age'].apply(convert_age)
        
        categorical_columns = data.select_dtypes(include=['object']).columns
        for column in categorical_columns:
            le = LabelEncoder()
            data[column] = le.fit_transform(data[column])
        
        return data


    def objective_function(self, w, X, y):
        return 0.5 * np.dot(w, w)  # 1/2||W||**2
    
    def constraint_function(self, w, X, y):
        return y * np.dot(X, w) - self.b   #yi(w.xi + b) >= 1
    
        cons = {'type': 'ineq', 'fun': constraint_function}
        self.w = np.zeros(X.shape[1])
        
        result = minimize(objective_function, self.w, constraints=cons)
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        y_label = np.where(y <= 0, -1, 1)  

        self.w = np.zeros(n_features)
        self.b = 0

        for epoch in range(self.n_epochs):
            for idx, x_i in enumerate(X):
                condition = y_label[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                
                if condition:
                    self.w -= self.learning_rate * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.learning_rate * (2 * self.lambda_param * self.w - np.dot(x_i, y_label[idx]))
                    self.b -= self.learning_rate * y_label[idx]

    def predict(self, X):
        approximation = np.dot(X, self.w) - self.b
        return np.where(approximation >= 0, 1, 0)

In [41]:
file_path = r"E:\KHU\ML\Breast Cancer dataset\Breast_Cancer_dataset.txt"
svm = SVM(learning_rate=0.001, lambda_param=0.01, n_epochs=1000)

data = svm.load_data(file_path)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)


TP = FP = TN = FN = 0
for i in range(len(y_test)):
    if y_test[i] == 1 and y_pred[i] == 1:
        TP += 1
    elif y_test[i] == 0 and y_pred[i] == 1:
        FP += 1
    elif y_test[i] == 0 and y_pred[i] == 0:
        TN += 1
    elif y_test[i] == 1 and y_pred[i] == 0:
        FN += 1


accuracy = (TP + TN) / (TP + TN + FP + FN)
precision = TP / (TP + FP) if (TP + FP) != 0 else 0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0
specifity = TN / (TN + FN) if (TN + FN) != 0 else 0
error_rate = 1 - accuracy


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Specifity:", specifity)
print("Error Rate:", error_rate)
print(f"TP: {TP}, FP: {FP}, FN: {FN}, TN: {TN}")

Accuracy: 0.7142857142857143
Precision: 0.36363636363636365
Recall: 0.3076923076923077
Specifity: 0.8
Error Rate: 0.2857142857142857
TP: 4, FP: 7, FN: 9, TN: 36
