In [5]:
import pandas as pd
import numpy as np

# Load Dataset
data = pd.read_csv('titanic.csv')
data = data.drop(['Name', 'Ticket', 'Cabin'], axis=1)
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Encode Categorical Variables
for column in ['Sex', 'Embarked']:
    unique_vals = data[column].unique()
    mapping = {val: idx for idx, val in enumerate(unique_vals)}
    data[column] = data[column].map(mapping)

# Split Data
def train_test_split(X, y, test_size=0.2, random_state=None):
    np.random.seed(random_state)
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    split_idx = int(X.shape[0] * (1 - test_size))
    train_indices, test_indices = indices[:split_idx], indices[split_idx:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X = data.drop('Survived', axis=1).values
y = data['Survived'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize Data
def standardize(X):
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    return (X - mean) / std

X_train_scaled = standardize(X_train)
X_test_scaled = standardize(X_test)

# PCA Implementation
def compute_pca(X, n_components):
    cov_matrix = np.cov(X.T)
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    sorted_indices = np.argsort(eigenvalues)[::-1]
    top_eigenvectors = eigenvectors[:, sorted_indices[:n_components]]
    return np.dot(X, top_eigenvectors)

X_train_pca = compute_pca(X_train_scaled, n_components=5)
X_test_pca = compute_pca(X_test_scaled, n_components=5)

# SVM Implementation
class SVM:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, n_iters=5000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        y_ = np.where(y <= 0, -1, 1)  # Ensure labels are -1 and 1

        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) + self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        approx = np.dot(X, self.w) + self.b
        return np.sign(approx)

# Logistic Regression Implementation
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            model = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(model)

            dw = (1 / n_samples) * np.dot(X.T, (predictions - y))
            db = (1 / n_samples) * np.sum(predictions - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        model = np.dot(X, self.weights) + self.bias
        predictions = self._sigmoid(model)
        return [1 if i > 0.5 else 0 for i in predictions]

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

# Train and Evaluate Models
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)
y_pred_log_reg = log_reg.predict(X_test_scaled)

log_reg_pca = LogisticRegression()
log_reg_pca.fit(X_train_pca, y_train)
y_pred_log_reg_pca = log_reg_pca.predict(X_test_pca)

# SVM from Scratch
y_train_svm = np.where(y_train <= 0, -1, 1)
y_test_svm = np.where(y_test <= 0, -1, 1)

svm = SVM()
svm.fit(X_train_scaled, y_train_svm)
y_pred_svm = svm.predict(X_test_scaled)

svm_pca = SVM()
svm_pca.fit(X_train_pca, y_train_svm)
y_pred_svm_pca = svm_pca.predict(X_test_pca)

# Accuracy Comparison
def accuracy_score(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

print("Logistic Regression Accuracy (Original):", accuracy_score(y_test, y_pred_log_reg))
print("Logistic Regression Accuracy (PCA):", accuracy_score(y_test, y_pred_log_reg_pca))
print("SVM Accuracy (Original):", accuracy_score(y_test_svm, y_pred_svm))
print("SVM Accuracy (PCA):", accuracy_score(y_test_svm, y_pred_svm_pca))


Logistic Regression Accuracy (Original): 0.7988826815642458
Logistic Regression Accuracy (PCA): 0.6815642458100558
SVM Accuracy (Original): 0.36312849162011174
SVM Accuracy (PCA): 0.36312849162011174
