In [None]:
#FINAL CODE

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the passenger table; expects 'titanic.csv' in the working directory.
df = pd.read_csv('titanic.csv')

# Quick inspection. Only info() prints by itself; the frames returned by
# head() and describe() are discarded unless an interactive shell shows them.
df.head()
df.info()
df.describe()

# Remove the identifier / free-text columns so only model features remain.
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Impute missing values by assigning the filled column back to the frame.
# The chained `df[col].fillna(..., inplace=True)` form acts on a temporary
# object: pandas 2.x warns about it and under Copy-on-Write it leaves `df`
# unchanged, so the NaNs would survive into the feature matrix.
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Encode the categorical columns as integers (values outside the mapping
# become NaN).
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Feature matrix (every remaining column except the label) and label vector.
X = df.drop('Survived', axis=1).values
y = df['Survived'].values

# Standardize each feature column to zero mean and unit variance.
# NOTE: the mean/std are computed over all rows, i.e. before the
# train/test split below.
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Prepend a column of ones so theta[0] acts as the intercept.
X = np.c_[np.ones(X.shape[0]), X]

# Reproducible 80/20 shuffle split.
np.random.seed(42)
indices = np.random.permutation(X.shape[0])
train_size = int(0.8 * X.shape[0])
X_train = X[indices[:train_size]]
y_train = y[indices[:train_size]]
X_test = X[indices[train_size:]]
y_test = y[indices[train_size:]]
 
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The direct formula overflows inside ``np.exp`` for large negative ``z``
    and emits a RuntimeWarning. This version only ever exponentiates a
    non-positive number, so it cannot overflow.

    Args:
        z: Scalar or array of real values.

    Returns:
        Values in [0, 1] with the same shape as ``z``; a NumPy scalar when
        ``z`` is a scalar.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in [0, 1]
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Both are the same
    # function, each written so that only `decay` appears.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d results to a scalar; n-d arrays pass through unchanged.
    return result[()]
 
def cost_logistic(theta, X, y):
    """Return the mean binary cross-entropy of a logistic model.

    Computed directly from the linear scores ``z = X.dot(theta)`` instead of
    from ``log(sigmoid(z))``. Taking the log of a probability that has
    saturated to exactly 0 or 1 gives ``-inf``, and ``0 * -inf`` turns the
    whole cost into NaN; the log-sum-exp form below stays finite.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: Labels in {0, 1}, one per row of ``X``.

    Returns:
        The average negative log-likelihood over the rows of ``X``.
    """
    m = X.shape[0]
    z = X.dot(theta)
    # -log(sigmoid(z))     = log(1 + e^-z) = logaddexp(0, -z)
    # -log(1 - sigmoid(z)) = log(1 + e^z)  = logaddexp(0,  z)
    neg_log_h = np.logaddexp(0, -z)
    neg_log_one_minus_h = np.logaddexp(0, z)
    return (1 / m) * (y.T.dot(neg_log_h) + (1 - y).T.dot(neg_log_one_minus_h))
 
def gradient_logistic(theta, X, y):
    """Return the gradient of the mean cross-entropy cost w.r.t. ``theta``.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: Target labels, one per row of ``X``.

    Returns:
        ``X.T @ (sigmoid(X @ theta) - y)`` scaled by ``1 / number_of_rows``.
    """
    n_samples = X.shape[0]
    residual = sigmoid(X.dot(theta)) - y
    return (1 / n_samples) * X.T.dot(residual)
 
def gradient_descent_logistic(X, y, alpha, iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Args:
        X: Design matrix with one row per sample.
        y: Target labels, one per row of ``X``.
        alpha: Step size applied to every gradient update.
        iterations: Number of updates to perform.

    Returns:
        A ``(theta, costs)`` tuple: the final weights (started from zeros)
        and a list holding the cost measured after each update.
    """
    weights = np.zeros(X.shape[1])
    history = []
    for _ in range(iterations):
        step = alpha * gradient_logistic(weights, X, y)
        weights = weights - step
        history.append(cost_logistic(weights, X, y))
    return weights, history

def predict_logistic(theta, X):
    """Return 0/1 predictions: 1 where the modelled probability is >= 0.5."""
    probabilities = sigmoid(X.dot(theta))
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

def accuracy_logistic(theta, X, y):
    """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
    hits = predict_logistic(theta, X) == y
    return np.mean(hits)

# Logistic regression: step size and number of gradient-descent updates.
alpha, iterations = 0.01, 1000
theta_logistic, costs_logistic = gradient_descent_logistic(
    X_train, y_train, alpha, iterations
)
 
def cost_svm(theta, X, y, C):
    """Return the soft-margin SVM objective for a linear model.

    Args:
        theta: Weight vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: Labels multiplied against the scores (the caller in this file
            passes values in {-1, +1}).
        C: Weight of the summed hinge loss relative to the L2 term.

    Returns:
        ``0.5 * theta.theta + C * sum(max(0, 1 - y * (X @ theta)))``.
    """
    margins = y * X.dot(theta)
    hinge = np.maximum(0, 1 - margins)
    penalty = (1 / 2) * theta.T.dot(theta)
    return penalty + C * np.sum(hinge)
 
def gradient_svm(theta, X, y, C):
    """Return a subgradient of the soft-margin SVM objective.

    Only samples whose margin ``y * (X @ theta)`` is below 1 contribute to
    the hinge term; each contributes ``-y * x``.

    Args:
        theta: Weight vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: Labels multiplied against the scores.
        C: Weight of the hinge term.

    Returns:
        ``theta + C * X.T @ (violated * -y)``.
    """
    scores = X.dot(theta)
    violated = (y * scores < 1).astype(int)
    hinge_direction = X.T.dot(violated * -y)
    return theta + C * hinge_direction


def gradient_descent_svm(X, y, C, alpha, iterations):
    """Fit linear-SVM weights with batch (sub)gradient descent.

    Args:
        X: Design matrix with one row per sample.
        y: Labels passed through to the cost and gradient functions.
        C: Weight of the hinge term in the objective.
        alpha: Step size applied to every update.
        iterations: Number of updates to perform.

    Returns:
        A ``(theta, J_history)`` tuple: the final weights (started from
        zeros) and a list holding the objective after each update.
    """
    weights = np.zeros(X.shape[1])
    objective_trace = []
    for _ in range(iterations):
        step = alpha * gradient_svm(weights, X, y, C)
        weights = weights - step
        objective_trace.append(cost_svm(weights, X, y, C))
    return weights, objective_trace


def predict_svm(theta, X):
    """Return 0/1 predictions: 1 where the linear score is non-negative."""
    scores = X.dot(theta)
    is_positive = scores >= 0
    return is_positive.astype(int)

 
# SVM: hinge weight, step size and number of updates.
C, alpha, iterations = 1, 0.01, 1000
# The labels are rescaled as 2*y - 1 before training (maps 0 -> -1 and
# 1 -> +1, assuming y_train holds 0/1 values).
theta_svm, J_history = gradient_descent_svm(
    X_train, 2 * y_train - 1, C, alpha, iterations
)


# Plot both training curves. The SVM history is stored in `J_history`
# (there is no `costs_svm`), and each x-range is taken from the length of
# its own history, so it stays correct even if `iterations` is reassigned.
# Note: the logistic cost is a per-sample mean while the SVM objective sums
# the hinge loss, so the two curves are on different scales.
plt.plot(range(len(costs_logistic)), costs_logistic, label='Logistic Regression')
plt.plot(range(len(J_history)), J_history, label='SVM')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.legend()
plt.show()

# Held-out accuracy of the logistic-regression model.
acc_logistic = accuracy_logistic(theta_logistic, X_test, y_test)
print('Logistic Regression Accuracy: {:.2f}%'.format(acc_logistic * 100))


def accuracy_svm(theta, X, y):
    """Return the fraction of rows of ``X`` whose SVM prediction equals ``y``.

    ``predict_svm`` emits 0/1 labels, so ``y`` is expected in the same 0/1
    encoding (not the -1/+1 encoding used during training).
    """
    return np.mean(predict_svm(theta, X) == y)


# Held-out accuracy of the SVM model.
acc_svm = accuracy_svm(theta_svm, X_test, y_test)
print('SVM Accuracy: {:.2f}%'.format(acc_svm * 100))

