In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the training split (path is relative to the notebook's working
# directory) and preview its first five rows.
df_train = pd.read_csv(filepath_or_buffer="train.csv")
df_train.head(n=5)

In [None]:
# Read the test split (path is relative to the notebook's working
# directory) and preview its first five rows.
df_test = pd.read_csv(filepath_or_buffer="test.csv")
df_test.head(n=5)

In [None]:
# Split the training frame into features and target, and discard the columns
# this notebook treats as insignificant for the analysis.
# Note: 'Survived' is removed from the features because it is the target
# itself, not because it is uninformative.

unused_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked']

Y_train = df_train['Survived']
X_train = df_train.drop(columns=['Survived'] + unused_columns)
X_test = df_test.drop(columns=unused_columns)

In [None]:
# Impute missing values, then convert categorical columns (e.g. gender) to
# numerical indicator columns: PCA works on numerical values, not categorical.

# Column means come from the training split only and are reused for the test
# split, so both splits are imputed with the same statistics (the PCA below is
# likewise fitted on the training split only).
# numeric_only=True skips string columns, which pandas >= 2.0 refuses to
# average (it raises TypeError instead of silently dropping them).
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# dtype=float keeps the indicator columns numeric instead of boolean.
X_train = pd.get_dummies(X_train, dtype=float)
# Align the test columns with the training columns: a category present in only
# one split would otherwise give the two matrices different column layouts.
X_test = pd.get_dummies(X_test, dtype=float).reindex(
    columns=X_train.columns, fill_value=0.0
)

In [None]:
# Display the current training feature frame for inspection.
X_train

In [None]:
# Display the current test feature frame for inspection.
X_test

In [None]:
class PCA_classifier:
  """Principal component analysis via eigen-decomposition of the covariance.

  Despite the name, this class does not classify anything: `fit` learns the
  principal axes of a data set and `transform` projects data onto them.

  Attributes:
    no_of_components: number of leading principal components to keep.
    components: array of shape (no_of_components, n_features) whose rows are
      the principal axes ordered by decreasing variance; None until `fit`.
    mean: per-feature mean of the data passed to `fit`; None until `fit`.
  """

  def __init__(self, no_of_components):
    """Store the number of components to keep; nothing is computed yet."""
    self.no_of_components = no_of_components
    self.components = None
    self.mean = None

  def fit(self, X):
    """Learn the feature means and the leading principal axes of X.

    Args:
      X: array-like of shape (n_samples, n_features) holding numeric values
        (a fully numeric pandas DataFrame works as well).
    """
    # Work on a plain float array so boolean/integer columns behave uniformly.
    X = np.asarray(X, dtype=float)

    # mean centering
    self.mean = np.mean(X, axis=0)
    X = X - self.mean

    # covariance; np.cov expects one variable per row, hence the transpose.
    # atleast_2d covers the single-feature case, where np.cov returns a scalar.
    cov = np.atleast_2d(np.cov(X.T))

    # The covariance matrix is symmetric, so use eigh: it returns real
    # eigenvalues and orthonormal eigenvectors.
    # Return order matters: eigenvalues first, eigenvectors second.
    eigenvalues, eigenvectors = np.linalg.eigh(cov)

    # Column i of `eigenvectors` belongs to eigenvalues[i]; transpose so that
    # each ROW is one eigenvector, which makes sorting and slicing simpler.
    eigenvectors = eigenvectors.T

    # sort eigenvectors by decreasing eigenvalue (explained variance)
    idxs = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[idxs]

    self.components = eigenvectors[:self.no_of_components]

  def transform(self, X):
    """Project X onto the principal axes learned by `fit`.

    Args:
      X: array-like of shape (n_samples, n_features), with the features in the
        same order as the data given to `fit`.

    Returns:
      ndarray of shape (n_samples, no_of_components).
    """
    # Centre with the mean learned in `fit`, not the mean of X itself.
    X = np.asarray(X, dtype=float) - self.mean

    return np.dot(X, self.components.T)

In [None]:
# Keep two components so the projection can be drawn as a 2-D scatter plot.
pca = PCA_classifier(no_of_components=2)

In [None]:
# Learn the projection from the training features only, then map both splits
# into the same principal-component space (training split first, then test).
pca.fit(X_train)
X_train_pca, X_test_pca = (pca.transform(split) for split in (X_train, X_test))

In [None]:
# Scatter of the training data in the plane of the first two principal
# components, coloured by the survival label.
fig, ax = plt.subplots()
ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1], c=Y_train)
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_title("Transformed Training Data")
plt.show()

In [None]:
# Scatter of the test data in the plane of the first two principal components;
# a single colour is used because no label is passed here.
fig, ax = plt.subplots()
ax.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c='red')
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_title("Transformed Test Data")
plt.show()