In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
# Importing the dataset
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

In [3]:
dataset

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0
...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0


In [4]:
# Applying Kernel PCA
def apply_Kernal_PCA(indep_X):
    from sklearn.decomposition import KernelPCA
    kpca = KernelPCA(n_components = 2, kernel = 'rbf')
    X_transformed = kpca.fit_transform(indep_X)
    explained_variance = kpca.transform(indep_X)
    return X_transformed,explained_variance

# Function to split the dataset and apply scaling
def split_scale(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

# Function to make predictions and generate the confusion matrix
def cm_prediction(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, accuracy, report, X_test, y_test, cm

# Function to perform logistic regression
def logistic(X_train, y_train, X_test, y_test):
    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform linear SVM
def svm_linear(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform non-linear SVM
def svm_nl(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform Naive Bayes
def naive(X_train, y_train, X_test, y_test):
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform K-NN
def knn(X_train, y_train, X_test, y_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform Decision Tree
def decision_tree(X_train, y_train, X_test, y_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

# Function to perform Random Forest
def random_forest(X_train, y_train, X_test, y_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)



In [5]:
# Function to create a DataFrame with accuracy results
def KernalPCA_classification(acc_log, acc_svml, acc_svmnl, acc_knn, acc_nav, acc_des, acc_rf):
    dataframe = pd.DataFrame(index=['KernalPCA'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random'])
    dataframe['Logistic'] = acc_log
    dataframe['SVMl'] = acc_svml
    dataframe['SVMnl'] = acc_svmnl
    dataframe['KNN'] = acc_knn
    dataframe['Navie'] = acc_nav
    dataframe['Decision'] = acc_des
    dataframe['Random'] = acc_rf
    return dataframe


In [6]:
# Applying KernalPCA
X_kernalpca, explained_variance = apply_Kernal_PCA(X)

# Splitting and scaling the dataset
X_train, X_test, y_train, y_test = split_scale(X_kernalpca, y)


In [7]:
 #Lists to store accuracies
acc_log = []
acc_svml = []
acc_svmnl = []
acc_knn = []
acc_nav = []
acc_des = []
acc_rf = []


In [8]:
# Logistic Regression
classifier, accuracy, report, X_test, y_test, cm = logistic(X_train, y_train, X_test, y_test)
acc_log.append(accuracy)

# Linear SVM
classifier, accuracy, report, X_test, y_test, cm = svm_linear(X_train, y_train, X_test, y_test)
acc_svml.append(accuracy)

# Non-linear SVM
classifier, accuracy, report, X_test, y_test, cm = svm_nl(X_train, y_train, X_test, y_test)
acc_svmnl.append(accuracy)

# K-NN
classifier, accuracy, report, X_test, y_test, cm = knn(X_train, y_train, X_test, y_test)
acc_knn.append(accuracy)

# Naive Bayes
classifier, accuracy, report, X_test, y_test, cm = naive(X_train, y_train, X_test, y_test)
acc_nav.append(accuracy)

# Decision Tree
classifier, accuracy, report, X_test, y_test, cm = decision_tree(X_train, y_train, X_test, y_test)
acc_des.append(accuracy)

# Random Forest
classifier, accuracy, report, X_test, y_test, cm = random_forest(X_train, y_train, X_test, y_test)
acc_rf.append(accuracy)



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Creating a DataFrame with the results
results = KernalPCA_classification(acc_log, acc_svml, acc_svmnl, acc_knn, acc_nav, acc_des, acc_rf)
print(results)

           Logistic  SVMl  SVMnl   KNN  Navie  Decision  Random
KernalPCA      0.68  0.68   0.68  0.59   0.33      0.58     0.6
