In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

In [None]:
warnings.filterwarnings("ignore")

In [None]:
# Location of the Social Network Ads CSV, relative to the notebook.
dataset_path = "../input/social-network-ads/Social_Network_Ads.csv"
dataset = pd.read_csv(dataset_path)

In [None]:
# Preview the first ten rows with gold text on black cells and white borders.
preview_css = {
    'background-color': 'black',
    'color': 'gold',
    'border-color': 'white',
}
dataset.head(10).style.set_properties(**preview_css)

In [None]:
# Summary statistics of the numeric columns, rendered with the same
# gold-on-black table styling used for the row preview.
summary_css = {
    'background-color': 'black',
    'color': 'gold',
    'border-color': 'white',
}
dataset.describe().style.set_properties(**summary_css)

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
dataset.info()

# EDA

## Numerical Features (Univariate Analysis)

### PDF Plots

In [None]:
# Histogram with a KDE overlay for the first column (plotted here as Age).
age_values = dataset.iloc[:, 0].values
sns.set_style("dark")
sns.displot(age_values, kde=True)
plt.title("Probability Distribution Function of Age")
plt.show()

> **The plot above is approximately a Gaussian distribution, with its peak around an age of 40**

In [None]:
# Histogram with a KDE overlay for the second column (plotted here as Salary).
salary_values = dataset.iloc[:, 1].values
sns.set_style("dark")
sns.displot(salary_values, kde=True)
plt.title("Probability Distribution Function of Salary")
plt.show()

> **The distribution above is also approximately Gaussian, with maximum density at a salary of 8000**

### BoxPlots

In [None]:
# Box plot of the first column (plotted here as Age) to inspect spread and outliers.
age_values = dataset.iloc[:, 0].values
sns.set_style("dark")
sns.boxplot(x=age_values)
plt.title("Box Plot visualization of Age")
plt.show()

> **No outliers are present in the Age feature**

In [None]:
# Box plot of the second column (plotted here as Salary) to inspect spread and outliers.
salary_values = dataset.iloc[:, 1].values
sns.set_style("dark")
sns.boxplot(x=salary_values)
plt.title("Box Plot visualization of Salary")
plt.show()

> **No outliers are present in the Salary feature**

### Histograms

In [None]:
# Count histogram of the first column (plotted here as Age).
age_values = dataset.iloc[:, 0].values
sns.set_style("dark")
sns.histplot(x=age_values)
plt.title("Histogram visualization of Age")
plt.show()

> **The highest counts in this dataset are for ages between 35 and 45**

In [None]:
# Count histogram of the second column (plotted here as Salary).
salary_values = dataset.iloc[:, 1].values
sns.set_style("dark")
sns.histplot(x=salary_values)
plt.title("Histogram visualization of Salary")
plt.show()

> **Most users in the dataset have salaries below 90 thousand**

## Categorical Features (Univariate Analysis)

### Histograms

In [None]:
# Class balance of the third column (the Purchased flag, ticked at 0 and 1).
purchased_values = dataset.iloc[:, 2].values
sns.set_style("dark")
# discrete=True gives each integer value its own bar centred on that value,
# so the bars line up with the 0/1 ticks instead of continuous bin edges.
sns.histplot(x=purchased_values, discrete=True)
plt.title("Histogram visualization of Purchased feature")
plt.xticks([0, 1])
plt.show()

> **The count of not-purchased users is almost double that of purchased users**

### Pie Plot

In [None]:
# Share of each class in the last column (the Purchased target).
# The counts are computed once and reused for both the wedges and the legend:
# value_counts() orders by frequency while unique() orders by first appearance,
# so mixing the two can attach a label to the wrong wedge.
purchased_counts = dataset.loc[:, dataset.columns[-1]].value_counts()
plt.pie(purchased_counts, startangle=90, wedgeprops={'edgecolor': 'black'}, shadow=True)
plt.legend(purchased_counts.index)
plt.title("Pie visualization of Purchased feature")
plt.show()

## Numerical Features (Bivariate Analysis)

### Joint Plot

In [None]:
# Joint hexbin density of the first two columns with their marginal histograms.
first_feature = dataset.iloc[:, 0].values
second_feature = dataset.iloc[:, 1].values
sns.set_style("dark")
sns.jointplot(x=first_feature, y=second_feature, kind="hex")
plt.title("Hexbin visualization of Numerical features")
plt.show()

In [None]:
# Scatter of the first two columns with a fitted regression line and marginals.
first_feature = dataset.iloc[:, 0].values
second_feature = dataset.iloc[:, 1].values
sns.set_style("dark")
sns.jointplot(x=first_feature, y=second_feature, kind="reg")
# The title now names the plot kind actually drawn (kind="reg"), not "Hexbin".
plt.title("Regression visualization of Numerical features")
plt.show()

### Pair Plot

In [None]:
# Pairwise scatter/histogram grid restricted to the first two columns.
numeric_features = dataset.iloc[:, [0, 1]]
sns.set_style("dark")
sns.pairplot(numeric_features)
plt.show()

# Models

In [None]:
# Per-column flag: True when the column contains at least one missing value.
dataset.isnull().any()

In [None]:
from sklearn.model_selection import train_test_split
# Every column except the last is a feature; the last column is the target.
features = dataset.iloc[:, :-1].values
labels = dataset.iloc[:, -1].values
# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size = 0.2, random_state = 42)

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information leaks into the fit.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [None]:
# One accumulator per metric; each trained model appends a single value to
# every list, so position i in all four lists refers to the same model.
models_precisions, models_recalls, models_accuracy, models_f1 = [], [], [], []

In [None]:
def train_predict_and_show_results(model, epochs = None, batch_size = None):
    """Fit ``model``, evaluate it on the test split and record its metrics.

    Reads the module-level ``X_train``, ``Y_train``, ``X_test`` and ``Y_test``
    arrays, prints precision, recall, accuracy and F1 (rounded to two
    decimals), and appends each value to ``models_precisions``,
    ``models_recalls``, ``models_accuracy`` and ``models_f1``.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit`` and ``predict``.
    epochs, batch_size : int, optional
        When BOTH are given, the model is treated as a Keras network: they are
        forwarded to ``fit`` and the ``predict`` output is thresholded at 0.5
        to obtain class labels. Otherwise ``fit(X, y)`` / ``predict(X)`` are
        called directly.

    Returns
    -------
    None
        Results are reported via stdout and the module-level metric lists.
    """
    # Supplying both training hyper-parameters is what selects the Keras path.
    is_keras_model = epochs is not None and batch_size is not None

    print("Training the model")
    if is_keras_model:
        model.fit(X_train, Y_train, epochs = epochs, batch_size = batch_size)
    else:
        model.fit(X_train,Y_train)
    print("Training completed")

    if is_keras_model:
        # `predict_classes` no longer exists in current Keras releases; for a
        # single sigmoid output the equivalent is thresholding at 0.5.
        # ravel() flattens the (n, 1) output to the 1-D shape of Y_test.
        probabilities = np.asarray(model.predict(X_test))
        Y_pred = (probabilities > 0.5).astype("int32").ravel()
    else:
        Y_pred = model.predict(X_test)

    # zero_division=1 is passed for precision only: it scores 1 (without a
    # warning) when the model predicts no positives at all.
    precision = round(precision_score(Y_test, Y_pred, zero_division=1), 2)
    recall = round(recall_score(Y_test, Y_pred), 2)
    accuracy = round(accuracy_score(Y_test, Y_pred), 2)
    f1 = round(f1_score(Y_test, Y_pred), 2)
    print(f"Precision : {precision} \nRecall : {recall} \nAccuracy : {accuracy} \nF1 Score : {f1}")

    # Append in the same order for every model so the lists stay aligned.
    models_precisions.append(precision)
    models_recalls.append(recall)
    models_accuracy.append(accuracy)
    models_f1.append(f1)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Linear baseline with scikit-learn's default hyper-parameters.
log_reg = LogisticRegression()
train_predict_and_show_results(model=log_reg)

## Linear SVM

In [None]:
from sklearn.svm import LinearSVC
# Linear SVM with the solver's iteration cap set to 20000.
lin_svc = LinearSVC(max_iter=20000)
train_predict_and_show_results(model=lin_svc)

## Kernel SVM

In [None]:
from sklearn.svm import SVC
# SVM with an RBF kernel, allowing a non-linear decision boundary.
svc = SVC(kernel="rbf")
train_predict_and_show_results(model=svc)

## KNN Classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with the library's default settings.
knn = KNeighborsClassifier()
train_predict_and_show_results(model=knn)

## Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes classifier with the library's default settings.
nb = GaussianNB()
train_predict_and_show_results(model=nb)

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Seed the tree so a fresh "Restart & Run All" reproduces the same metrics;
# 42 matches the seed already used for the train/test split.
dec_tree = DecisionTreeClassifier(random_state=42)
train_predict_and_show_results(dec_tree)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Seed the forest's bootstrap sampling and feature selection so the reported
# metrics are reproducible; 42 matches the seed used for the train/test split.
forest = RandomForestClassifier(random_state=42)
train_predict_and_show_results(forest)

## Neural Network

In [None]:
import keras
# Small feed-forward binary classifier over two input features:
# batch-norm -> 128-unit ReLU hidden layer -> 20% dropout -> sigmoid output.
model = keras.models.Sequential()
for layer in (
    keras.layers.InputLayer(input_shape=(2,)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation="sigmoid"),
):
    model.add(layer)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the network.
model.summary()

In [None]:
# Adam optimizer with binary cross-entropy loss for the single sigmoid output.
# `metrics` is passed as a list: newer Keras releases reject a bare string.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Passing both epochs and batch_size selects the Keras branch of the helper.
train_predict_and_show_results(model, batch_size=32, epochs=10)

# Results

In [None]:
# Model labels, listed in the same order in which the models were trained above.
model_names = [
    "Logistic Regression",
    "Linear SVM",
    "Kernel SVM",
    "KNN Classifier",
    "Naive Bayes Classifier",
    "Decision Tree Classifier",
    "Random Forest Classifier",
    "Neural Network",
]
# One row per model: its label followed by the four recorded metrics.
result_rows = list(zip(model_names, models_precisions, models_recalls, models_accuracy, models_f1))
results_df = pd.DataFrame(result_rows)
results_df.columns = ["Model Type" ,"Precision", "Recall", "Accuracy", "F1 Score"]

In [None]:
# Render the comparison table with gold text on black cells and white borders.
results_css = {
    'background-color': 'black',
    'color': 'gold',
    'border-color': 'white',
}
results_df.style.set_properties(**results_css)