In [8]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
# Load dataset
df = pd.read_csv('Social_Network_Ads.csv')

print("\nFirst 5 rows of the dataset:\n")
print(df.head())

# Example columns: 'User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'
# Drop 'User ID' (an identifier, not a predictive feature)
df = df.drop(columns='User ID')

# Convert categorical variable 'Gender' into numeric (Male -> 1, Female -> 0).
# Series.map turns every value that is not a key of the mapping into NaN, so
# verify the result here and fail with a clear message instead of letting NaN
# reach the model, where it would surface as an unrelated fitting error.
gender_codes = df['Gender'].map({'Male': 1, 'Female': 0})
unmapped_rows = gender_codes.isna()
if unmapped_rows.any():
    unexpected_values = df.loc[unmapped_rows, 'Gender'].unique().tolist()
    raise ValueError(
        f"Unexpected or missing 'Gender' values: {unexpected_values}; "
        "expected only 'Male' or 'Female'"
    )
df['Gender'] = gender_codes

# Features and target
X = df[['Gender', 'Age', 'EstimatedSalary']]
y = df['Purchased']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Standardise the features: learn the scaling statistics from the training rows
# only, then apply that same fitted transform to both partitions
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a logistic-regression classifier with its default settings
logreg = LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out rows
y_pred = logreg.predict(X_test)
# Confusion matrix
# Pin the label order to [0, 1] so the result is always 2x2. Without it,
# confusion_matrix infers the labels from the data and returns a 1x1 matrix
# when y_test and y_pred contain a single class, which breaks the 4-way
# unpacking below.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Extract TN, FP, FN, TP.
# Rows are true labels and columns are predicted labels, so flattening
# [[TN, FP], [FN, TP]] row by row yields the values in exactly this order.
TN, FP, FN, TP = cm.ravel()

print("\nConfusion Matrix:\n", cm)
print(f"True Positives (TP): {TP}")
print(f"False Positives (FP): {FP}")
print(f"True Negatives (TN): {TN}")
print(f"False Negatives (FN): {FN}")

# Score the held-out predictions against the true labels
accuracy = accuracy_score(y_test, y_pred)    # fraction of predictions that are correct
precision = precision_score(y_test, y_pred)  # of the predicted positives, fraction truly positive
recall = recall_score(y_test, y_pred)        # of the actual positives, fraction that were found
error_rate = 1 - accuracy                    # complement of accuracy

# Report every metric rounded to two decimals, one per line
print("\nModel Evaluation Metrics:")
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Error Rate", error_rate),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_name}: {metric_value:.2f}")



First 5 rows of the dataset:

    User ID  Gender  Age  EstimatedSalary  Purchased
0  15624510    Male   19            19000          0
1  15810944    Male   35            20000          0
2  15668575  Female   26            43000          0
3  15603246  Female   27            57000          0
4  15804002    Male   19            76000          0

Confusion Matrix:
 [[65  3]
 [ 7 25]]
True Positives (TP): 25
False Positives (FP): 3
True Negatives (TN): 65
False Negatives (FN): 7

Model Evaluation Metrics:
Accuracy: 0.90
Error Rate: 0.10
Precision: 0.89
Recall: 0.78
