In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Location of the raw dataset, resolved relative to the notebook's working directory.
DATA_PATH = "Social_Network_Ads.csv"

# Load the full CSV into a DataFrame; later cells select columns from `data`.
data = pd.read_csv(DATA_PATH)

In [8]:
# The two predictor columns, named once so the feature selection and the
# scaler below cannot drift apart.
feature_columns = ['Age', 'EstimatedSalary']

# Feature matrix and target vector.
X = data[feature_columns]
y = data['Purchased']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Apply StandardScaler to both feature columns. `remainder='passthrough'`
# would forward any unlisted column unchanged; every column of X is listed
# here, so nothing is actually passed through.
scaling_step = ('scaler', StandardScaler(), feature_columns)
preprocessor = ColumnTransformer(
    transformers=[scaling_step],
    remainder='passthrough',
)

In [9]:
# Chain preprocessing and the classifier so that the scaler is fitted together
# with the model on the training rows and then re-applied at predict time.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
]
pipeline = Pipeline(steps=pipeline_steps)

# Fit the scaler and the logistic regression on the training split.
pipeline.fit(X_train, y_train)

# Predicted class labels for the held-out rows; used by the metric cells below.
y_pred = pipeline.predict(X_test)

In [11]:
# Confusion matrix on the held-out rows: row index is the true class,
# column index is the predicted class.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Heading and matrix on separate lines.
print("Confusion Matrix:", cm, sep="\n")

Confusion Matrix:
[[50  2]
 [ 9 19]]


In [10]:
# Fraction of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8625


In [14]:
# Error rate is the complement of accuracy, i.e. the share of held-out rows
# that were misclassified. Relies on `accuracy` from the accuracy cell.
error_rate = 1.0 - accuracy
print("Error Rate:", error_rate)

Error Rate: 0.13749999999999996


In [15]:
# Precision for the positive class (scikit-learn's default binary setting):
# of the rows predicted positive, the share that are truly positive.
precision = precision_score(y_true=y_test, y_pred=y_pred)
print("Precision:", precision)

Precision: 0.9047619047619048


In [16]:
# Recall for the positive class (scikit-learn's default binary setting):
# of the truly positive rows, the share the model predicted as positive.
recall = recall_score(y_true=y_test, y_pred=y_pred)
print("Recall:", recall)

Recall: 0.6785714285714286


In [17]:
# F1 score for the positive class: the harmonic mean of precision and recall.
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print("F1 Score:", f1)

F1 Score: 0.7755102040816326
