In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Sample dataset
# Toy frame with one missing 'Age' and one missing 'Salary' so the imputation
# step below has something to fill.
data = {
    'Age': [25, 30, 35, np.nan, 40],
    'Salary': [50000, 60000, np.nan, 80000, 90000],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Purchased': ['No', 'Yes', 'No', 'Yes', 'Yes']
}
df = pd.DataFrame(data)

# 1. Handling Missing Values
# Assign the filled column back instead of calling fillna(inplace=True) on
# df[col]: that form is chained assignment, which warns on pandas 2.x and does
# not modify df at all once Copy-on-Write is enabled (pandas 3.0).
df['Age'] = df['Age'].fillna(df['Age'].mean())            # mean of the observed ages
df['Salary'] = df['Salary'].fillna(df['Salary'].median())  # median of the observed salaries

# 2. Encoding Categorical Variables
# Use one encoder per column. Refitting a single shared encoder overwrites its
# classes_, so the first column's mapping would be lost and could no longer be
# inverse-transformed.
gender_encoder = LabelEncoder()
df['Gender'] = gender_encoder.fit_transform(df['Gender'])  # Female=0, Male=1

purchased_encoder = LabelEncoder()
df['Purchased'] = purchased_encoder.fit_transform(df['Purchased'])  # No=0, Yes=1

# Keep the original name available; as before, it ends up holding the encoder
# fitted on 'Purchased'.
label_encoder = purchased_encoder

# 3. Splitting Data into Features and Target
X = df[['Age', 'Salary', 'Gender']]
y = df['Purchased']

# 4. Train-Test Split
# Split the unscaled features first so the held-out rows never influence the
# scaler's statistics (fitting the scaler on all rows leaks test information).
# NOTE: with 5 rows, test_size=0.2 holds out a single sample, so the accuracy
# printed below can only be 0.0 or 1.0.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Scaling Features
# Fit on the training rows only, then reuse that transform for everything else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept so any code that still refers to X_scaled keeps working.
X_scaled = scaler.transform(X)

# 6. Training a Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# 7. Making Predictions
y_pred = model.predict(X_test)

# 8. Evaluating the Model
accuracy = accuracy_score(y_test, y_pred)
# Pin the label order (0=No, 1=Yes) so the matrix is always 2x2, even when the
# tiny test set and the predictions happen to contain only one class.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.0
Confusion Matrix:
 [[0 0]
 [1 0]]
