In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Read the user records from the CSV file into a DataFrame
df = pd.read_csv('User_Data.csv')

# Preview the leading rows to check which columns are available
print(df.head())


# Select the two predictor columns (X) and the label column (y)
X = df.loc[:, ['Age', 'EstimatedSalary']]
y = df.loc[:, 'Purchased']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features (mean 0, variance 1). The scaler is fitted on the
# training rows only and then applied to both splits, so the test rows do not
# influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# 2. Create the logistic regression classifier and train it on the scaled
#    training split (fit() returns the fitted estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# 3. Produce class predictions for the scaled held-out split
y_pred = model.predict(X_test)

# 4. Evaluate the model: compare the held-out labels with the predictions
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Confusion matrix (rows are true classes, columns are predicted classes)
print("Confusion Matrix:\n", cm)

# Overall fraction of correct predictions, shown with two decimals
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision, recall, F1-score and support
print("Classification Report:\n", report)

# Feature values of the new user to classify. They are defined once here so
# that the model input and the printed message below cannot drift apart.
new_user_age = 30
new_user_salary = 87000

# Build a one-row DataFrame with the same feature names (and column order) as
# the data the scaler was fitted on
new_user = pd.DataFrame({'Age': [new_user_age], 'EstimatedSalary': [new_user_salary]})

# Apply the scaling learned from the training data (transform only, no re-fit)
new_user_scaled = scaler.transform(new_user)

# Make the prediction; predict() returns an array with one label per input row
new_prediction = model.predict(new_user_scaled)

# Display the prediction (1 = Will Purchase, 0 = Won't Purchase)
print(
    f"Prediction for new user (Age {new_user_age}, Salary {new_user_salary}):"
    f"{new_prediction[0]}"
)


    User ID  Gender  Age  EstimatedSalary  Purchased
0  15624510    Male   19            19000          0
1  15810944    Male   35            20000          0
2  15668575  Female   26            43000          0
3  15603246  Female   27            57000          0
4  15804002    Male   19            76000          0
Confusion Matrix:
 [[50  2]
 [ 9 19]]
Accuracy: 0.86
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.96      0.90        52
           1       0.90      0.68      0.78        28

    accuracy                           0.86        80
   macro avg       0.88      0.82      0.84        80
weighted avg       0.87      0.86      0.86        80

Prediction for new user (Age 30, Salary 87000):0
