In [4]:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def compute_cost(X, y, theta):
    m = len(y)
    h = sigmoid(X @ theta)
    epsilon = 1e-15 
    cost = (-1 / m) * (y.T @ np.log(h + epsilon) + (1 - y).T @ np.log(1 - h + epsilon))
    return cost

def gradient_descent(X, y, theta, learning_rate, iterations):
    m = len(y)
    costs = np.zeros(iterations)

    for i in range(iterations):
        h = sigmoid(X @ theta)
        gradient = X.T @ (h - y) / m
        theta -= learning_rate * gradient
        costs[i] = compute_cost(X, y, theta)

    return theta, costs

# Feature scaling (Normalization)
def normalize(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_normalized = (X - mu) / sigma
    return X_normalized

# Add a column of ones for the bias term
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Initialize parameters
theta_initial = np.zeros(X_train_bias.shape[1])

# Set hyperparameters
learning_rate = 0.01
iterations = 1000

# Train the model on raw data
theta_raw, _ = gradient_descent(X_train_bias, y_train, theta_initial, learning_rate, iterations)

# Make predictions on raw data
y_pred_raw = np.round(sigmoid(X_test_bias @ theta_raw))

# Calculate accuracy on raw data
acc_raw = accuracy_score(y_test, y_pred_raw)

# Display result
print("Accuracy on raw data: {:.2f}%".format(acc_raw * 100))

# Normalize the features
X_train_norm = normalize(X_train)
X_test_norm = normalize(X_test)

# Add a column of ones for the bias term
X_train_norm_bias = np.c_[np.ones((X_train_norm.shape[0], 1)), X_train_norm]
X_test_norm_bias = np.c_[np.ones((X_test_norm.shape[0], 1)), X_test_norm]

# Train the model on normalized data
theta_norm, _ = gradient_descent(X_train_norm_bias, y_train, theta_initial, learning_rate, iterations)

# Make predictions on normalized data
y_pred_norm = np.round(sigmoid(X_test_norm_bias @ theta_norm))

# Calculate accuracy on normalized data
acc_norm = accuracy_score(y_test, y_pred_norm)

# Display result
print("Accuracy on normalized data: {:.2f}%".format(acc_norm * 100))



  return 1 / (1 + np.exp(-z))


Accuracy on raw data: 60.83%
Accuracy on normalized data: 37.50%


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

file_path = "C:\\Users\\hp\\Downloads\\Social_Network_Ads.csv"
data = pd.read_csv(file_path)
data.head()

# Extract features and target variable
X = data.iloc[:, [2, 3]].values  # Using 'Age' and 'EstimatedSalary' as features
y = data.iloc[:, 4].values  # Target variable 'Purchased'

# Split the dataset into training and test sets (70:30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Logistic Regression using scikit-learn on raw data
lr_raw = LogisticRegression(random_state=42)
lr_raw.fit(X_train, y_train)
y_pred_raw = lr_raw.predict(X_test)
acc_raw = accuracy_score(y_test, y_pred_raw)

# Normalization
scaler = MinMaxScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

# Logistic Regression using scikit-learn on normalized data
lr_norm = LogisticRegression(random_state=42)
lr_norm.fit(X_train_norm, y_train)
y_pred_norm = lr_norm.predict(X_test_norm)
acc_norm = accuracy_score(y_test, y_pred_norm)

# Standardization
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# Logistic Regression using scikit-learn on standardized data
lr_std = LogisticRegression(random_state=42)
lr_std.fit(X_train_std, y_train)
y_pred_std = lr_std.predict(X_test_std)
acc_std = accuracy_score(y_test, y_pred_std)

# Display results
print("Accuracy on raw data: {:.2f}%".format(acc_raw * 100))
print("Accuracy on normalized data: {:.2f}%".format(acc_norm * 100))
print("Accuracy on standardized data: {:.2f}%".format(acc_std * 100))

Accuracy on raw data: 60.83%
Accuracy on normalized data: 84.17%
Accuracy on standardized data: 85.00%
