# Assignment 2 (part 1)
### Q : Implement logistic regression using Python (both scratch and sk learn) to perform classification on Social_Network_Ads.csv dataset 
#### Try all three  and compare the results
##### 1. Using raw data
##### 2. Normalisation 
##### 3. Standardisation  
##### Split the dataset into train and test set. (in ratio 70:30)

# Solution-
### Importing Libraries

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

### Load the dataset

In [7]:
data = pd.read_csv("C:/Users/u/Downloads/Social_Network_Ads.csv")
data.head() #Shows first few rows

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [8]:
# Drop the User ID column as it is not relevant for classification
data = data.drop('User ID', axis=1)

### Convert Gender to numerical values (0 for male, 1 for female)

In [9]:
data['Gender'] = data['Gender'].map({'Male': 0, 'Female': 1})

In [10]:
# Separate features (X) and target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

### Split the dataset into train and test sets (70:30 ratio)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Function to normalize and standardize data
def preprocess_data(X_train, X_test, method='raw'):
    if method == 'normalize':
        scaler = MinMaxScaler()
    elif method == 'standardize':
        scaler = StandardScaler()
    else:
        return X_train, X_test
    
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train_scaled, X_test_scaled

### Evaluate logistic regression model

In [19]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    
    # Display metrics
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))

### Logistic Regression from scratch

In [25]:
class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0

        for _ in range(self.num_iterations):
            z = np.dot(X, self.weights) + self.bias
            a = self.sigmoid(z)

            dw = (1 / m) * np.dot(X.T, (a - y))
            db = (1 / m) * np.sum(a - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        return np.round(self.sigmoid(z))

### Raw Data

In [20]:
print("Raw Data:")
logreg_raw = LogisticRegression()
logreg_raw.fit(X_train, y_train)
evaluate_model(logreg_raw, X_test, y_test)

Raw Data:
Accuracy: 0.6083333333333333

Confusion Matrix:
 [[73  0]
 [47  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.61      1.00      0.76        73
           1       1.00      0.00      0.00        47

    accuracy                           0.61       120
   macro avg       0.80      0.50      0.38       120
weighted avg       0.76      0.61      0.46       120



### Normalized Data

In [21]:
print("\nNormalized Data:")
X_train_norm, X_test_norm = preprocess_data(X_train, X_test, 'normalize')
logreg_norm = LogisticRegression()
logreg_norm.fit(X_train_norm, y_train)
evaluate_model(logreg_norm, X_test_norm, y_test)


Normalized Data:
Accuracy: 0.8416666666666667

Confusion Matrix:
 [[72  1]
 [18 29]]

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.99      0.88        73
           1       0.97      0.62      0.75        47

    accuracy                           0.84       120
   macro avg       0.88      0.80      0.82       120
weighted avg       0.87      0.84      0.83       120



### Standardized Data

In [16]:
X_train_std, X_test_std = preprocess_data(X_train, X_test, 'standardize')
logreg_std = LogisticRegression()
logreg_std.fit(X_train_std, y_train)
evaluate_model(logreg_std, X_test_std, y_test)

Accuracy: 0.8583333333333333

Confusion Matrix:
 [[71  2]
 [15 32]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.97      0.89        73
           1       0.94      0.68      0.79        47

    accuracy                           0.86       120
   macro avg       0.88      0.83      0.84       120
weighted avg       0.87      0.86      0.85       120



In [26]:
# Logistic Regression from scratch
print("\nLogistic Regression from Scratch:")
logreg_scratch = LogisticRegressionScratch()
logreg_scratch.fit(X_train, y_train)
evaluate_model(logreg_scratch, X_test, y_test)


Logistic Regression from Scratch:
Accuracy: 0.6083333333333333

Confusion Matrix:
 [[73  0]
 [47  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.61      1.00      0.76        73
           1       1.00      0.00      0.00        47

    accuracy                           0.61       120
   macro avg       0.80      0.50      0.38       120
weighted avg       0.76      0.61      0.46       120



  return 1 / (1 + np.exp(-z))
