In [1]:
import pandas as pd

In [2]:
# Load the dataset (relative path, resolved against the current working directory)
file_path = "Social_Network_Ads.csv"
df = pd.read_csv(file_path)

# Print the column / dtype summary. df.info() writes its report to stdout and
# returns None, so it is called as a standalone statement rather than being
# packed into a tuple (which would display a stray None).
df.info()

# Keep the preview as the cell's last expression so it is shown as the cell output
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Age              400 non-null    int64
 1   EstimatedSalary  400 non-null    int64
 2   Purchased        400 non-null    int64
dtypes: int64(3)
memory usage: 9.5 KB


(None,
    Age  EstimatedSalary  Purchased
 0   19            19000          0
 1   35            20000          0
 2   26            43000          0
 3   27            57000          0
 4   19            76000          0)

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Feature matrix (Age, EstimatedSalary) and target vector (Purchased) as NumPy arrays
X = df.loc[:, ['Age', 'EstimatedSalary']].to_numpy()
y = df['Purchased'].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Show the shape of every split as the cell output
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((320, 2), (80, 2), (320,), (80,))

In [4]:
import numpy as np

# Initialize parameters (weights and bias) with draws from a standard normal
# distribution. The seed is fixed so re-running the cell reproduces the same
# starting point.
np.random.seed(42)
# One weight per feature column: the size is taken from X_train so it stays in
# sync with the feature matrix instead of being hard-coded.
weights = np.random.randn(X_train.shape[1])
bias = np.random.randn()

# Gradient-descent hyperparameters: step size and number of passes over the training set
learning_rate = 0.01
epochs = 1000

# Mean Squared Error (MSE) function
def mse_loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values (array-like), broadcastable against ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    # Coerce to float arrays so plain Python lists are accepted and
    # unsigned-integer inputs cannot wrap around during the subtraction.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

# Batch gradient descent: every epoch uses the full training set
for epoch in range(epochs):
    # Linear model output for all training rows
    y_pred = np.dot(X_train, weights) + bias

    # Loss is measured with the parameters as they are before this epoch's update
    loss = mse_loss(y_train, y_pred)

    # Residuals drive both gradients. The constant factor 2 from differentiating
    # the squared error is not included; it only rescales the effective learning rate.
    error = y_pred - y_train
    dW = np.dot(X_train.T, error) / X_train.shape[0]  # averaged over samples, one entry per weight
    dB = error.mean()  # bias gradient is the mean residual

    # Step against the gradient (in-place update of the parameters)
    weights -= learning_rate * dW
    bias -= learning_rate * dB

    # Report progress at epochs 0, 100, 200, ...
    if not epoch % 100:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Display the learned parameters as the cell output
weights, bias

Epoch 0, Loss: 0.3311
Epoch 100, Loss: 0.1627
Epoch 200, Loss: 0.1359
Epoch 300, Loss: 0.1315
Epoch 400, Loss: 0.1308
Epoch 500, Loss: 0.1307
Epoch 600, Loss: 0.1307
Epoch 700, Loss: 0.1307
Epoch 800, Loss: 0.1307
Epoch 900, Loss: 0.1307


(array([0.26017436, 0.14529949]), np.float64(0.3593874468550853))

In [5]:
# Make predictions on the test set with the trained linear model
y_test_pred = np.dot(X_test, weights) + bias

# Compute test loss with the mse_loss helper already defined next to the
# training loop; re-declaring an identical function here would only shadow it.
test_loss = mse_loss(y_test, y_test_pred)
print("Test Loss:", test_loss)

Test Loss: 0.0992865541537975
