<a href="https://colab.research.google.com/github/mtcs2503/Machine-Learning-Lab/blob/main/lab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed in a form that only ever exponentiates a
    non-positive number, so very negative inputs evaluate to (nearly) 0
    without triggering a floating-point overflow.

    Args:
        z: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar when ``z`` is a scalar (or 0-d array); otherwise an
        ndarray with the same shape as ``z``. All values lie in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can underflow to 0 but never
    # overflow the way exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: e^z / (1 + e^z), the same quantity
    # rewritten so that only e^-|z| is needed.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it so scalar
    # callers keep receiving a NumPy scalar.
    return result[()] if result.ndim == 0 else result

def compute_cost(X, y, theta):
    """Return the mean binary cross-entropy of a logistic model on (X, y).

    The hypothesis is ``h = sigmoid(X @ theta)`` and the cost is
    ``-(1/m) * sum(y * log(h) + (1 - y) * log(1 - h))`` with ``m = len(y)``.

    Args:
        X: Feature matrix with one row per sample.
        y: Targets, one per sample (expected to be 0/1 labels). May be a
            flat vector or a column; it is aligned with the hypothesis.
        theta: Parameters, one per column of ``X`` (flat vector or column).

    Returns:
        The scalar cost.
    """
    m = len(y)

    h = sigmoid(X @ theta)

    # A flat (m,) target combined with an (m, 1) hypothesis (or the reverse)
    # would broadcast to an (m, m) matrix and silently produce a wrong cost.
    # When both hold the same number of elements, give y the hypothesis shape.
    y = np.asarray(y)
    if y.shape != h.shape and y.size == h.size:
        y = y.reshape(h.shape)

    # Keep h strictly inside (0, 1) so neither logarithm evaluates log(0).
    h = np.clip(h, 1e-15, 1 - 1e-15)

    cost = (-1/m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))

    return cost

def compute_gradient(X, y, theta):
    """Return the gradient of the logistic cross-entropy cost w.r.t. theta.

    Computes ``(1/m) * X.T @ (sigmoid(X @ theta) - y)`` with ``m = len(y)``.

    Args:
        X: Feature matrix with one row per sample.
        y: Targets, one per sample (expected to be 0/1 labels). May be a
            flat vector or a column; it is aligned with the hypothesis.
        theta: Parameters, one per column of ``X`` (flat vector or column).

    Returns:
        The gradient, with one entry per column of ``X``.
    """
    m = len(y)

    h = sigmoid(X @ theta)

    # Subtracting a flat (m,) target from an (m, 1) hypothesis (or the
    # reverse) would broadcast to (m, m) and yield an (n, m) "gradient".
    # When both hold the same number of elements, give y the hypothesis shape.
    y = np.asarray(y)
    if y.shape != h.shape and y.size == h.size:
        y = y.reshape(h.shape)

    error = h - y
    gradient = (1/m) * (X.T @ error)

    return gradient

def load_and_preprocess_data(file_path):
    """Load a CSV and build a standardized design matrix and target column.

    Rows containing any missing value are dropped. The ``TenYearCHD`` column
    becomes the target; every other column is standardized to zero mean and
    unit standard deviation (pandas' default ``std``), and a leading column
    of ones is prepended as the intercept term.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is an ndarray of shape
        ``(rows, 1 + number_of_feature_columns)`` and ``y`` is an ndarray of
        shape ``(rows, 1)``.

    Raises:
        KeyError: If the CSV has no ``TenYearCHD`` column.
    """
    print("Loading and preprocessing data...")
    df = pd.read_csv(file_path)
    df.dropna(inplace=True)
    y = df['TenYearCHD'].values.reshape(-1, 1)
    features = df.drop('TenYearCHD', axis=1)

    # A constant column has zero standard deviation (and a single-row frame
    # has an undefined one); dividing by it would fill the column with NaN
    # and poison every later cost/gradient. Such columns are already all
    # zeros after centering, so dividing by 1 leaves them at 0.
    spread = features.std().replace(0, 1).fillna(1)
    standardized = (features - features.mean()) / spread

    # Prepend the intercept column of ones.
    X = np.hstack((np.ones((standardized.shape[0], 1)), standardized.to_numpy()))

    print("Data preprocessing complete.")
    print(f"Shape of feature matrix X: {X.shape}")
    print(f"Shape of target vector y: {y.shape}\n")

    return X, y

if __name__ == "__main__":
    # Demo: evaluate the cost and gradient at theta = 0, take a single
    # gradient-descent step, then evaluate both again.
    file_name = '/content/framingham.csv'
    X, y = load_and_preprocess_data(file_name)

    # One parameter per column of X, all starting at zero.
    num_features = X.shape[1]
    theta_initial = np.zeros((num_features, 1))

    initial_cost, initial_gradient = (
        compute_cost(X, y, theta_initial),
        compute_gradient(X, y, theta_initial),
    )

    print("--- Initial values (before any training) ---")
    print(f"Initial Cost (Loss Function): {initial_cost:.4f}")
    print("\nInitial Gradient:", initial_gradient, sep="\n")

    # Single update: move against the gradient, scaled by the learning rate.
    alpha = 0.01
    theta_updated = theta_initial - alpha * initial_gradient

    updated_cost, updated_gradient = (
        compute_cost(X, y, theta_updated),
        compute_gradient(X, y, theta_updated),
    )

    print("\n--- After one gradient descent step ---")
    print(f"Updated Cost (Loss Function): {updated_cost:.4f}")
    print("\nUpdated Gradient:", updated_gradient, sep="\n")


Loading and preprocessing data...
Data preprocessing complete.
Shape of feature matrix X: (3656, 16)
Shape of target vector y: (3656, 1)

--- Initial values (before any training) ---
Initial Cost (Loss Function): 0.6931

Initial Gradient:
[[ 0.3476477 ]
 [-0.03296512]
 [-0.08401111]
 [ 0.02266105]
 [-0.00689025]
 [-0.0187413 ]
 [-0.03202042]
 [-0.01737298]
 [-0.06523556]
 [-0.0335589 ]
 [-0.03274302]
 [-0.08008558]
 [-0.05401972]
 [-0.02943893]
 [-0.00737433]
 [-0.04381535]]

--- After one gradient descent step ---
Updated Cost (Loss Function): 0.6917

Updated Gradient:
[[ 0.34677859]
 [-0.03287707]
 [-0.08358362]
 [ 0.02250628]
 [-0.00695568]
 [-0.01875431]
 [-0.03176389]
 [-0.01727414]
 [-0.06470674]
 [-0.03332737]
 [-0.0324892 ]
 [-0.0794938 ]
 [-0.05350413]
 [-0.02912397]
 [-0.00724412]
 [-0.04356401]]
