<a href="https://colab.research.google.com/github/sujalkumeriya59/Deep-Learning/blob/main/practical_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Data Loading
# Read the CSV file into a DataFrame and preview its first five rows.
import pandas as pd
df = pd.read_csv(filepath_or_buffer='diabetes.csv')
df.head(n=5)

In [None]:
# Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fill missing values in numeric columns with that column's mean.
# numeric_only=True keeps this from raising if a non-numeric column is present.
# NOTE(review): these means are computed over all rows (train and test alike);
# move the imputation after the split if strict isolation of the test set is needed.
df.fillna(df.mean(numeric_only=True), inplace=True)

# The features are all columns except the 'Outcome' column (target variable)
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Split the data into training and test sets (80/20 split) BEFORE scaling,
# so the scaler never sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features: fit on the training rows only, then apply the
# same training mean/std to the test rows (avoids test-set leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training statistics; kept so that any
# later cell referring to X_scaled still works.
X_scaled = scaler.transform(X)


In [None]:
# Perform Gradient Descent

import numpy as np

# Hypothesis function for linear regression
def predict(X, theta):
    """Return the model output: the dot product of ``X`` and ``theta``."""
    hypothesis = np.dot(X, theta)
    return hypothesis

# Cost function (half the Mean Squared Error)
def compute_cost(X, y, theta):
    """Return the squared-error cost ``1/(2m) * sum((X·theta - y)^2)``.

    Args:
        X: Feature matrix passed to ``predict``.
        y: Target values, one per prediction (any shape with matching size).
        theta: Model parameters passed to ``predict``.

    Returns:
        The scalar cost for the given parameters.

    Raises:
        ValueError: If ``y`` does not hold exactly one value per prediction.
    """
    m = len(y)
    predictions = predict(X, theta)
    # Align y with the predictions' shape. Subtracting a 1-D target from
    # (m, 1) predictions (or vice versa) would otherwise broadcast to an
    # (m, m) matrix and silently produce a wrong cost.
    targets = np.asarray(y).reshape(np.shape(predictions))
    cost = (1/(2*m)) * np.sum((predictions - targets) ** 2)
    return cost

# Gradient Descent function with cost print every 100 iterations
def gradient_descent(X, y, theta, alpha, iterations):
    """Run batch gradient descent for linear regression.

    Args:
        X: Feature matrix with one row per training example.
        y: Target values, one per prediction (any shape with matching size).
        theta: Initial parameters.
        alpha: Learning rate (step size of each update).
        iterations: Number of parameter updates to perform.

    Returns:
        A ``(theta, cost_history)`` tuple: the parameters after the last
        update and a list holding the cost after every update.

    Raises:
        ValueError: If ``y`` does not hold exactly one value per prediction.
    """
    m = len(y)  # number of training examples

    # Align y with the predictions' shape once, up front. A 1-D target
    # (e.g. a pandas Series) subtracted from (m, 1) predictions would
    # otherwise broadcast to (m, m) and silently corrupt the gradient
    # and the shape of theta.
    y = np.asarray(y).reshape(np.shape(predict(X, theta)))
    cost_history = []

    for i in range(iterations):
        predictions = predict(X, theta)
        errors = predictions - y

        # Gradient of the cost with respect to theta
        gradients = (1/m) * np.dot(X.T, errors)

        # Step against the gradient
        theta = theta - alpha * gradients

        # Record the cost obtained with the updated parameters
        cost = compute_cost(X, y, theta)
        cost_history.append(cost)

        # Print cost every 100 iterations
        if (i + 1) % 100 == 0:
            print(f"Iteration {i + 1}: Cost = {cost}")

    return theta, cost_history


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Hypothesis function
def predict(X, theta):
    """Compute ``np.dot(X, theta)``, the linear model's output for ``X``."""
    linear_output = np.dot(X, theta)
    return linear_output

# Cost function (half the Mean Squared Error)
def compute_cost(X, y, theta):
    """Evaluate the cost ``1/(2m) * sum((predict(X, theta) - y)^2)``.

    Args:
        X: Feature matrix passed to ``predict``.
        y: Target values, one per prediction (any shape with matching size).
        theta: Model parameters passed to ``predict``.

    Returns:
        The scalar cost for the given parameters.

    Raises:
        ValueError: If ``y`` does not hold exactly one value per prediction.
    """
    m = len(y)
    predictions = predict(X, theta)
    # Reshape y to match the predictions so that a (m,) target against
    # (m, 1) predictions cannot broadcast into an (m, m) error matrix.
    y_aligned = np.asarray(y).reshape(np.shape(predictions))
    squared_errors = (predictions - y_aligned) ** 2
    return (1/(2*m)) * np.sum(squared_errors)

# Gradient descent
def gradient_descent(X, y, theta, alpha, iterations):
    """Minimize the linear-regression cost with batch gradient descent.

    The cost is printed after every iteration.

    Args:
        X: Feature matrix with one row per training example.
        y: Target values, one per prediction (any shape with matching size).
        theta: Initial parameters.
        alpha: Learning rate (step size of each update).
        iterations: Number of parameter updates to perform.

    Returns:
        A ``(theta, cost_history)`` tuple: the parameters after the last
        update and a list holding the cost after every update.

    Raises:
        ValueError: If ``y`` does not hold exactly one value per prediction.
    """
    m = len(y)

    # Match y to the shape of the predictions before the loop. Without this,
    # mixing a 1-D target with (m, 1) predictions broadcasts the error to
    # (m, m) and the returned theta has the wrong shape and values.
    y = np.asarray(y).reshape(np.shape(predict(X, theta)))
    cost_history = []

    for i in range(iterations):
        errors = predict(X, theta) - y
        gradients = (1/m) * np.dot(X.T, errors)
        theta = theta - alpha * gradients

        # Cost is measured with the freshly updated parameters
        cost = compute_cost(X, y, theta)
        cost_history.append(cost)

        # Print cost each iteration
        print(f"Iteration {i+1}/{iterations} -> Cost: {cost:.6f}")

    return theta, cost_history

# ---- STEP 1: PREPARE SAMPLE DATA ----
# Synthetic linear dataset following y = 4 + 3x + Gaussian noise
np.random.seed(42)
m = 100  # number of samples
X = np.random.rand(m, 1) * 2
y = 3 * X + 4 + np.random.randn(m, 1)

# Prepend a column of ones so theta0 acts as the intercept; shape (m, 2)
X_b = np.hstack((np.ones((m, 1)), X))

# Start from all-zero parameters, one per column of X_b (theta0 and theta1)
theta_init = np.zeros((X_b.shape[1], 1))

# ---- STEP 2: RUN GRADIENT DESCENT ----
alpha = 0.1    # learning rate
iterations = 100
theta_opt, cost_history = gradient_descent(
    X_b, y, theta_init, alpha=alpha, iterations=iterations
)

print("\nOptimized parameters (theta):", theta_opt, sep="\n")

# ---- STEP 3: PLOT COST FUNCTION ----
# One point per iteration, numbered from 1
plt.plot(np.arange(1, iterations + 1), cost_history)
plt.title("Cost Function vs Iterations")
plt.xlabel("Iteration")
plt.ylabel("Cost (MSE)")
plt.grid(True)
plt.show()
