# Linear Regression from Scratch - Template

This notebook provides a bare-minimum template for implementing a linear regression model from scratch using gradient descent. 

### How to Use This Template:
1.  **Load Data**: In the first code cell, load your dataset using pandas.
2.  **Select Features**: Define which column is your feature (`X`) and which is your target (`y`). The template as written handles a single feature.
3.  **Adjust Hyperparameters**: Tune the `learning_rate` and `epochs`.
4.  **Run the Model**: Execute the cells to train the model and see the results.
5.  **Extend (Optional)**: Add K-Fold Cross-Validation, multiple features, or different visualizations as required by the specific assignment.

In [None]:
# ============================================================
# 1. SETUP: Import libraries and load data
# ============================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- TODO: Load your dataset ---
# Example: df = pd.read_csv('./datasets/your_dataset.csv')
df = pd.DataFrame() # Replace with actual data loading

# --- Data Preprocessing (if needed) ---
# Clean the frame BEFORE extracting arrays from it: `df.dropna()` returns a new
# frame, so arrays already pulled out of the old one would keep the bad rows.
# df = df.dropna() # Example: drop rows with missing values

# --- TODO: Select your feature and target columns ---
# Example: X_raw = df['feature_column'].values
# Example: y = df['target_column'].values
X_raw = np.array([]) # Replace with your feature data
y = np.array([])     # Replace with your target data

# Report the actual state so an unedited template does not claim success.
if len(X_raw) > 0 and len(y) > 0:
    print("Data loaded and features selected.")
else:
    print("No data loaded yet: replace the placeholders above with your dataset.")

In [None]:
# ============================================================
# 2. CORE: Linear Regression Implementation
# ============================================================

def train_linear_regression(X, y, learning_rate, epochs):
    """Train a single-feature linear regression model with batch gradient descent.

    The feature is standardized internally (zero mean, unit population standard
    deviation) before fitting; the learned parameters are converted back so they
    apply directly to the unscaled feature.

    Args:
        X: Feature values. Any array-like with one value per sample; a column
            vector of shape (n, 1) is flattened to (n,).
        y: Target values, array-like with the same number of samples as X.
        learning_rate: Step size applied to each gradient update.
        epochs: Number of full passes of gradient descent.

    Returns:
        A tuple ``(slope, intercept, cost_history)`` where ``slope`` and
        ``intercept`` are expressed in the original feature scale and
        ``cost_history`` is a list with the cost (MSE / 2) recorded at the
        start of every epoch.

    Raises:
        ValueError: If the inputs are empty or X and y differ in sample count.
    """
    # Flatten to 1-D float arrays. Without this, an (n, 1) feature column paired
    # with an (n,) target makes `y_pred - y` broadcast to an (n, n) matrix and
    # the gradients come out silently wrong.
    X = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    n = y.size
    if n == 0:
        raise ValueError("Cannot train on empty data.")
    if X.size != n:
        raise ValueError(
            f"X and y must contain the same number of samples "
            f"(got {X.size} and {n}); only a single feature is supported."
        )

    # --- Feature Scaling (Standardization) ---
    # Helps gradient descent converge faster
    mean_x = X.mean()
    std_x = X.std()
    if std_x == 0:
        std_x = 1.0  # Constant feature: avoid division by zero
    X_scaled = (X - mean_x) / std_x

    # --- Initialize Parameters ---
    m = 0.0  # Slope for scaled data
    c = 0.0  # Intercept
    cost_history = []

    # --- Gradient Descent Loop ---
    for _ in range(epochs):
        # Residuals of the current fit
        error = (m * X_scaled + c) - y

        # Cost (MSE / 2); the 1/2 cancels the factor 2 from differentiation
        cost = (1 / (2 * n)) * np.sum(error ** 2)
        cost_history.append(cost)

        # Gradients of the cost with respect to m and c
        dm = (1 / n) * np.sum(error * X_scaled)
        dc = (1 / n) * np.sum(error)

        # Update parameters
        m -= learning_rate * dm
        c -= learning_rate * dc

    # --- Convert parameters back to original scale for interpretation ---
    # y = m * (x - mean_x) / std_x + c  =>  slope = m / std_x,
    # intercept = c - m * mean_x / std_x
    original_slope = m / std_x
    original_intercept = c - (m * mean_x / std_x)

    print("Training complete.")
    return original_slope, original_intercept, cost_history

def predict(X, slope, intercept):
    """Evaluate the fitted line at X, i.e. return slope * X + intercept."""
    scaled_term = slope * X
    return scaled_term + intercept

def evaluate_model(y_true, y_pred):
    """Calculate the mean squared error and R-squared score.

    Args:
        y_true: Observed target values (array-like, one value per sample).
        y_pred: Predicted target values with the same number of samples.

    Returns:
        A tuple ``(mse, r2)``. When ``y_true`` is constant (zero total variance)
        R-squared is undefined; in that case 1.0 is returned for a perfect fit
        and 0.0 otherwise, instead of nan / -inf.

    Raises:
        ValueError: If the inputs are empty or differ in sample count.
    """
    # Coerce and flatten so lists work and an (n, 1) vs (n,) pair cannot
    # broadcast into an (n, n) residual matrix.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size == 0:
        raise ValueError("Cannot evaluate on empty data.")
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true and y_pred must contain the same number of samples "
            f"(got {y_true.size} and {y_pred.size})."
        )

    squared_residuals = (y_true - y_pred) ** 2
    mse = np.mean(squared_residuals)

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum(squared_residuals)

    if ss_total == 0:
        # Constant target: the usual ratio would divide by zero.
        r2 = 1.0 if ss_residual == 0 else 0.0
    else:
        r2 = 1 - (ss_residual / ss_total)
    return mse, r2

In [None]:
# ============================================================
# 3. EXECUTION: Train, Evaluate, and Visualize
# ============================================================

# --- Hyperparameters ---
learning_rate = 0.01
epochs = 1000

# --- Check if data is loaded before proceeding ---
if len(X_raw) > 0 and len(y) > 0:
    # --- Train the model ---
    slope, intercept, cost_history = train_linear_regression(X_raw, y, learning_rate, epochs)
    print(f"\nLearned Model: y = {slope:.4f}x + {intercept:.4f}")

    # --- Make predictions ---
    predictions = predict(X_raw, slope, intercept)

    # --- Evaluate the model ---
    mse, r2 = evaluate_model(y, predictions)
    print(f"\nMean Squared Error (MSE): {mse:.4f}")
    # Escape sequence keeps the superscript two intact regardless of file encoding.
    print(f"R-squared (R\u00b2) Score: {r2:.4f}")

    # --- Visualize Results ---
    # Explicit figure/axes handles instead of the implicit pyplot state machine.
    fig, (ax_cost, ax_fit) = plt.subplots(1, 2, figsize=(12, 5))

    # Cost Convergence
    ax_cost.plot(cost_history)
    ax_cost.set_title("Cost Function Convergence")
    ax_cost.set_xlabel("Epoch")
    ax_cost.set_ylabel("Cost")
    ax_cost.grid(True)

    # Regression Line
    ax_fit.scatter(X_raw, y, alpha=0.7, label="Actual Data")
    ax_fit.plot(X_raw, predictions, color='red', linewidth=2, label="Regression Line")
    ax_fit.set_title("Linear Regression Fit")
    ax_fit.set_xlabel("Feature")
    ax_fit.set_ylabel("Target")
    ax_fit.legend()
    ax_fit.grid(True)

    fig.tight_layout()
    plt.show()
else:
    print("Please load data in the first cell before running this cell.")