# Implement batch gradient descent with early stopping

## importing needed libraries

In [144]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing


## Load the penguins dataset

In [133]:
# Load the 'penguins' example dataset through seaborn's dataset loader.
data = sns.load_dataset('penguins')

## Split the data into train and test sets and standardize the features

In [135]:
# Hold out 20% of the samples (test_size=0.2); random_state=42 fixes the shuffle seed.
# NOTE(review): X and y are not defined in any cell shown here -- presumably they are
# built from `data` in a cell that was not exported; confirm before running.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [136]:
# Estimate the scaling statistics on the training split only, then apply the
# same fitted transform to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## define the softmax function

In [137]:
def softmax(z):
    """Return the row-wise softmax of a 2-D array of logits.

    Args:
        z: array-like of shape (n_samples, n_classes) holding raw scores.

    Returns:
        ndarray of the same shape whose rows are non-negative and sum to 1.
    """
    z = np.asarray(z, dtype=float)
    # Softmax is invariant to adding a constant to every entry of a row, so
    # subtract the row maximum: the largest exponent becomes exp(0) == 1,
    # which prevents overflow (inf / inf -> nan) for large logits.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

## Defining model

In [142]:
class Model:
    """Softmax (multinomial logistic) regression trained by batch gradient descent.

    A bias column of ones is prepended to the inputs, so the weight matrix
    ``W`` has shape ``(n_features + 1, n_classes)``.

    Early stopping is performed only when a validation set is passed to
    :meth:`fit`: the validation loss is evaluated every ``_EVAL_EVERY``
    iterations and training stops as soon as it fails to improve by at
    least ``tol`` compared with the previous evaluation.

    Attributes set by :meth:`fit`:
        W: learned weights, bias row first.
        classes_: class labels in the column order of the one-hot encoding;
            :meth:`predict` returns indices into this array.
        train_losses: training cross-entropy recorded at every iteration.
        val_losses: validation cross-entropy recorded at every evaluation
            (empty when no validation set was given).
    """

    # Validation loss is evaluated (and progress printed) every this many iterations.
    _EVAL_EVERY = 10

    def __init__(self, n_features, n_classes, learning_rate=0.1, max_iter=1000, tol=1e-4, verbose=False):
        """Store the hyper-parameters; no training happens here.

        Args:
            n_features: number of input columns (excluding the bias).
            n_classes: number of distinct target classes.
            learning_rate: step size of each gradient update.
            max_iter: maximum number of full-batch updates.
            tol: minimum validation-loss decrease that counts as progress.
            verbose: when true, print the losses at every evaluation step.
        """
        self.n_features = n_features
        self.n_classes = n_classes
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.hstack((np.ones((X.shape[0], 1)), X))

    @staticmethod
    def _cross_entropy(y_onehot, proba):
        """Return the mean cross-entropy between one-hot targets and probabilities."""
        # Clip away exact zeros so log() never yields -inf (and 0 * -inf = nan).
        eps = np.finfo(float).eps
        return -np.sum(y_onehot * np.log(np.clip(proba, eps, 1.0))) / y_onehot.shape[0]

    def fit(self, X, y, X_val=None, y_val=None):
        """Train the weights with full-batch gradient descent.

        Args:
            X: training features, shape (n_samples, n_features).
            y: training labels, length n_samples (one-hot encoded internally).
            X_val: optional validation features; enables early stopping.
            y_val: optional validation labels; must accompany ``X_val``.

        Raises:
            ValueError: if only one of ``X_val`` / ``y_val`` is given, or if
                the data does not match ``n_features`` / ``n_classes``.
        """
        if (X_val is None) != (y_val is None):
            raise ValueError("X_val and y_val must be provided together")
        has_val = X_val is not None

        X = self._add_bias(X)
        dummies = pd.get_dummies(y)
        self.classes_ = dummies.columns.to_numpy()
        y = dummies.to_numpy(dtype=float)

        if X.shape[1] != self.n_features + 1:
            raise ValueError(
                f"X has {X.shape[1] - 1} features, expected {self.n_features}"
            )
        if y.shape[1] != self.n_classes:
            raise ValueError(
                f"y has {y.shape[1]} classes, expected {self.n_classes}"
            )

        if has_val:
            X_val = self._add_bias(X_val)
            # Align the validation one-hot columns with the training ones so a
            # class that is absent from the validation set still gets a column.
            y_val = (
                pd.get_dummies(y_val)
                .reindex(columns=dummies.columns, fill_value=0)
                .to_numpy(dtype=float)
            )

        self.W = np.random.randn(self.n_features + 1, self.n_classes) * 0.01

        n_samples = X.shape[0]
        train_losses = []
        val_losses = []
        for i in range(self.max_iter):
            y_pred = softmax(X @ self.W)
            train_loss = self._cross_entropy(y, y_pred)
            train_losses.append(train_loss)
            grad = X.T @ (y_pred - y) / n_samples
            self.W -= self.learning_rate * grad

            if i % self._EVAL_EVERY != 0:
                continue

            if not has_val:
                if self.verbose:
                    print(f"Iteration {i}: Train Loss = {train_loss:.4f}")
                continue

            # Validation loss is measured with the freshly updated weights.
            val_loss = self._cross_entropy(y_val, softmax(X_val @ self.W))
            val_losses.append(val_loss)
            if self.verbose:
                print(f"Iteration {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
            # Stop once the validation loss no longer improves by at least tol.
            if len(val_losses) > 1 and val_losses[-1] > val_losses[-2] - self.tol:
                if self.verbose:
                    print(f"Stopped early after {i} iterations")
                break

        self.train_losses = train_losses
        self.val_losses = val_losses

    def predict(self, X):
        """Return the index of the most probable class for each row of X.

        The indices refer to the one-hot column order used during fitting
        (``classes_``), not to the original label values.
        """
        X = self._add_bias(X)
        y_pred = softmax(X @ self.W)
        y_pred = np.argmax(y_pred, axis=1)

        return y_pred

## Train the model using batch gradient descent with early stopping

In [143]:
# Size the model from the data: one input per column of X_train and one
# output per distinct label found in y_train. All other hyper-parameters
# keep their defaults.
model = Model(n_features=X_train.shape[1], n_classes=len(np.unique(y_train)))

In [141]:
# Train on the training split.
# NOTE(review): neither validation data nor verbose=True is supplied here, so
# no early stopping takes place during this run -- confirm that is intended.
model.fit(X_train, y_train)

In [123]:
# Predicted class indices (argmax over the softmax outputs) for the held-out split.
y_pred = model.predict(X_test)

In [145]:
# Hard-coded species names used as the ground truth for the accuracy check.
# NOTE(review): presumably a pasted copy of y_test -- confirm, or derive it
# from y_test directly so it cannot drift from the actual split.
labels = np.array(['Adelie', 'Gentoo', 'Adelie', 'Chinstrap', 'Adelie', 'Gentoo',
   'Gentoo', 'Chinstrap', 'Chinstrap', 'Chinstrap', 'Adelie',
   'Adelie', 'Gentoo', 'Adelie', 'Gentoo', 'Adelie', 'Adelie',
   'Chinstrap', 'Adelie', 'Gentoo', 'Adelie', 'Adelie', 'Gentoo',
   'Chinstrap', 'Adelie', 'Adelie', 'Gentoo', 'Gentoo', 'Chinstrap',
   'Gentoo', 'Chinstrap', 'Gentoo', 'Adelie', 'Adelie', 'Gentoo',
   'Gentoo', 'Chinstrap', 'Gentoo', 'Adelie', 'Adelie', 'Adelie',
   'Adelie', 'Chinstrap', 'Chinstrap', 'Adelie', 'Adelie', 'Gentoo',
   'Adelie', 'Adelie', 'Gentoo', 'Adelie', 'Gentoo', 'Gentoo',
   'Adelie', 'Adelie', 'Gentoo', 'Adelie', 'Adelie', 'Chinstrap',
   'Chinstrap', 'Gentoo', 'Gentoo', 'Gentoo', 'Adelie', 'Adelie',
   'Gentoo', 'Adelie'])
# Map the species names to integer codes; the printed output below shows
# Adelie -> 0, Chinstrap -> 1, Gentoo -> 2.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
print(labels_encoded)

[0 2 0 1 0 2 2 1 1 1 0 0 2 0 2 0 0 1 0 2 0 0 2 1 0 0 2 2 1 2 1 2 0 0 2 2 1
 2 0 0 0 0 1 1 0 0 2 0 0 2 0 2 2 0 0 2 0 0 1 1 2 2 2 0 0 2 0]


In [148]:
# Accuracy = fraction of positions where the predicted class index equals the
# encoded ground-truth label (mean of the element-wise boolean comparison).
accuracy = np.mean(y_pred == labels_encoded)
print("Accuracy = ", accuracy)

Accuracy =  1.0
