In [4]:
import os
import numpy as np
from PIL import Image
import random
import math
from typing import List, Tuple, Dict, Optional
import json

class ManualImageDataGenerator:
    def __init__(self, 
                 target_size: Tuple[int, int] = (380, 380),
                 batch_size: int = 32,
                 shuffle: bool = True,
                 preprocessing_function=None,
                 horizontal_flip: bool = False,
                 rotation_range: float = 0.0,
                 zoom_range: float = 0.0,
                 width_shift_range: float = 0.0,
                 height_shift_range: float = 0.0):
        
        self.target_size = target_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.preprocessing_function = preprocessing_function
        self.horizontal_flip = horizontal_flip
        self.rotation_range = rotation_range
        self.zoom_range = zoom_range
        self.width_shift_range = width_shift_range
        self.height_shift_range = height_shift_range
        
        self.class_indices = {}
        self.samples = []
        self.filenames = []
        self.classes = []
        self.batch_index = 0
        
    def flow_from_directory(self, directory: str, class_mode: str = 'categorical'):
        """Load images from directory structure and prepare for batch generation"""
        self.directory = directory
        self.class_mode = class_mode
        
        # Discover classes from subdirectories
        classes = [d for d in os.listdir(directory) 
                  if os.path.isdir(os.path.join(directory, d))]
        classes.sort()
        
        self.class_indices = {cls_name: i for i, cls_name in enumerate(classes)}
        self.num_classes = len(classes)
        
        # Collect all image files
        self.samples = []
        self.filenames = []
        self.classes = []
        
        for class_name in classes:
            class_dir = os.path.join(directory, class_name)
            class_idx = self.class_indices[class_name]
            
            for filename in os.listdir(class_dir):
                if self._is_image_file(filename):
                    filepath = os.path.join(class_dir, filename)
                    self.samples.append((filepath, class_idx))
                    self.filenames.append(filename)
                    self.classes.append(class_idx)
        
        self.samples = np.array(self.samples)
        self.classes = np.array(self.classes)
        self.n = len(self.samples)
        
        self._set_index_array()
        self.batch_index = 0
        return self
    
    def _is_image_file(self, filename: str) -> bool:
        """Check if file is an image based on extension"""
        valid_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif']
        return any(filename.lower().endswith(ext) for ext in valid_extensions)
    
    def _set_index_array(self):
        """Create index array for batch generation"""
        self.index_array = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.index_array)
    
    def _load_and_preprocess_image(self, filepath: str) -> np.ndarray:
        """Load and preprocess a single image"""
        try:
            # Load image
            img = Image.open(filepath)
            
            # Convert to RGB if needed
            if img.mode != 'RGB':
                img = img.convert('RGB')
            
            # Resize image
            img = img.resize(self.target_size, Image.Resampling.LANCZOS)
            
            # Convert to numpy array
            img_array = np.array(img, dtype=np.float32)
            
            # Apply preprocessing function if provided
            if self.preprocessing_function:
                img_array = self.preprocessing_function(img_array)
            
            return img_array
        except Exception as e:
            print(f"Error loading image {filepath}: {e}")
            # Return a blank image if loading fails
            return np.zeros((*self.target_size, 3), dtype=np.float32)
    
    def _augment_image(self, image: np.ndarray) -> np.ndarray:
        """Apply data augmentation to image"""
        img = image.copy()
        h, w = img.shape[:2]
        
        # Horizontal flip
        if self.horizontal_flip and random.random() > 0.5:
            img = img[:, ::-1, :]
        
        # Rotation
        if self.rotation_range > 0:
            angle = random.uniform(-self.rotation_range, self.rotation_range)
            img = self._rotate_image(img, angle)
        
        # Zoom
        if self.zoom_range > 0:
            zoom_factor = random.uniform(1 - self.zoom_range, 1 + self.zoom_range)
            img = self._zoom_image(img, zoom_factor)
        
        # Width shift
        if self.width_shift_range > 0:
            shift_x = int(random.uniform(-self.width_shift_range, self.width_shift_range) * w)
            img = self._shift_image(img, shift_x, 0)
        
        # Height shift
        if self.height_shift_range > 0:
            shift_y = int(random.uniform(-self.height_shift_range, self.height_shift_range) * h)
            img = self._shift_image(img, 0, shift_y)
        
        return img
    
    def _rotate_image(self, image: np.ndarray, angle: float) -> np.ndarray:
        """Rotate image by given angle in degrees"""
        pil_img = Image.fromarray(image.astype(np.uint8))
        rotated = pil_img.rotate(angle, resample=Image.Resampling.BILINEAR, expand=False)
        return np.array(rotated, dtype=np.float32)
    
    def _zoom_image(self, image: np.ndarray, zoom_factor: float) -> np.ndarray:
        """Zoom image by given factor"""
        h, w = image.shape[:2]
        
        # Calculate new dimensions
        new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
        
        # Resize
        pil_img = Image.fromarray(image.astype(np.uint8))
        zoomed = pil_img.resize((new_w, new_h), Image.Resampling.LANCZOS)
        
        # Crop or pad to original size
        if zoom_factor > 1:
            # Crop center
            left = (new_w - w) // 2
            top = (new_h - h) // 2
            zoomed = zoomed.crop((left, top, left + w, top + h))
        else:
            # Pad with zeros
            result = Image.new('RGB', (w, h), (0, 0, 0))
            left = (w - new_w) // 2
            top = (h - new_h) // 2
            result.paste(zoomed, (left, top))
            zoomed = result
        
        return np.array(zoomed, dtype=np.float32)
    
    def _shift_image(self, image: np.ndarray, shift_x: int, shift_y: int) -> np.ndarray:
        """Shift image by given pixels"""
        h, w = image.shape[:2]
        
        # Create result array
        result = np.zeros_like(image)
        
        # Calculate source and destination coordinates
        x_src_start = max(-shift_x, 0)
        y_src_start = max(-shift_y, 0)
        x_dst_start = max(shift_x, 0)
        y_dst_start = max(shift_y, 0)
        
        x_src_end = min(w - shift_x, w)
        y_src_end = min(h - shift_y, h)
        x_dst_end = min(w + shift_x, w)
        y_dst_end = min(h + shift_y, h)
        
        # Calculate actual copy dimensions
        copy_width = min(x_src_end - x_src_start, x_dst_end - x_dst_start)
        copy_height = min(y_src_end - y_src_start, y_dst_end - y_dst_start)
        
        # Copy shifted region
        if copy_width > 0 and copy_height > 0:
            result[y_dst_start:y_dst_start+copy_height, x_dst_start:x_dst_start+copy_width] = \
                image[y_src_start:y_src_start+copy_height, x_src_start:x_src_start+copy_width]
        
        return result
    
    def _to_categorical(self, labels: np.ndarray, num_classes: int) -> np.ndarray:
        """Convert class labels to one-hot encoded vectors"""
        # Convert labels to integer array first
        labels_int = labels.astype(int)
        categorical = np.zeros((len(labels_int), num_classes), dtype=np.float32)
        for i, label in enumerate(labels_int):
            categorical[i, label] = 1.0
        return categorical
    
    def __iter__(self):
        self.batch_index = 0
        self._set_index_array()
        return self
    
    def __next__(self) -> Tuple[np.ndarray, np.ndarray]:
        """Generate next batch of data"""
        if self.batch_index >= self.n:
            self.batch_index = 0
            raise StopIteration
        
        # Get batch indices
        start = self.batch_index
        end = min(start + self.batch_size, self.n)
        batch_indices = self.index_array[start:end]
        
        # Load and process batch
        batch_x = []
        batch_y = []
        
        for idx in batch_indices:
            filepath, class_idx = self.samples[idx]
            
            # Load and preprocess image
            img = self._load_and_preprocess_image(filepath)
            
            # Apply augmentation for training (check if augmentation is enabled)
            if (self.horizontal_flip or self.rotation_range > 0 or 
                self.zoom_range > 0 or self.width_shift_range > 0 or 
                self.height_shift_range > 0):
                img = self._augment_image(img)
            
            batch_x.append(img)
            batch_y.append(class_idx)
        
        # Convert to arrays
        batch_x = np.array(batch_x)
        batch_y = np.array(batch_y)
        
        # Convert labels to categorical if needed
        if self.class_mode == 'categorical':
            batch_y = self._to_categorical(batch_y, self.num_classes)
        
        self.batch_index = end
        
        return batch_x, batch_y
    
    def __len__(self) -> int:
        """Number of batches per epoch"""
        return math.ceil(self.n / self.batch_size)
    
    def reset(self):
        """Reset the generator"""
        self.batch_index = 0
        self._set_index_array()

# EfficientNet preprocessing function (manual implementation)
def efficientnet_preprocess_input(x: np.ndarray) -> np.ndarray:
    """
    Manual implementation of EfficientNet preprocessing
    Normalizes images using the ImageNet dataset statistics
    """
    # EfficientNet expects values in range [0, 255]
    # Normalize using ImageNet mean and std
    imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    
    # Normalize each channel
    x_normalized = np.zeros_like(x, dtype=np.float32)
    for i in range(3):
        x_normalized[..., i] = (x[..., i] / 255.0 - imagenet_mean[i]) / imagenet_std[i]
    
    return x_normalized

# Manual Neural Network Implementation
class ManualDenseLayer:
    def __init__(self, units, activation=None, input_dim=None):
        self.units = units
        self.activation = activation
        self.input_dim = input_dim
        self.weights = None
        self.biases = None
        self.initialized = False
        
    def initialize(self, input_dim):
        if not self.initialized:
            self.input_dim = input_dim
            # Xavier/Glorot initialization
            limit = np.sqrt(6.0 / (input_dim + self.units))
            self.weights = np.random.uniform(-limit, limit, (input_dim, self.units))
            self.biases = np.zeros((1, self.units))
            self.initialized = True
            
    def forward(self, x, training=True):
        if not self.initialized:
            self.initialize(x.shape[1])
            
        self.input = x
        self.z = np.dot(x, self.weights) + self.biases
        
        if self.activation == 'relu':
            self.output = np.maximum(0, self.z)
        elif self.activation == 'softmax':
            # Stable softmax implementation
            exp_z = np.exp(self.z - np.max(self.z, axis=1, keepdims=True))
            self.output = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        else:
            self.output = self.z
            
        return self.output

class ManualDropoutLayer:
    def __init__(self, rate):
        self.rate = rate
        self.mask = None
        
    def forward(self, x, training=True):
        if training and self.rate > 0:
            self.mask = np.random.binomial(1, 1 - self.rate, size=x.shape) / (1 - self.rate)
            return x * self.mask
        return x

class ManualGlobalAveragePooling2D:
    def forward(self, x, training=True):
        # x shape: (batch_size, height, width, channels)
        return np.mean(x, axis=(1, 2))

class ManualEfficientNetB4:
    def __init__(self, include_top=True, input_shape=(380, 380, 3), num_classes=5):
        self.include_top = include_top
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.layers = []
        self.built = False
        
    def build(self):
        if self.built:
            return
            
        # Simulating the base model (pretrained weights would be loaded here)
        print("Building EfficientNetB4 base model (simplified)...")
        
        if self.include_top:
            # Add custom classification layers
            self.layers.append(ManualGlobalAveragePooling2D())
            self.layers.append(ManualDropoutLayer(0.5))
            self.layers.append(ManualDenseLayer(256, activation='relu'))
            self.layers.append(ManualDropoutLayer(0.5))
            self.layers.append(ManualDenseLayer(self.num_classes, activation='softmax'))
            
        self.built = True
        
    def forward(self, x, training=True):
        if not self.built:
            self.build()
            
        for layer in self.layers:
            x = layer.forward(x, training)
            
        return x
    
    def set_trainable(self, trainable):
        # In a real implementation, this would freeze/unfreeze base model layers
        pass

class ManualAdamOptimizer:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.t = 0
        self.m = None
        self.v = None
        
    def update(self, params, grads):
        if self.m is None:
            self.m = [np.zeros_like(p) for p in params]
            self.v = [np.zeros_like(p) for p in params]
            
        self.t += 1
        updated_params = []
        
        for i, (param, grad) in enumerate(zip(params, grads)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad ** 2)
            
            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
            v_hat = self.v[i] / (1 - self.beta2 ** self.t)
            
            param_update = self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)
            updated_params.append(param - param_update)
            
        return updated_params

def categorical_crossentropy(y_true, y_pred):
    # Avoid numerical instability
    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
    return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

def categorical_accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

# Main execution
if __name__ == "__main__":
    # Define dataset directories
    base_dir = r"E:\marine-animals-dataset\versions\1"
    train_dir = os.path.join(base_dir, 'train')
    valid_dir = os.path.join(base_dir, 'valid')
    test_dir = os.path.join(base_dir, 'test')
    
    # Create data generators with EfficientNet preprocessing
    train_datagen = ManualImageDataGenerator(
        target_size=(380, 380),
        batch_size=32,
        shuffle=True,
        preprocessing_function=efficientnet_preprocess_input,
        horizontal_flip=True,
        rotation_range=15,
        zoom_range=0.25,
        width_shift_range=0.2,
        height_shift_range=0.2
    )
    
    valid_datagen = ManualImageDataGenerator(
        target_size=(380, 380),
        batch_size=32,
        shuffle=False,
        preprocessing_function=efficientnet_preprocess_input
    )
    
    test_datagen = ManualImageDataGenerator(
        target_size=(380, 380),
        batch_size=32,
        shuffle=False,
        preprocessing_function=efficientnet_preprocess_input
    )
    
    # Load datasets
    print("Loading training data...")
    train_generator = train_datagen.flow_from_directory(train_dir, class_mode='categorical')
    
    print("Loading validation data...")
    valid_generator = valid_datagen.flow_from_directory(valid_dir, class_mode='categorical')
    
    print("Loading test data...")
    test_generator = test_datagen.flow_from_directory(test_dir, class_mode='categorical')
    
    # Print class indices
    class_indices = train_generator.class_indices
    print("Class indices:", class_indices)
    
    # Example usage: iterate through one batch
    print("\nTesting batch generation...")
    for i, (batch_x, batch_y) in enumerate(train_generator):
        print(f"Batch {i+1}:")
        print(f"  Images shape: {batch_x.shape}")
        print(f"  Labels shape: {batch_y.shape}")
        print(f"  Image range: [{batch_x.min():.3f}, {batch_x.max():.3f}]")
        print(f"  Labels: {np.argmax(batch_y, axis=1)}")
        
        if i == 0:  # Just show first batch for demonstration
            break
    
    # Reset generators for actual training
    train_generator.reset()
    valid_generator.reset()
    test_generator.reset()
    
    print(f"\nDataset Info:")
    print(f"Training samples: {train_generator.n}")
    print(f"Validation samples: {valid_generator.n}")
    print(f"Test samples: {test_generator.n}")
    print(f"Number of classes: {train_generator.num_classes}")
    print(f"Classes: {list(train_generator.class_indices.keys())}")
    
    # Create and test the manual model
    print("\nBuilding manual EfficientNetB4 model...")
    model = ManualEfficientNetB4(include_top=True, input_shape=(380, 380, 3), num_classes=5)
    
    # Test forward pass with a sample batch
    sample_batch = np.random.random((2, 380, 380, 3)).astype(np.float32)
    output = model.forward(sample_batch)
    print(f"Model output shape: {output.shape}")
    print(f"Model output sum per sample: {np.sum(output, axis=1)}")  # Should be ~1.0 for softmax

Loading training data...
Loading validation data...
Loading test data...
Class indices: {'Dolphin': 0, 'Fish': 1, 'Lobster': 2, 'Octopus': 3, 'Sea Horse': 4}

Testing batch generation...
Batch 1:
  Images shape: (32, 380, 380, 3)
  Labels shape: (32, 5)
  Image range: [0.000, 255.000]
  Labels: [1 4 0 1 2 4 3 4 0 1 4 2 1 3 3 1 4 2 2 1 1 1 3 1 2 0 4 0 4 2 1 0]

Dataset Info:
Training samples: 1241
Validation samples: 250
Test samples: 100
Number of classes: 5
Classes: ['Dolphin', 'Fish', 'Lobster', 'Octopus', 'Sea Horse']

Building manual EfficientNetB4 model...
Building EfficientNetB4 base model (simplified)...
Model output shape: (2, 5)
Model output sum per sample: [1. 1.]


In [5]:
import os
import numpy as np
from PIL import Image
import random
import math

class ManualImageDataGenerator:
    def __init__(self, 
                 target_size=(380, 380),
                 batch_size=32,
                 shuffle=True,
                 preprocessing_function=None,
                 horizontal_flip=False,
                 rotation_range=0.0,
                 zoom_range=0.0,
                 width_shift_range=0.0,
                 height_shift_range=0.0):
        
        self.target_size = target_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.preprocessing_function = preprocessing_function
        self.horizontal_flip = horizontal_flip
        self.rotation_range = rotation_range
        self.zoom_range = zoom_range
        self.width_shift_range = width_shift_range
        self.height_shift_range = height_shift_range
        
        self.class_indices = {}
        self.samples = []
        self.batch_index = 0
        
    def flow_from_directory(self, directory, class_mode='categorical'):
        self.directory = directory
        self.class_mode = class_mode
        
        classes = [d for d in os.listdir(directory) 
                  if os.path.isdir(os.path.join(directory, d))]
        classes.sort()
        
        self.class_indices = {cls_name: i for i, cls_name in enumerate(classes)}
        self.num_classes = len(classes)
        
        self.samples = []
        for class_name in classes:
            class_dir = os.path.join(directory, class_name)
            class_idx = self.class_indices[class_name]
            
            for filename in os.listdir(class_dir):
                if self._is_image_file(filename):
                    filepath = os.path.join(class_dir, filename)
                    self.samples.append((filepath, class_idx))
        
        self.n = len(self.samples)
        self._set_index_array()
        self.batch_index = 0
        return self
    
    def _is_image_file(self, filename):
        valid_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif']
        return any(filename.lower().endswith(ext) for ext in valid_extensions)
    
    def _set_index_array(self):
        self.index_array = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.index_array)
    
    def _load_and_preprocess_image(self, filepath):
        try:
            img = Image.open(filepath)
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img = img.resize(self.target_size, Image.Resampling.LANCZOS)
            img_array = np.array(img, dtype=np.float32)
            
            if self.preprocessing_function:
                img_array = self.preprocessing_function(img_array)
            
            return img_array
        except Exception as e:
            print(f"Error loading image {filepath}: {e}")
            return np.zeros((*self.target_size, 3), dtype=np.float32)
    
    def _augment_image(self, image):
        img = image.copy()
        h, w = img.shape[:2]
        
        if self.horizontal_flip and random.random() > 0.5:
            img = img[:, ::-1, :]
        
        if self.rotation_range > 0:
            angle = random.uniform(-self.rotation_range, self.rotation_range)
            pil_img = Image.fromarray(img.astype(np.uint8))
            rotated = pil_img.rotate(angle, resample=Image.Resampling.BILINEAR, expand=False)
            img = np.array(rotated, dtype=np.float32)
        
        return img
    
    def _to_categorical(self, labels, num_classes):
        labels_int = labels.astype(int)
        categorical = np.zeros((len(labels_int), num_classes), dtype=np.float32)
        for i, label in enumerate(labels_int):
            categorical[i, label] = 1.0
        return categorical
    
    def __iter__(self):
        self.batch_index = 0
        self._set_index_array()
        return self
    
    def __next__(self):
        if self.batch_index >= self.n:
            self.batch_index = 0
            raise StopIteration
        
        start = self.batch_index
        end = min(start + self.batch_size, self.n)
        batch_indices = self.index_array[start:end]
        
        batch_x = []
        batch_y = []
        
        for idx in batch_indices:
            filepath, class_idx = self.samples[idx]
            img = self._load_and_preprocess_image(filepath)
            
            if (self.horizontal_flip or self.rotation_range > 0):
                img = self._augment_image(img)
            
            batch_x.append(img)
            batch_y.append(class_idx)
        
        batch_x = np.array(batch_x)
        batch_y = np.array(batch_y)
        
        if self.class_mode == 'categorical':
            batch_y = self._to_categorical(batch_y, self.num_classes)
        
        self.batch_index = end
        return batch_x, batch_y
    
    def __len__(self):
        return math.ceil(self.n / self.batch_size)
    
    def reset(self):
        self.batch_index = 0
        self._set_index_array()

def efficientnet_preprocess_input(x):
    imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    
    x_normalized = np.zeros_like(x, dtype=np.float32)
    for i in range(3):
        x_normalized[..., i] = (x[..., i] / 255.0 - imagenet_mean[i]) / imagenet_std[i]
    
    return x_normalized

class ManualConv2D:
    def __init__(self, filters, kernel_size, strides=1, padding='same', use_bias=True):
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding
        self.use_bias = use_bias
        self.weights = None
        self.biases = None
        
    def initialize(self, input_shape):
        input_channels = input_shape[-1]
        limit = np.sqrt(6.0 / (input_channels * self.kernel_size * self.kernel_size + self.filters))
        self.weights = np.random.uniform(-limit, limit, 
                                       (self.kernel_size, self.kernel_size, input_channels, self.filters))
        if self.use_bias:
            self.biases = np.zeros(self.filters)
        return input_shape
    
    def forward(self, x):
        batch_size, h, w, input_channels = x.shape
        k_h, k_w, _, output_channels = self.weights.shape
        
        if self.padding == 'same':
            pad_h = (h * (self.strides - 1) + k_h - self.strides) // 2
            pad_w = (w * (self.strides - 1) + k_w - self.strides) // 2
            x_padded = np.pad(x, ((0, 0), (pad_h, pad_h), (pad_w, pad_w), (0, 0)), mode='constant')
        else:
            x_padded = x
        
        h_padded, w_padded = x_padded.shape[1], x_padded.shape[2]
        output_h = (h_padded - k_h) // self.strides + 1
        output_w = (w_padded - k_w) // self.strides + 1
        
        output = np.zeros((batch_size, output_h, output_w, output_channels))
        
        for i in range(output_h):
            for j in range(output_w):
                h_start = i * self.strides
                h_end = h_start + k_h
                w_start = j * self.strides
                w_end = w_start + k_w
                
                patch = x_padded[:, h_start:h_end, w_start:w_end, :]
                for k in range(output_channels):
                    output[:, i, j, k] = np.sum(patch * self.weights[:, :, :, k], axis=(1, 2, 3))
        
        if self.use_bias:
            output += self.biases
        
        return output

class ManualDense:
    def __init__(self, units, activation=None):
        self.units = units
        self.activation = activation
        self.weights = None
        self.biases = None
        
    def initialize(self, input_shape):
        input_dim = input_shape[-1]
        limit = np.sqrt(6.0 / (input_dim + self.units))
        self.weights = np.random.uniform(-limit, limit, (input_dim, self.units))
        self.biases = np.zeros(self.units)
        return (input_shape[0], self.units)
    
    def forward(self, x):
        self.input = x
        self.z = np.dot(x, self.weights) + self.biases
        
        if self.activation == 'relu':
            self.output = np.maximum(0, self.z)
        elif self.activation == 'softmax':
            exp_z = np.exp(self.z - np.max(self.z, axis=1, keepdims=True))
            self.output = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        else:
            self.output = self.z
            
        return self.output

class ManualGlobalAveragePooling2D:
    def initialize(self, input_shape):
        return (input_shape[0], input_shape[3])
    
    def forward(self, x):
        return np.mean(x, axis=(1, 2))

class ManualDropout:
    def __init__(self, rate):
        self.rate = rate
        
    def initialize(self, input_shape):
        return input_shape
    
    def forward(self, x, training=True):
        if training and self.rate > 0:
            self.mask = np.random.binomial(1, 1 - self.rate, size=x.shape) / (1 - self.rate)
            return x * self.mask
        return x

class ManualBatchNormalization:
    def __init__(self, momentum=0.99, epsilon=1e-3):
        self.momentum = momentum
        self.epsilon = epsilon
        self.gamma = None
        self.beta = None
        self.moving_mean = None
        self.moving_variance = None
        
    def initialize(self, input_shape):
        self.gamma = np.ones(input_shape[-1])
        self.beta = np.zeros(input_shape[-1])
        self.moving_mean = np.zeros(input_shape[-1])
        self.moving_variance = np.ones(input_shape[-1])
        return input_shape
    
    def forward(self, x, training=True):
        if training:
            mean = np.mean(x, axis=(0, 1, 2), keepdims=True)
            variance = np.var(x, axis=(0, 1, 2), keepdims=True)
            
            self.moving_mean = self.momentum * self.moving_mean + (1 - self.momentum) * mean.flatten()
            self.moving_variance = self.momentum * self.moving_variance + (1 - self.momentum) * variance.flatten()
            
            x_normalized = (x - mean) / np.sqrt(variance + self.epsilon)
        else:
            x_normalized = (x - self.moving_mean.reshape(1, 1, 1, -1)) / np.sqrt(self.moving_variance.reshape(1, 1, 1, -1) + self.epsilon)
        
        return x_normalized * self.gamma.reshape(1, 1, 1, -1) + self.beta.reshape(1, 1, 1, -1)

class ManualMBConvBlock:
    def __init__(self, filters, strides, expand_ratio, kernel_size=3):
        self.filters = filters
        self.strides = strides
        self.expand_ratio = expand_ratio
        self.kernel_size = kernel_size
        self.layers = []
        
    def initialize(self, input_shape):
        input_channels = input_shape[-1]
        expanded_channels = input_channels * self.expand_ratio
        
        # Expansion conv
        if self.expand_ratio != 1:
            self.expand_conv = ManualConv2D(expanded_channels, 1, strides=1)
            self.expand_conv.initialize(input_shape)
            self.layers.append(self.expand_conv)
            
            self.bn1 = ManualBatchNormalization()
            self.bn1.initialize((input_shape[0], input_shape[1], input_shape[2], expanded_channels))
            self.layers.append(self.bn1)
        
        # Depthwise conv
        self.depthwise_conv = ManualConv2D(expanded_channels, self.kernel_size, strides=self.strides, padding='same')
        depthwise_input_shape = (input_shape[0], input_shape[1], input_shape[2], expanded_channels) if self.expand_ratio != 1 else input_shape
        self.depthwise_conv.initialize(depthwise_input_shape)
        self.layers.append(self.depthwise_conv)
        
        self.bn2 = ManualBatchNormalization()
        self.bn2.initialize((input_shape[0], input_shape[1]//self.strides, input_shape[2]//self.strides, expanded_channels))
        self.layers.append(self.bn2)
        
        # Projection conv
        self.projection_conv = ManualConv2D(self.filters, 1, strides=1)
        projection_input_shape = (input_shape[0], input_shape[1]//self.strides, input_shape[2]//self.strides, expanded_channels)
        self.projection_conv.initialize(projection_input_shape)
        self.layers.append(self.projection_conv)
        
        self.bn3 = ManualBatchNormalization()
        output_shape = (input_shape[0], input_shape[1]//self.strides, input_shape[2]//self.strides, self.filters)
        self.bn3.initialize(output_shape)
        self.layers.append(self.bn3)
        
        return output_shape
    
    def forward(self, x, training=True):
        residual = x
        
        # Expansion
        if self.expand_ratio != 1:
            x = self.expand_conv.forward(x)
            x = self.bn1.forward(x, training)
            x = np.maximum(0, x)  # Swish activation simplified to ReLU
        
        # Depthwise
        x = self.depthwise_conv.forward(x)
        x = self.bn2.forward(x, training)
        x = np.maximum(0, x)
        
        # Projection
        x = self.projection_conv.forward(x)
        x = self.bn3.forward(x, training)
        
        # Skip connection
        if self.strides == 1 and residual.shape[-1] == x.shape[-1]:
            x = x + residual
        
        return x

class ManualEfficientNetB4:
    def __init__(self, include_top=True, input_shape=(380, 380, 3), num_classes=1000):
        self.include_top = include_top
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.layers = []
        self.built = False
        
    def build(self):
        if self.built:
            return
            
        # Stem
        self.stem_conv = ManualConv2D(48, 3, strides=2, padding='same')
        self.stem_conv.initialize((1, *self.input_shape))
        self.layers.append(self.stem_conv)
        
        self.stem_bn = ManualBatchNormalization()
        self.stem_bn.initialize((1, 190, 190, 48))
        self.layers.append(self.stem_bn)
        
        # MBConv blocks (simplified version)
        # Block 1
        self.block1 = ManualMBConvBlock(24, 1, 1)
        self.block1.initialize((1, 190, 190, 48))
        self.layers.append(self.block1)
        
        # Block 2
        self.block2 = ManualMBConvBlock(32, 2, 6)
        self.block2.initialize((1, 190, 190, 24))
        self.layers.append(self.block2)
        
        if self.include_top:
            # Top layers
            self.global_pool = ManualGlobalAveragePooling2D()
            self.global_pool.initialize((1, 95, 95, 32))
            self.layers.append(self.global_pool)
            
            self.dropout1 = ManualDropout(0.5)
            self.dropout1.initialize((1, 32))
            self.layers.append(self.dropout1)
            
            self.dense1 = ManualDense(256, activation='relu')
            self.dense1.initialize((1, 32))
            self.layers.append(self.dense1)
            
            self.dropout2 = ManualDropout(0.5)
            self.dropout2.initialize((1, 256))
            self.layers.append(self.dropout2)
            
            self.dense2 = ManualDense(self.num_classes, activation='softmax')
            self.dense2.initialize((1, 256))
            self.layers.append(self.dense2)
            
        self.built = True
        
    def forward(self, x, training=True):
        if not self.built:
            self.build()
            
        for layer in self.layers:
            if isinstance(layer, ManualDropout):
                x = layer.forward(x, training)
            else:
                x = layer.forward(x)
                
        return x
    
    def set_trainable(self, trainable):
        for layer in self.layers:
            if hasattr(layer, 'weights'):
                layer.trainable = trainable

class ManualAdam:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.t = 0
        self.m = None
        self.v = None
        
    def update(self, params, grads):
        if self.m is None:
            self.m = [np.zeros_like(p) for p in params]
            self.v = [np.zeros_like(p) for p in params]
            
        self.t += 1
        updated_params = []
        
        for i, (param, grad) in enumerate(zip(params, grads)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad ** 2)
            
            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
            v_hat = self.v[i] / (1 - self.beta2 ** self.t)
            
            param_update = self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)
            updated_params.append(param - param_update)
            
        return updated_params

def categorical_crossentropy(y_true, y_pred):
    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
    return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

def categorical_accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

# Main execution
if __name__ == "__main__":
    # Create manual EfficientNetB4 model
    base_model = ManualEfficientNetB4(include_top=False, input_shape=(380, 380, 3), num_classes=5)
    
    # Freeze base model
    base_model.set_trainable(False)
    
    # Build custom classifier
    classifier_layers = [
        ManualGlobalAveragePooling2D(),
        ManualDropout(0.5),
        ManualDense(256, activation='relu'),
        ManualDropout(0.5),
        ManualDense(5, activation='softmax')
    ]
    
    # Initialize classifier
    current_shape = (1, 95, 95, 32)  # Output shape from base model
    for layer in classifier_layers:
        current_shape = layer.initialize(current_shape)
    
    # Define complete model
    class CompleteModel:
        def __init__(self, base_model, classifier_layers):
            self.base_model = base_model
            self.classifier_layers = classifier_layers
            
        def forward(self, x, training=True):
            x = self.base_model.forward(x, training)
            for layer in self.classifier_layers:
                if isinstance(layer, ManualDropout):
                    x = layer.forward(x, training)
                else:
                    x = layer.forward(x)
            return x
    
    model = CompleteModel(base_model, classifier_layers)
    
    # Create optimizer
    optimizer = ManualAdam(learning_rate=0.001)
    
    print("Manual EfficientNetB4 Model Summary:")
    print("Input shape: (380, 380, 3)")
    print("Base model: Frozen")
    print("Classifier layers:")
    print("  GlobalAveragePooling2D")
    print("  Dropout(0.5)")
    print("  Dense(256, relu)")
    print("  Dropout(0.5)")
    print("  Dense(5, softmax)")
    print("Optimizer: Adam(lr=0.001)")
    print("Loss: categorical_crossentropy")
    
    # Test with sample data
    sample_input = np.random.random((2, 380, 380, 3)).astype(np.float32)
    output = model.forward(sample_input, training=False)
    print(f"\nSample output shape: {output.shape}")
    print(f"Sample output sum per sample: {np.sum(output, axis=1)}")

Manual EfficientNetB4 Model Summary:
Input shape: (380, 380, 3)
Base model: Frozen
Classifier layers:
  GlobalAveragePooling2D
  Dropout(0.5)
  Dense(256, relu)
  Dropout(0.5)
  Dense(5, softmax)
Optimizer: Adam(lr=0.001)
Loss: categorical_crossentropy

Sample output shape: (2, 5)
Sample output sum per sample: [1. 1.]


In [2]:
import os
import numpy as np
from PIL import Image
import random
import math

class ManualImageDataGenerator:
    def __init__(self, 
                 target_size=(224, 224),
                 batch_size=16,  # Increased batch size
                 shuffle=True,
                 preprocessing_function=None,
                 horizontal_flip=False,
                 rotation_range=0.0,
                 zoom_range=0.0):
        
        self.target_size = target_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.preprocessing_function = preprocessing_function
        self.horizontal_flip = horizontal_flip
        self.rotation_range = rotation_range
        self.zoom_range = zoom_range
        
        self.class_indices = {}
        self.samples = []
        self.batch_index = 0
        
    def flow_from_directory(self, directory, class_mode='categorical'):
        self.directory = directory
        self.class_mode = class_mode
        
        classes = [d for d in os.listdir(directory) 
                  if os.path.isdir(os.path.join(directory, d))]
        classes.sort()
        
        self.class_indices = {cls_name: i for i, cls_name in enumerate(classes)}
        self.num_classes = len(classes)
        
        self.samples = []
        for class_name in classes:
            class_dir = os.path.join(directory, class_name)
            class_idx = self.class_indices[class_name]
            
            for filename in os.listdir(class_dir):
                if self._is_image_file(filename):
                    filepath = os.path.join(class_dir, filename)
                    self.samples.append((filepath, class_idx))
        
        self.n = len(self.samples)
        self._set_index_array()
        self.batch_index = 0
        return self
    
    def _is_image_file(self, filename):
        valid_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
        return any(filename.lower().endswith(ext) for ext in valid_extensions)
    
    def _set_index_array(self):
        self.index_array = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.index_array)
    
    def _load_and_preprocess_image(self, filepath):
        try:
            img = Image.open(filepath)
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img = img.resize(self.target_size, Image.Resampling.LANCZOS)
            img_array = np.array(img, dtype=np.float32)
            
            if self.preprocessing_function:
                img_array = self.preprocessing_function(img_array)
            
            return img_array
        except Exception as e:
            print(f"Error loading image {filepath}: {e}")
            return np.zeros((*self.target_size, 3), dtype=np.float32)
    
    def _augment_image(self, image):
        img = image.copy()
        
        if self.horizontal_flip and random.random() > 0.5:
            img = img[:, ::-1, :]
        
        return img
    
    def _to_categorical(self, labels, num_classes):
        labels_int = labels.astype(int)
        categorical = np.zeros((len(labels_int), num_classes), dtype=np.float32)
        for i, label in enumerate(labels_int):
            categorical[i, label] = 1.0
        return categorical
    
    def __iter__(self):
        self.batch_index = 0
        self._set_index_array()
        return self
    
    def __next__(self):
        if self.batch_index >= self.n:
            self.batch_index = 0
            raise StopIteration
        
        start = self.batch_index
        end = min(start + self.batch_size, self.n)
        batch_indices = self.index_array[start:end]
        
        batch_x = []
        batch_y = []
        
        for idx in batch_indices:
            filepath, class_idx = self.samples[idx]
            img = self._load_and_preprocess_image(filepath)
            
            # Apply augmentation only during training
            if hasattr(self, 'horizontal_flip') and self.horizontal_flip:
                img = self._augment_image(img)
            
            batch_x.append(img)
            batch_y.append(class_idx)
        
        batch_x = np.array(batch_x, dtype=np.float32)
        batch_y = np.array(batch_y)
        
        if self.class_mode == 'categorical':
            batch_y = self._to_categorical(batch_y, self.num_classes)
        
        self.batch_index = end
        return batch_x, batch_y
    
    def __len__(self):
        return math.ceil(self.n / self.batch_size)
    
    def reset(self):
        self.batch_index = 0
        self._set_index_array()

def efficientnet_preprocess_input(x):
    imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    
    x_normalized = np.zeros_like(x, dtype=np.float32)
    for i in range(3):
        x_normalized[..., i] = (x[..., i] / 255.0 - imagenet_mean[i]) / imagenet_std[i]
    
    return x_normalized

class ManualDense:
    def __init__(self, units, activation=None):
        self.units = units
        self.activation = activation
        self.weights = None
        self.biases = None
        self.initialized = False
        
    def initialize(self, input_shape):
        input_dim = input_shape[-1]
        # Better initialization
        limit = np.sqrt(2.0 / input_dim)
        self.weights = np.random.randn(input_dim, self.units).astype(np.float32) * limit
        self.biases = np.zeros(self.units, dtype=np.float32)
        self.initialized = True
        return (input_shape[0], self.units)
    
    def forward(self, x):
        if not self.initialized:
            self.initialize(x.shape)
            
        self.input = x
        self.z = np.dot(x, self.weights) + self.biases
        
        if self.activation == 'relu':
            self.output = np.maximum(0, self.z)
        elif self.activation == 'softmax':
            # More stable softmax
            shift_z = self.z - np.max(self.z, axis=1, keepdims=True)
            exp_z = np.exp(shift_z)
            self.output = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        else:
            self.output = self.z
            
        return self.output

class ManualGlobalAveragePooling2D:
    def __init__(self):
        self.initialized = False
        
    def initialize(self, input_shape):
        self.initialized = True
        return (input_shape[0], input_shape[3])
    
    def forward(self, x):
        if not self.initialized:
            self.initialize(x.shape)
        return np.mean(x, axis=(1, 2), dtype=np.float32)

class ManualDropout:
    def __init__(self, rate):
        self.rate = rate
        self.initialized = False
        
    def initialize(self, input_shape):
        self.initialized = True
        return input_shape
    
    def forward(self, x, training=True):
        if not self.initialized:
            self.initialize(x.shape)
            
        if training and self.rate > 0:
            # Correct dropout implementation
            self.mask = (np.random.random(x.shape) > self.rate).astype(np.float32)
            return x * self.mask
        return x

class ManualConv2D:
    def __init__(self, filters, kernel_size, strides=1, padding='same', use_bias=True):
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding
        self.use_bias = use_bias
        self.weights = None
        self.biases = None
        self.initialized = False
        
    def initialize(self, input_shape):
        input_channels = input_shape[-1]
        # He initialization for ReLU
        limit = np.sqrt(2.0 / (input_channels * self.kernel_size * self.kernel_size))
        
        self.weights = np.random.randn(
            self.kernel_size, self.kernel_size, input_channels, self.filters
        ).astype(np.float32) * limit
        
        if self.use_bias:
            self.biases = np.zeros(self.filters, dtype=np.float32)
            
        self.initialized = True
        return self._compute_output_shape(input_shape)
    
    def _compute_output_shape(self, input_shape):
        batch_size, h, w, _ = input_shape
        
        if self.padding == 'same':
            out_h = (h + self.strides - 1) // self.strides
            out_w = (w + self.strides - 1) // self.strides
        else:
            out_h = (h - self.kernel_size) // self.strides + 1
            out_w = (w - self.kernel_size) // self.strides + 1
            
        return (batch_size, out_h, out_w, self.filters)
    
    def forward(self, x):
        if not self.initialized:
            self.initialize(x.shape)
            
        batch_size, h, w, input_channels = x.shape
        
        if self.padding == 'same':
            pad_h = ((h - 1) * self.strides + self.kernel_size - h) // 2
            pad_w = ((w - 1) * self.strides + self.kernel_size - w) // 2
            x_padded = np.pad(x, ((0, 0), (pad_h, pad_h), (pad_w, pad_w), (0, 0)), 
                            mode='constant').astype(np.float32)
        else:
            x_padded = x
        
        output_shape = self._compute_output_shape(x.shape)
        output = np.zeros(output_shape, dtype=np.float32)
        
        out_h, out_w = output_shape[1], output_shape[2]
        
        for i in range(out_h):
            for j in range(out_w):
                h_start = i * self.strides
                h_end = h_start + self.kernel_size
                w_start = j * self.strides
                w_end = w_start + self.kernel_size
                
                patch = x_padded[:, h_start:h_end, w_start:w_end, :]
                
                # Efficient convolution
                for k in range(self.filters):
                    output[:, i, j, k] = np.sum(patch * self.weights[:, :, :, k], axis=(1, 2, 3))
        
        if self.use_bias:
            output += self.biases
        
        return output

class ManualBatchNormalization:
    def __init__(self, momentum=0.99, epsilon=1e-3):
        self.momentum = momentum
        self.epsilon = epsilon
        self.gamma = None
        self.beta = None
        self.moving_mean = None
        self.moving_variance = None
        self.initialized = False
        
    def initialize(self, input_shape):
        channels = input_shape[-1]
        self.gamma = np.ones(channels, dtype=np.float32)
        self.beta = np.zeros(channels, dtype=np.float32)
        self.moving_mean = np.zeros(channels, dtype=np.float32)
        self.moving_variance = np.ones(channels, dtype=np.float32)
        self.initialized = True
        return input_shape
    
    def forward(self, x, training=True):
        if not self.initialized:
            self.initialize(x.shape)
            
        if training:
            # Batch statistics
            mean = np.mean(x, axis=(0, 1, 2), keepdims=True)
            variance = np.var(x, axis=(0, 1, 2), keepdims=True)
            
            # Update moving statistics
            self.moving_mean = self.momentum * self.moving_mean + (1 - self.momentum) * mean.flatten()
            self.moving_variance = self.momentum * self.moving_variance + (1 - self.momentum) * variance.flatten()
            
            # Normalize
            x_normalized = (x - mean) / np.sqrt(variance + self.epsilon)
        else:
            # Use moving statistics for inference
            x_normalized = (x - self.moving_mean.reshape(1, 1, 1, -1)) / np.sqrt(self.moving_variance.reshape(1, 1, 1, -1) + self.epsilon)
        
        # Scale and shift
        return x_normalized * self.gamma.reshape(1, 1, 1, -1) + self.beta.reshape(1, 1, 1, -1)

class ManualBaseModel:
    def __init__(self):
        self.layers = []
        self.built = False
        
    def build(self):
        if self.built:
            return
            
        # Better architecture with batch normalization
        self.conv1 = ManualConv2D(32, 3, strides=2, padding='same')
        self.bn1 = ManualBatchNormalization()
        self.conv2 = ManualConv2D(64, 3, strides=2, padding='same')
        self.bn2 = ManualBatchNormalization()
        self.conv3 = ManualConv2D(128, 3, strides=2, padding='same')
        self.bn3 = ManualBatchNormalization()
        self.conv4 = ManualConv2D(256, 3, strides=2, padding='same')
        self.bn4 = ManualBatchNormalization()
        
        self.layers = [
            self.conv1, self.bn1,
            self.conv2, self.bn2, 
            self.conv3, self.bn3,
            self.conv4, self.bn4
        ]
        
        # Initialize layers
        current_shape = (1, 224, 224, 3)
        for layer in self.layers:
            current_shape = layer.initialize(current_shape)
            
        self.built = True
        
    def forward(self, x, training=True):
        if not self.built:
            self.build()
            
        for i, layer in enumerate(self.layers):
            if isinstance(layer, ManualBatchNormalization):
                x = layer.forward(x, training)
                # ReLU activation after BN
                if i < len(self.layers) - 1:  # Don't apply ReLU after last layer
                    x = np.maximum(0, x)
            else:
                x = layer.forward(x)
            
        return x

class ManualEfficientNetB4:
    def __init__(self, include_top=True, input_shape=(224, 224, 3), num_classes=5):
        self.include_top = include_top
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.base_model = None
        self.classifier_layers = []
        self.built = False
        
    def build(self):
        if self.built:
            return
            
        # Use improved base model
        self.base_model = ManualBaseModel()
        self.base_model.build()
        
        if self.include_top:
            self.global_pool = ManualGlobalAveragePooling2D()
            self.dropout1 = ManualDropout(0.2)  # Reduced dropout
            self.dense1 = ManualDense(512, activation='relu')  # More units
            self.dropout2 = ManualDropout(0.2)  # Reduced dropout
            self.dense2 = ManualDense(self.num_classes, activation='softmax')
            
            self.classifier_layers = [
                self.global_pool,
                self.dropout1,
                self.dense1,
                self.dropout2,
                self.dense2
            ]
            
            # Initialize classifier layers
            base_output_shape = (1, 14, 14, 256)  # Updated output shape
            current_shape = base_output_shape
            
            for layer in self.classifier_layers:
                current_shape = layer.initialize(current_shape)
            
        self.built = True
        
    def forward(self, x, training=True):
        if not self.built:
            self.build()
        
        # Base model forward pass
        x = self.base_model.forward(x, training)
        
        if self.include_top:
            # Classifier forward pass
            for layer in self.classifier_layers:
                if isinstance(layer, ManualDropout):
                    x = layer.forward(x, training)
                else:
                    x = layer.forward(x)
                
        return x

def categorical_crossentropy(y_true, y_pred):
    # More stable implementation
    y_pred = np.clip(y_pred, 1e-12, 1.0 - 1e-12)
    return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

def categorical_accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

# Improved training with gradient descent
class ManualSGD:
    def __init__(self, learning_rate=0.01, momentum=0.9):
        self.lr = learning_rate
        self.momentum = momentum
        self.velocities = None
        
    def update(self, params, grads):
        if self.velocities is None:
            self.velocities = [np.zeros_like(p) for p in params]
            
        updated_params = []
        for i, (param, grad, velocity) in enumerate(zip(params, grads, self.velocities)):
            # Momentum update
            velocity = self.momentum * velocity - self.lr * grad
            updated_param = param + velocity
            updated_params.append(updated_param)
            self.velocities[i] = velocity
            
        return updated_params

# Manual training implementation
def manual_fit(model, train_generator, valid_generator, epochs=10, verbose=1):
    history = {
        'loss': [],
        'accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    
    # Track best model
    best_val_accuracy = 0
    
    for epoch in range(epochs):
        if verbose:
            print(f'Epoch {epoch + 1}/{epochs}')
        
        # Training phase
        train_generator.reset()
        epoch_loss = 0.0
        epoch_accuracy = 0.0
        num_batches = 0
        
        for batch_x, batch_y in train_generator:
            # Forward pass
            predictions = model.forward(batch_x, training=True)
            
            # Compute metrics
            batch_loss = categorical_crossentropy(batch_y, predictions)
            batch_accuracy = categorical_accuracy(batch_y, predictions)
            
            epoch_loss += batch_loss
            epoch_accuracy += batch_accuracy
            num_batches += 1
        
        # Calculate averages
        if num_batches > 0:
            avg_loss = epoch_loss / num_batches
            avg_accuracy = epoch_accuracy / num_batches
            history['loss'].append(avg_loss)
            history['accuracy'].append(avg_accuracy)
        
        # Validation phase
        valid_generator.reset()
        val_loss = 0.0
        val_accuracy = 0.0
        num_val_batches = 0
        
        for batch_x, batch_y in valid_generator:
            predictions = model.forward(batch_x, training=False)
            
            batch_val_loss = categorical_crossentropy(batch_y, predictions)
            batch_val_accuracy = categorical_accuracy(batch_y, predictions)
            
            val_loss += batch_val_loss
            val_accuracy += batch_val_accuracy
            num_val_batches += 1
        
        # Calculate validation averages
        if num_val_batches > 0:
            avg_val_loss = val_loss / num_val_batches
            avg_val_accuracy = val_accuracy / num_val_batches
            history['val_loss'].append(avg_val_loss)
            history['val_accuracy'].append(avg_val_accuracy)
            
            # Track best model
            if avg_val_accuracy > best_val_accuracy:
                best_val_accuracy = avg_val_accuracy
        
        if verbose:
            print(f'loss: {avg_loss:.4f} - accuracy: {avg_accuracy:.4f} - '
                  f'val_loss: {avg_val_loss:.4f} - val_accuracy: {avg_val_accuracy:.4f}')
    
    print(f"Best validation accuracy: {best_val_accuracy * 100:.2f}%")
    return history

def manual_evaluate(model, generator, verbose=1):
    generator.reset()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for batch_x, batch_y in generator:
        predictions = model.forward(batch_x, training=False)
        
        batch_loss = categorical_crossentropy(batch_y, predictions)
        batch_accuracy = categorical_accuracy(batch_y, predictions)
        
        total_loss += batch_loss
        total_accuracy += batch_accuracy
        num_batches += 1
    
    if num_batches > 0:
        avg_loss = total_loss / num_batches
        avg_accuracy = total_accuracy / num_batches
    else:
        avg_loss = avg_accuracy = 0.0
    
    if verbose:
        print(f'Evaluation - loss: {avg_loss:.4f} - accuracy: {avg_accuracy:.4f}')
    
    return avg_loss, avg_accuracy

# Main execution
if __name__ == "__main__":
    # Define dataset directories
    base_dir = r"E:\marine-animals-dataset\versions\1"
    train_dir = os.path.join(base_dir, 'train')
    valid_dir = os.path.join(base_dir, 'valid')
    
    # Create data generators
    train_datagen = ManualImageDataGenerator(
        target_size=(224, 224),
        batch_size=16,  # Increased batch size
        shuffle=True,
        preprocessing_function=efficientnet_preprocess_input,
        horizontal_flip=True
    )
    
    valid_datagen = ManualImageDataGenerator(
        target_size=(224, 224),
        batch_size=16,
        shuffle=False,
        preprocessing_function=efficientnet_preprocess_input
    )
    
    # Load datasets
    print("Loading training data...")
    train_generator = train_datagen.flow_from_directory(train_dir, class_mode='categorical')
    
    print("Loading validation data...")
    valid_generator = valid_datagen.flow_from_directory(valid_dir, class_mode='categorical')
    
    # Create model
    model = ManualEfficientNetB4(include_top=True, input_shape=(224, 224, 3), num_classes=5)
    
    # Train the model
    print("Starting manual training...")
    history = manual_fit(
        model, 
        train_generator, 
        valid_generator, 
        epochs=10,
        verbose=1
    )
    
    # Evaluate on validation set
    print("\nEvaluating on validation set...")
    val_loss, val_accuracy = manual_evaluate(model, valid_generator, verbose=1)
    print(f"Validation Accuracy after initial training: {val_accuracy * 100:.2f}%")

Loading training data...
Loading validation data...
Starting manual training...
Epoch 1/10
loss: 1.6094 - accuracy: 0.1859 - val_loss: 1.6145 - val_accuracy: 0.1953
Epoch 2/10
loss: 1.6094 - accuracy: 0.1998 - val_loss: 1.6229 - val_accuracy: 0.1953
Epoch 3/10
loss: 1.6094 - accuracy: 0.2162 - val_loss: 1.6267 - val_accuracy: 0.1953
Epoch 4/10
loss: 1.6094 - accuracy: 0.1855 - val_loss: 1.6238 - val_accuracy: 0.1953
Epoch 5/10
loss: 1.6094 - accuracy: 0.1969 - val_loss: 1.6172 - val_accuracy: 0.1953
Epoch 6/10
loss: 1.6094 - accuracy: 0.1769 - val_loss: 1.6130 - val_accuracy: 0.1953
Epoch 7/10
loss: 1.6094 - accuracy: 0.2080 - val_loss: 1.6132 - val_accuracy: 0.1953
Epoch 8/10
loss: 1.6094 - accuracy: 0.1929 - val_loss: 1.6134 - val_accuracy: 0.1953
Epoch 9/10
loss: 1.6094 - accuracy: 0.2009 - val_loss: 1.6137 - val_accuracy: 0.1953
Epoch 10/10
loss: 1.6094 - accuracy: 0.1928 - val_loss: 1.6150 - val_accuracy: 0.1953
Best validation accuracy: 19.53%

Evaluating on validation set...
Eva