In [10]:
import numpy as np
import matplotlib.pyplot as plt

"""
Credit: https://hogonext.com/how-to-build-a-simple-neural-network-from-scratch-without-libraries/

1. Changed activation function.
2. Limited bit-width.
3. Updated derivative for back propagation.
"""

def relu(x):
    """
    Applies a ReLU activation function on the input data vector.
    """
    return np.maximum(0, x)


def relu_derivative(x):
    """
    Applies the derivative of the ReLU activation function on
    the input data vector.
    """
    return np.where(x >= 0, 1, 0)


def f_prop(inputs, weights):
    """
    Returns the outputs of the neural network.
    
    Data is a numpy array-like object.
    weights is a list of numpy arrays.
    
    Returns the array outputs.
    """
    layer_inputs = [np.dot(inputs, weights[0])]
    layer_outputs = [relu(layer_inputs[0])]
    
    for i in range(1, len(weights)):
        layer_inputs.append(np.dot(layer_outputs[i-1], weights[i]))
        layer_outputs.append(relu(layer_inputs[i]))
        
    return layer_outputs, layer_inputs


def backprop(inputs, targets, learning_rate, weights, layer_inputs, layer_outputs):
    """
    Returns the modified weights and output error on a single performed pass of backpropagation.
    """
    errors = [None for _ in range(len(layer_inputs))]
    deltas = [None for _ in range(len(layer_inputs))]
    
    errors[-1] = targets - layer_outputs[-1]
    deltas[-1] = errors[-1] * relu_derivative(layer_inputs[-1])
    
    for i in range(1, len(layer_outputs)):
        errors[len(layer_outputs) - i - 1] = deltas[len(layer_outputs) - i].dot(weights[len(layer_outputs) - i].T)
        deltas[len(layer_outputs) - i - 1] = errors[len(layer_outputs) - i - 1] * relu_derivative(layer_inputs[len(layer_outputs) - i - 1])
    
    weights[0] += inputs.T.dot(deltas[0]) * learning_rate
    for i in range(1, len(weights)):
        weights[i] += layer_outputs[i-1].T.dot(deltas[i]) * learning_rate
        
    return weights, np.mean(np.abs(errors[-1]))


def gen_weights(layer_sizes):
    """
    Generates the list of numpy arrays with weights randomized on interval [-1, 1].
    """
    weights = []
    for i in range(len(layer_sizes) - 1):
        weights.append(2 * np.random.random((layer_sizes[i], layer_sizes[i+1])) - 1)
    return weights


def train_network(training_data, targets, weights, learning_rate, n_epochs):
    """
    Trains a neural network given some training data, targets for that data, weights
    to train on, rate of learning, and the number of times to train.
    """
    errors = []  # Track errors during training
    
    for epoch in range(n_epochs):
        layer_outputs, layer_inputs = f_prop(training_data, weights)
        weights, error = backprop(training_data, targets, learning_rate, weights, layer_inputs, layer_outputs)
        errors.append(error)
        
        # Print progress every 1000 epochs
        if epoch % 1000 == 0:
            print(f'Epoch {epoch}, Error: {error:.4f}')
    
    return weights, layer_outputs[-1], errors


def convert_weights(weights, precision):
    """
    Converts weights so that they conform to the given bitwidth of our neuron.
    Weights is a list of numpy arrays of the weights for each input to each neuron,
    precision is the smallest step size achieved by our neuron bit width.
    """
    w = []
    
    for i in range(len(weights)):
        arr = weights[i].copy()
        arr /= precision
        arr = np.round(arr)
        arr *= precision
        w.append(arr)
    
    return w

In [43]:
class SimplifiedNeuron:
    """
    Simplified neuron that tracks energy, delay, MAC operations and ReLU activations.
    This neuron processes up to 3 inputs sequentially over 3 clock cycles.
    """
    
    def __init__(self, weights, bit_width=4, energy_per_cycle=39.67, delay_per_cycle=950):
        """
        Initialize the neuron with hardware parameters.
        
        Parameters:
        weights: Array of weights for input connections (max 3 weights)
        bit_width: Bit width for number representation
        energy_per_cycle: Energy consumption per clock cycle in fJ
        delay_per_cycle: Delay per clock cycle in ps
        """
        if len(weights) > 3:
            raise ValueError("This neuron design can only handle max 3 inputs/weights")
            
        self.weights = weights
        self.bit_width = bit_width
        self.energy_per_cycle = energy_per_cycle
        self.delay_per_cycle = delay_per_cycle
        
        # Calculate representable range based on bit width
        self.max_val = (2**(bit_width-1)) - 1
        self.min_val = -(2**(bit_width-1))
        
        # Performance tracking
        self.output = 0
        self.transform = 0
        self.mac_operations = 0
        self.relu_activations = 0
        
    def apply_bit_constraints(self, value):
        """Apply bit width constraints to a value"""
        return max(self.min_val, min(self.max_val, value))
        
    def forward(self, inputs):
        """
        Perform forward pass through the neuron.
        
        Parameters:
        inputs: Input values (max 3 inputs)
        
        Returns:
        Output value after activation
        """
        if len(inputs) > 3:
            raise ValueError("This neuron design can only handle max 3 inputs")
            
        # Pad inputs/weights if less than 3
        padded_inputs = list(inputs) + [0] * (3 - len(inputs))
        padded_weights = self.weights + [0] * (3 - len(self.weights))
            
        # Reset transform value
        self.transform = 0
        
        # Process each input-weight pair over 3 cycles
        for i in range(3):
            # Apply bit constraints to input
            constrained_input = self.apply_bit_constraints(padded_inputs[i])
            
            # Perform MAC operation
            self.transform += constrained_input * padded_weights[i]
            
            # Only count real MAC operations (not for padded zeros)
            if i < len(inputs) and i < len(self.weights):
                self.mac_operations += 1
        
        # Apply ReLU activation (4th cycle)
        previous_output = self.output
        self.output = max(0, self.transform)
        
        # Apply bit constraints to output
        self.output = self.apply_bit_constraints(self.output)
        
        # Track ReLU activations
        if self.output > 0:
            self.relu_activations += 1
            
        return self.output
    
    def calculate_energy(self):
        """Calculate energy consumption based on number of cycles"""
        # Always 4 cycles (3 for MAC ops + 1 for ReLU)
        return 4 * self.energy_per_cycle
    
    def calculate_delay(self):
        """Calculate delay based on number of cycles"""
        # Always 4 cycles (3 for MAC ops + 1 for ReLU)
        return 4 * self.delay_per_cycle
    
    def get_performance_metrics(self):
        """Get performance metrics"""
        total_energy = self.calculate_energy()
        total_delay = self.calculate_delay()
        
        return {
            'total_energy': total_energy,
            'total_delay': total_delay,
            'mac_operations': self.mac_operations,
            'relu_activations': self.relu_activations
        }

class PhysicalNeuralNetwork:
    """
    Simplified neural network model that incorporates physical characteristics
    based on a state machine implementation that processes 3 inputs per neuron.
    """
    
    def __init__(self, layer_sizes, weights=None, bit_width=4, 
                 energy_per_cycle=39.67, delay_per_cycle=950):
        """
        Initialize a neural network with physical characteristics.
        
        Parameters:
        layer_sizes: Tuple with number of neurons per layer
        weights: Pre-trained weights (optional)
        bit_width: Bit width for representation
        energy_per_cycle: Energy consumption per clock cycle in fJ
        delay_per_cycle: Delay per clock cycle in ps
        """
        self.layer_sizes = layer_sizes
        self.bit_width = bit_width
        self.energy_per_cycle = energy_per_cycle
        self.delay_per_cycle = delay_per_cycle
        
        # Initialize weights or use provided ones
        if weights is None:
            self.weights = gen_weights(layer_sizes)
        else:
            self.weights = weights
            
        # Create layers of neurons
        self.layers = self._build_network()
        
        # Performance tracking
        self.total_mac_operations = 0
        self.total_relu_activations = 0
        
    def _build_network(self):
        """Build the network with physical neurons"""
        layers = []
        
        # Create each layer
        for layer_idx in range(1, len(self.layer_sizes)):
            layer = []
            prev_size = self.layer_sizes[layer_idx-1]
            curr_size = self.layer_sizes[layer_idx]
            
            # Create neurons for this layer
            for i in range(curr_size):
                # Extract weights for this neuron
                weights_all = [self.weights[layer_idx-1][j][i] for j in range(prev_size)]
                
                # Handle cases where we have more than 3 inputs
                # by using multiple physical neurons in parallel
                num_physical_neurons = (prev_size + 2) // 3  # Ceiling division
                physical_neurons = []
                
                for pn_idx in range(num_physical_neurons):
                    # Get slice of weights for this physical neuron (max 3)
                    start_idx = pn_idx * 3
                    end_idx = min(start_idx + 3, prev_size)
                    neuron_weights = weights_all[start_idx:end_idx]
                    
                    # Create a new neuron with physical characteristics
                    neuron = SimplifiedNeuron(
                        weights=neuron_weights,
                        bit_width=self.bit_width,
                        energy_per_cycle=self.energy_per_cycle,
                        delay_per_cycle=self.delay_per_cycle
                    )
                    physical_neurons.append(neuron)
                
                # Add group of physical neurons to the layer
                layer.append(physical_neurons)
            
            layers.append(layer)
            
        return layers
    
    def forward(self, inputs):
        """
        Perform forward pass through the network
        
        Parameters:
        inputs: Input data
        
        Returns:
        Network outputs
        """
        # Reset performance counters
        self.total_mac_operations = 0
        self.total_relu_activations = 0
        
        layer_outputs = [inputs]
        
        # Process through each layer
        for layer_idx, layer in enumerate(self.layers):
            layer_input = layer_outputs[-1]
            current_outputs = []
            
            for neuron_group in layer:
                # For groups with multiple physical neurons (>3 inputs)
                group_outputs = []
                
                # Process each physical neuron in the group
                for pn_idx, neuron in enumerate(neuron_group):
                    # Get slice of inputs for this physical neuron (max 3)
                    start_idx = pn_idx * 3
                    end_idx = min(start_idx + 3, len(layer_input))
                    
                    if start_idx < len(layer_input):
                        neuron_inputs = layer_input[start_idx:end_idx]
                        neuron_output = neuron.forward(neuron_inputs)
                        group_outputs.append(neuron_output)
                        
                        # Track operations
                        self.total_mac_operations += neuron.mac_operations
                        self.total_relu_activations += neuron.relu_activations
                
                # Sum outputs from parallel neurons in the group
                group_result = sum(group_outputs)
                current_outputs.append(group_result)
                
            layer_outputs.append(current_outputs)
            
        return layer_outputs[-1]
    
    def get_performance_report(self):
        """Generate a performance report"""
        # Calculate total number of neurons
        total_neurons = 0
        for layer in self.layers:
            for neuron_group in layer:
                total_neurons += len(neuron_group)
        
        # Each neuron takes 4 cycles
        total_cycles = 4 * total_neurons
        
        # Calculate total energy and delay
        total_energy = total_cycles * self.energy_per_cycle
        total_delay = total_cycles * self.delay_per_cycle
        
        return {
            'total_energy': total_energy,
            'total_delay': total_delay,
            'total_mac_operations': self.total_mac_operations,
            'total_relu_activations': self.total_relu_activations,
            'total_neurons': total_neurons,
            'cycles_per_inference': total_cycles
        }
    
    def print_network_info(self):
        """Print simplified information about the network"""
        print(f"Physical Neural Network Information:")
        print(f"  Architecture: {self.layer_sizes}")
        print(f"  Bit Width: {self.bit_width} bits")
        print(f"  Energy per cycle: {self.energy_per_cycle} fJ")
        print(f"  Delay per cycle: {self.delay_per_cycle} ps")
        
        perf = self.get_performance_report()
        print(f"\nPerformance Metrics:")
        print(f"  Total Energy: {perf['total_energy']:.2f} fJ")
        print(f"  Total Delay: {perf['total_delay']:.2f} ps")
        print(f"  Total MAC Operations: {perf['total_mac_operations']}")
        print(f"  Total ReLU Activations: {perf['total_relu_activations']}")
        print(f"  Total Physical Neurons: {perf['total_neurons']}")
        print(f"  Cycles per Inference: {perf['cycles_per_inference']}")

def example_analysis():
    """Example analysis of a simple neural network with physical characteristics"""
    
    # 1. Define network architecture
    layer_sizes = (2, 2, 2, 1)  # Simple 2-2-1 network for XOR
    
    # 2. Create training data (XOR function)
    training_data = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ])
    targets = np.array([
        [0],
        [1],
        [1],
        [0]
    ])
    
    # 3. Train the neural network
    print("Training neural network...")
    weights = gen_weights(layer_sizes)
    trained_weights, final_outputs, training_errors = train_network(
        training_data, targets, weights, 0.1, 10000
    )
    
    print("\nFinal predictions:")
    for i in range(len(training_data)):
        print(f"Input: {training_data[i]}, Target: {targets[i][0]}, Prediction: {final_outputs[i][0]:.4f}")
    
    # 4. Define physical characteristics based on Cadence simulation
    # Based on actual Cadence simulation results
    delay_per_cycle = 1748.931   # Delay per clock cycle in ps
    energy_per_cycle = 54.67  # Energy per clock cycle in fJ
    
    # 5. Create physical neural network
    print("\nCreating physical neural network model...")
    physical_net = PhysicalNeuralNetwork(
        layer_sizes,
        weights=trained_weights,
        bit_width=4,
        energy_per_cycle=energy_per_cycle,
        delay_per_cycle=delay_per_cycle
    )
    
    # 6. Test physical network
    print("\nTesting physical network...")
    for i in range(len(training_data)):
        output = physical_net.forward(training_data[i])
        print(f"Input: {training_data[i]}, Output: {output[0]:.4f}")
    
    # 7. Generate performance report
    print("\nGenerating performance report...")
    physical_net.print_network_info()
    
    return {
        'trained_weights': trained_weights,
        'physical_net': physical_net
    }

# Run the example analysis if this file is executed directly
if __name__ == "__main__":
    results = example_analysis()

Training neural network...
Epoch 0, Error: 0.3970
Epoch 1000, Error: 0.2500
Epoch 2000, Error: 0.2500
Epoch 3000, Error: 0.2500
Epoch 4000, Error: 0.2500
Epoch 5000, Error: 0.2500
Epoch 6000, Error: 0.2500
Epoch 7000, Error: 0.2500
Epoch 8000, Error: 0.2500
Epoch 9000, Error: 0.2500

Final predictions:
Input: [0 0], Target: 0, Prediction: 0.0000
Input: [0 1], Target: 1, Prediction: 1.0000
Input: [1 0], Target: 1, Prediction: 0.0000
Input: [1 1], Target: 0, Prediction: 0.0000

Creating physical neural network model...

Testing physical network...
Input: [0 0], Output: 0.0000
Input: [0 1], Output: 1.0000
Input: [1 0], Output: 0.0000
Input: [1 1], Output: 0.0000

Generating performance report...
Physical Neural Network Information:
  Architecture: (2, 2, 2, 1)
  Bit Width: 4 bits
  Energy per cycle: 54.67 fJ
  Delay per cycle: 1748.931 ps

Performance Metrics:
  Total Energy: 1093.40 fJ
  Total Delay: 34978.62 ps
  Total MAC Operations: 40
  Total ReLU Activations: 9
  Total Physical Neur