'''
 * Copyright (c) 2018 Radhamadhab Dalai
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
'''

# Convolutional Neural Networks (CNNs)

Convolutional Neural Networks (CNNs) have been extremely successful in image, speech, audio, and video recognition tasks due to their ability to exploit translational equivariance/invariance with respect to grid structures (in 1, 2, and 3 dimensions) [13].

## Gabor Filter Banks

Gabor filter banks are a powerful method for feature extraction. The method involves the following steps:
1. **Creating Gabor Filters**: A bank of \( N \) Gabor filters is created.
2. **Convolution**: Each filter is convolved with the input image to produce \( N \) different images.
3. **Pooling**: Pixels from each image are pooled to extract relevant information.

The process mainly consists of two steps: convolution and pooling.

## Historical Background

- **Neocognitron**: Proposed by Fukushima [32], the Neocognitron is generally seen as a model that inspired CNNs computationally.
- **LeNet**: The first convolutional neural network, LeNet, was invented by Le Cun et al. [88] for handwritten digit recognition and further popularized by LeCun et al. [89].

## Benefits of CNNs

Compared to traditional fully connected neural networks, CNNs offer the advantage of a reduced number of parameters to be learned. The typical layers in a CNN include:

1. **Input Layer**: Feeds data into the network. Inputs can be raw data (e.g., image pixels) or their transformations to highlight specific aspects of the data.

2. **Convolutional Layers**: Contain a series of filters with fixed sizes used to perform convolutions on the data to generate feature maps.

3. **Pooling Layers**: Focus on the most important patterns by reducing the dimensionality of the feature maps used by subsequent layers. Also known as downsampling layers.

4. **Rectified Linear Unit (ReLU)**: Applies a nonlinear function to the output \( x \) of the previous layer, such as \( f(x) = \max(0, x) \). ReLU layers contribute to faster convergence in training CNNs [84].

5. **Fully Connected Layers**: Used for understanding patterns generated by the previous layers. Neurons in these layers are fully connected to all activations in the previous layer and are also called inner product layers. After training, features from these layers can be used in transfer learning to train another classifier.

6. **Loss Layers**: Specify how the network training penalizes the deviation between the predicted and true labels. Various loss functions can be used, including Softmax, Sigmoid, Cross-entropy, and Euclidean loss.

## Key Component: Convolutional Layers

Convolutional layers are the most essential components of CNNs and are discussed first.


In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, ReLU
from tensorflow.keras.models import Sequential

# Create a simple CNN model
model = Sequential([
    # Input layer (you can specify input shape for the first layer)
    Conv2D(filters=32, kernel_size=(3, 3), input_shape=(64, 64, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')  # Assume 10 classes for classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Dummy data for demonstration
import numpy as np
X_train = np.random.rand(100, 64, 64, 3)  # 100 samples of 64x64 RGB images
y_train = np.random.randint(0, 10, size=(100,))  # 100 target labels

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32)


2024-07-26 19:25:00.758951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-26 19:25:06.816579: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-07-26 19:25:06.816631: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-07-26 19:25:07.302505: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-26 19:25:18.765854: W tensorflow/stream_executor/platform/de

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 31, 31, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 14, 14, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 6, 6, 128)        0

<keras.callbacks.History at 0x7ff2d5a4cf10>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 8 * 8, 128)  # Assuming input images are 64x64
        self.fc2 = nn.Linear(128, 10)  # Assuming 10 classes for classification
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(-1, 128 * 8 * 8)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model
model = SimpleCNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dummy data for demonstration
X_train = torch.rand(100, 3, 64, 64)  # 100 samples of 64x64 RGB images
y_train = torch.randint(0, 10, (100,))  # 100 target labels

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/5], Loss: 2.3837
Epoch [2/5], Loss: 2.2323
Epoch [3/5], Loss: 2.4041
Epoch [4/5], Loss: 2.5652
Epoch [5/5], Loss: 2.1216


# Hankel Matrix and Convolution
### Convolution Operations

1. **One-dimensional Convolution (General Forms)**

   $$ y_i = (x \ast f)_i = \sum_{j=1}^{n} x(j) f(i - j + 1), \quad i = 1, \ldots, 2d - 1 $$

   $$ y_i = (x \ast f)_i = \sum_{j=1}^{d} x(i - j + 1) f(j), \quad i = 1, \ldots, 2d - 1 $$

   $$ y_i = (x \ast f)_i = \sum_{j=1}^{d} x(i + j - 1) f(j), \quad i = 1, \ldots, n - d + 1 $$

2. **Matrix-Vector Form of Convolution**

   $$ y = (x \ast f) = H(x) f $$

   where

   $$ H(x) = \begin{bmatrix}
   x(1) & x(2) & \cdots & x(d) \\
   x(2) & x(3) & \cdots & x(d + 1) \\
   \vdots & \vdots & \ddots & \vdots \\
   x(n - d + 1) & x(n - d + 2) & \cdots & x(n)
   \end{bmatrix} \in \mathbb{R}^{(n-d+1) \times d} $$

3. **Wrap-Around Hankel Matrix**

   $$ H_d(x) = \begin{bmatrix}
   x(1) & x(2) & \cdots & x(d) \\
   x(2) & x(3) & \cdots & x(d + 1) \\
   \vdots & \vdots & \ddots & \vdots \\
   x(n - d + 1) & x(n - d + 2) & \cdots & x(n) \\
   x(n - d + 2) & x(n - d + 3) & \cdots & x(1) \\
   \vdots & \vdots & \ddots & \vdots \\
   x(n) & x(1) & \cdots & x(d - 1)
   \end{bmatrix} $$

4. **Theorem 7.1 (Rank of Hankel Matrix)**

   If \( r + 1 \) denotes the minimum length of the annihilating filters that annihilate the signal \( x \), then for a given Hankel structured matrix \( H_d(x) \in H(n, d) \) with \( d > r \), its rank is given by:

   $$ \text{rank}(H_d(x)) = r $$

### Two-Dimensional Convolution

1. **Two-Dimensional Convolution (General Forms)**

   $$ s(t) = I(a, b) \ast K(a, b) = \sum_{a} \sum_{b} I(a, b) K(m - a, n - b) $$

   $$ s(t) = I(a, b) \ast K(a, b) = \sum_{a} \sum_{b} I(m - a, n - b) K(a, b) $$

   $$ s(t) = I(a, b) \ast K(a, b) = \sum_{a} \sum_{b} I(m + a, n + b) K(a, b) $$

2. **2-D Convolution with Input Image and Filter**

   Given a 2-D image \( X = [x_1, \ldots, x_p] \in \mathbb{R}^{n \times p} \) and a 2-D filter \( \Phi = [\phi_1, \ldots, \phi_q] \in \mathbb{R}^{d \times q} \):

   $$ (X \ast \Phi)_{m,k} = \sum_{i=1}^{d} \sum_{j=1}^{q} x_{m+i-1, k+j-1} \phi_{i,j} $$

   Matrix-vector form:

   $$ Y = (X \ast \Phi) = H_{d,q}(X) \Phi $$

   where

   $$ H_{d,q}(X) = \begin{bmatrix}
   H_{d}(x_1) \phi_1 & \cdots & H_{d}(x_1) \phi_q \\
   \vdots & \ddots & \vdots \\
   H_{d}(x_p) \phi_1 & \cdots & H_{d}(x_p) \phi_q
   \end{bmatrix} $$

### Convolution Types

1. **Single-Input Single-Output (SISO) Convolution**

   $$ y = x \ast \phi = H_d(x) \phi $$

2. **Single-Input Multi-Output (SIMO) Convolution**

   $$ Y = x \ast \Phi = H_d(x) \Phi $$

   where \( \Phi = [\phi_1, \ldots, \phi_q] \in \mathbb{R}^{d \times q} \).

3. **Multi-Input Multi-Output (MIMO) Convolution**

   $$ y_i = \sum_{j=1}^{p} z_j \ast \phi_{i,j} $$

   Matrix-vector form:

   $$ Y = \sum_{j=1}^{p} H_d(z_j) \phi_j = H_{d|p}(Z) \Phi $$

4. **Multi-Input Single-Output (MISO) Convolution**

   $$ y = H_{d|p}(Z) \phi $$

   where \( \phi = [\phi_1, \ldots, \phi_p]^T \).

### Block Hankel Matrix for 2-D Convolution

1. **Block Hankel Matrix**

   $$ H_{d_1, d_2}(X) = \begin{bmatrix}
   H_{d_1}(x_1) & H_{d_1}(x_2) & \cdots & H_{d_1}(x_{d_2}) \\
   \vdots & \vdots & \ddots & \vdots \\
   H_{d_1}(x_{n_2}) & H_{d_1}(x_1) & \cdots & H_{d_1}(x_{d_2 - 1})
   \end{bmatrix} $$

2. **2-D Convolution Matrix-Vector Form**

   $$ \text{vec}(Y) = H_{d_1, d_2}(X) \text{vec}(K) $$

3. **Extended Block Hankel Matrix for Multi-Channel 2-D Convolution**

   $$ H_{d_1, d_2|p}(X^{(1)}, \ldots, X^{(p)}) = \begin{bmatrix}
   H_{d_1, d_2}(X^{(1)}) & \cdots & H_{d_1, d_2}(X^{(p)})
   \end{bmatrix} $$

   2-D MIMO Convolution:

   $$ Y = H_{d_1, d_2|p}(X^{(1)}, \ldots, X^{(p)}) K $$


In [3]:
import numpy as np
from scipy.signal import convolve

# Function to compute 1D convolution using a Hankel matrix
def hankel_matrix_1d(x, d):
    n = len(x)
    H = np.zeros((n - d + 1, d))
    for i in range(n - d + 1):
        H[i, :] = x[i:i + d]
    return H

def convolve_1d(x, f):
    d = len(f)
    H = hankel_matrix_1d(x, d)
    y = H @ f
    return y

# Example usage
x = np.array([1, 2, 3, 4, 5])
f = np.array([0.2, 0.5, 0.2])
y = convolve_1d(x, f)
print("1D Convolution result:", y)


1D Convolution result: [1.8 2.7 3.6]


In [4]:
import numpy as np
from scipy.signal import convolve2d

# Function to compute 2D convolution
def convolve_2d(image, kernel):
    return convolve2d(image, kernel, mode='valid')

# Example usage
image = np.array([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
kernel = np.array([[1, 0],
                   [0, -1]])
result = convolve_2d(image, kernel)
print("2D Convolution result:\n", result)


2D Convolution result:
 [[4 4]
 [4 4]]


In [5]:
import numpy as np

def hankel_matrix_2d(X, d1, d2):
    n1, n2 = X.shape
    H = np.zeros(((n1 - d1 + 1) * (n2 - d2 + 1), d1 * d2))
    for i in range(n1 - d1 + 1):
        for j in range(n2 - d2 + 1):
            H[i * (n2 - d2 + 1) + j, :] = X[i:i + d1, j:j + d2].flatten()
    return H

# Example usage
X = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
d1, d2 = 2, 2
H = hankel_matrix_2d(X, d1, d2)
print("2D Hankel Matrix:\n", H)


2D Hankel Matrix:
 [[ 1.  2.  5.  6.]
 [ 2.  3.  6.  7.]
 [ 3.  4.  7.  8.]
 [ 5.  6.  9. 10.]
 [ 6.  7. 10. 11.]
 [ 7.  8. 11. 12.]]


In [6]:
import numpy as np

# Generate synthetic data
def generate_synthetic_data(n_samples, n_features, n_targets):
    X = np.random.randn(n_samples, n_features)
    y = np.random.randn(n_samples, n_targets)
    return X, y

# Example usage
n_samples = 100
n_features = 10
n_targets = 1
X_train, y_train = generate_synthetic_data(n_samples, n_features, n_targets)
import numpy as np

class HankelConvolutionalModel:
    def __init__(self, d):
        self.d = d
        self.W = np.random.randn(d, 1)  # Random initial weights

    def hankel_matrix(self, x):
        n = len(x)
        H = np.zeros((n - self.d + 1, self.d))
        for i in range(n - self.d + 1):
            H[i, :] = x[i:i + self.d]
        return H

    def forward(self, x):
        H = self.hankel_matrix(x)
        return H @ self.W

    def compute_loss(self, y_pred, y_true):
        return np.mean((y_pred - y_true) ** 2)

    def gradient(self, x, y_true):
        y_pred = self.forward(x)
        loss = self.compute_loss(y_pred, y_true)
        grad = 2 * np.mean((y_pred - y_true)[:, np.newaxis] * self.hankel_matrix(x), axis=0)
        return grad, loss

    def update_weights(self, grad, learning_rate):
        self.W -= learning_rate * grad

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            total_loss = 0
            for x, y_true in zip(X, y):
                grad, loss = self.gradient(x, y_true)
                self.update_weights(grad, learning_rate)
                total_loss += loss
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(X)}')

# Example usage
d = 3
model = HankelConvolutionalModel(d)
model.train(X_train, y_train, epochs=10, learning_rate=0.01)


ValueError: operands could not be broadcast together with shapes (3,1) (8,3) (3,1) 