# PyTorch and OpenCV for Deep Learning and Image Processing
This notebook demonstrates how to set up and use PyTorch for deep learning and OpenCV for image processing. It includes:
- Loading and processing images.
- Building simple and complex neural networks.
- Visualizing progress with TQDM.
- Summarizing models with `torchsummary`.


In [None]:
# Import libraries for neural networks, image processing, and data handling
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary

import cv2  # For image processing
import os  # For file handling
import pandas as pd  # For data manipulation
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For visualization

from tqdm.auto import trange  # For progress bars in loops

# Enable inline plotting for Jupyter Notebook
%matplotlib inline

In [None]:
# --- Data locations ---
CSV_PATH = "csv_path"  # CSV file read by the preprocessing cell
DATASET_PATH = "train_data"  # directory prefix used to build each image path

# --- Dataset size and batching ---
DATASIZE = 6400  # number of rows/images the preprocessing loop iterates over
BATCH_SIZE = 64  # samples per DataLoader batch
NUM_CLASSES = 16  # length of each one-hot label vector

# --- Target image geometry in pixels (passed to cv2.resize) ---
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224

### Preprocessing Data for Model Training

This section of the notebook preprocesses image data for training a deep learning model. The preprocessing involves:

1. **Loading the Data**: Reading the dataset from a CSV file, which contains labels and metadata about the images.
2. **One-Hot Encoding Labels**: Converting the numeric class labels into one-hot encoded vectors.
3. **Resizing and Normalizing Images**:
   - Each image is resized to a standard size of `IMAGE_WIDTH` x `IMAGE_HEIGHT` (224x224) pixels.
   - Converted to grayscale to reduce complexity and focus on intensity values.
   - Normalized to have pixel values between 0 and 1 for faster convergence during training.
4. **Storing Preprocessed Data**: Preparing the data as NumPy arrays for model training.

The processed data is stored in:
- `X`: A NumPy array containing the image data.
- `Y`: A NumPy array containing the one-hot encoded labels.


In [None]:
# Load the label table; column 1 of each row is used as the integer class label
datafile = pd.read_csv(CSV_PATH).values

# Never index past the end of the CSV, even if DATASIZE is larger than the table
num_samples = min(DATASIZE, len(datafile))

# Accumulators for the processed samples
x_train = []  # grayscale images scaled to [0, 1]
y_train = []  # one-hot encoded labels

# Progress bar for preprocessing
preprocessing = trange(num_samples, desc="Preprocessing Progress", unit="image", ncols=100)

for i in preprocessing:
    # Load the image first so unreadable files are skipped before any label work
    image_path = f"{DATASET_PATH}/{i}.tif"
    image = cv2.imread(image_path)  # OpenCV decodes colour images in BGR channel order
    if image is None:
        print(f"Warning: Image {image_path} could not be loaded.")
        continue

    # Validate the label explicitly: a negative value would otherwise wrap around
    # silently when used as an index, and a float value cannot index an array.
    label = int(datafile[i, 1])
    if not 0 <= label < NUM_CLASSES:
        raise ValueError(f"Label {label} in row {i} is outside [0, {NUM_CLASSES - 1}].")
    y_one_hot = np.zeros(NUM_CLASSES, dtype="float32")
    y_one_hot[label] = 1

    # cv2.resize takes (width, height)
    image_resized = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
    # BGR (not RGB) -> gray, so the red and blue channels get the correct luminance weights
    image_gray = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY)
    # Scale to [0, 1] directly in float32 to avoid holding float64 copies of every image
    image_normalized = image_gray.astype(np.float32) / 255.0

    # Image and label are appended together so X and Y stay aligned
    x_train.append(image_normalized)
    y_train.append(y_one_hot)

# Convert lists to NumPy arrays; X is channel-last: (N, H, W, 1)
X = np.array(x_train, dtype="float32").reshape(-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1)
Y = np.array(y_train, dtype="float32")

print(f"Preprocessed data shapes: X={X.shape}, Y={Y.shape}")

### Converting Preprocessed Data to PyTorch Tensors and DataLoaders

This section performs the following operations:

1. **Convert NumPy Arrays to PyTorch Tensors**:
   - The preprocessed data stored in `X` and `Y` (NumPy arrays) is converted to PyTorch tensors using `torch.from_numpy`.
   - This is necessary because PyTorch models require tensor inputs.

2. **Create PyTorch DataLoaders**:
   - `DataLoader` is a utility provided by PyTorch to batch and shuffle the data for training.
   - The `batch_size` parameter specifies the number of samples per batch, enabling efficient training on large datasets.

3. **Batch Data Extraction**:
   - Convert the batched tensors back to NumPy arrays for further processing or visualization.
   - This step collects every batch from the `DataLoader` and combines them into single NumPy arrays.

This step ensures the data is efficiently prepared for model training while leveraging the advantages of PyTorch's DataLoader.


In [None]:
# Step 1: Convert NumPy arrays to PyTorch tensors (torch.from_numpy shares memory with X / Y)
x_train_tensor = torch.from_numpy(X)
y_train_tensor = torch.from_numpy(Y)

# Step 2: Pair every image with its label
dataset = TensorDataset(x_train_tensor, y_train_tensor)
if len(dataset) == 0:
    # Fail with a clear message instead of an obscure sampler error further down
    raise ValueError("No samples available: check that the preprocessing cell loaded any images.")

# Step 3: Shuffled mini-batches for training
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Step 4: Collect one pass over the loader as NumPy arrays for inspection
x_batches = []
y_batches = []

for x_batch, y_batch in data_loader:
    x_batches.append(x_batch.numpy())
    y_batches.append(y_batch.numpy())

# When the dataset size is not a multiple of BATCH_SIZE (e.g. some images were skipped),
# the final batch is smaller and cannot be stacked with the others. It is left out of the
# stacked arrays only; data_loader itself still yields it during training.
if len(x_batches) > 1 and len(x_batches[-1]) != len(x_batches[0]):
    print(f"Note: trailing partial batch of {len(x_batches[-1])} samples excluded from the stacked arrays.")
    x_batches = x_batches[:-1]
    y_batches = y_batches[:-1]

# Resulting shapes: (num_batches, batch, H, W, 1) and (num_batches, batch, NUM_CLASSES)
x_batches = np.stack(x_batches)
y_batches = np.stack(y_batches)

# Verify shapes
print(f"Shape of batched input data: {x_batches.shape}")
print(f"Shape of batched labels: {y_batches.shape}")

# Convolutional Neural Networks


In [None]:
class ResConvBlock(nn.Module):
    """Residual block: two (conv -> zero-pad -> batch-norm) stages with an identity skip.

    Both convolutions keep the channel count at ``filter``. Each convolution is
    unpadded and its output is zero-padded back to the input's spatial size, so
    the skip connection can be added element-wise.

    Args:
        filter: Number of input and output channels.
        kernel: Convolution kernel size, an odd int or a pair of odd ints.
            The padding is derived from it so the residual add is shape-correct
            for any odd kernel, not only the default of 3.

    Raises:
        ValueError: If a kernel dimension is even, because symmetric padding
            cannot restore the input size in that case.
    """

    def __init__(self, filter, kernel=3):
        super(ResConvBlock, self).__init__()
        if isinstance(kernel, int):
            kernel_h = kernel_w = kernel
        else:
            kernel_h, kernel_w = kernel
        if kernel_h % 2 == 0 or kernel_w % 2 == 0:
            raise ValueError(f"ResConvBlock requires odd kernel sizes, got {kernel!r}.")
        pad_h, pad_w = kernel_h // 2, kernel_w // 2

        self.conv1 = nn.Conv2d(filter, filter, kernel)
        self.conv2 = nn.Conv2d(filter, filter, kernel)
        # An unpadded conv shrinks each spatial dim by (kernel - 1); pad back by kernel // 2
        # per side. ZeroPad2d takes (left, right, top, bottom). Equals ZeroPad2d(1) for kernel=3.
        self.pad = nn.ZeroPad2d((pad_w, pad_w, pad_h, pad_h))
        # Not used in forward(); kept so the attribute remains available.
        self.pool = nn.MaxPool2d(2, 2)
        self.norm1 = nn.BatchNorm2d(filter)
        self.norm2 = nn.BatchNorm2d(filter)
        self.lrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Return a tensor with the same shape as ``x``."""
        s = x  # identity shortcut
        x = self.lrelu(self.norm1(self.pad(self.conv1(x))))
        # The skip is added before the final activation
        x = self.lrelu(self.norm2(self.pad(self.conv2(x))) + s)
        return x

In [None]:
class ResBottleNeck(nn.Module):
    """Down-sampling residual unit that maps ``infilter`` channels to ``outfilter``.

    Main path: 1x1 conv, then a stride-2 ``kernel`` conv followed by zero
    padding, then a 1x1 conv to ``outfilter`` channels. Every conv is followed
    by batch-norm and LeakyReLU(0.1).
    Shortcut path: a stride-2 ``kernel`` conv straight to ``outfilter``
    channels, zero padding, batch-norm and LeakyReLU(0.1).
    The two paths are summed; no activation is applied after the sum.
    """

    def __init__(self, infilter, outfilter, kernel=3):
        super().__init__()
        # Point-wise convolutions on the main path
        self.conv0 = nn.Conv2d(infilter, infilter, kernel_size=1)
        self.conv2 = nn.Conv2d(infilter, outfilter, kernel_size=1)
        # Strided convolutions: conv1 on the main path, conv on the shortcut
        self.conv1 = nn.Conv2d(infilter, infilter, kernel_size=kernel, stride=2)
        self.conv = nn.Conv2d(infilter, outfilter, kernel_size=kernel, stride=2)
        # One-pixel zero border applied to the output of each strided conv
        self.pad = nn.ZeroPad2d(1)
        self.norm0 = nn.BatchNorm2d(infilter)
        self.norm1 = nn.BatchNorm2d(infilter)
        self.norm2 = nn.BatchNorm2d(outfilter)
        self.norm = nn.BatchNorm2d(outfilter)
        self.lrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Return the sum of the main-path and shortcut-path activations."""
        main = self.conv0(x)
        main = self.lrelu(self.norm0(main))

        main = self.pad(self.conv1(main))
        main = self.lrelu(self.norm1(main))

        main = self.conv2(main)
        main = self.lrelu(self.norm2(main))

        shortcut = self.pad(self.conv(x))
        shortcut = self.lrelu(self.norm(shortcut))

        return main + shortcut

In [None]:
class ResBlock(nn.Module):
    """Residual stage: four ``ResConvBlock`` units followed by a ``ResBottleNeck``.

    The four residual units work at ``infilter`` channels; the closing
    bottleneck maps ``infilter`` channels to ``outfilter``.

    Note: ``kernel`` is accepted but is not forwarded to the sub-blocks, which
    are built with their own default kernel size.
    """

    def __init__(self, infilter, outfilter, kernel=3):
        super().__init__()
        # Channel-preserving residual units
        self.conv0 = ResConvBlock(infilter)
        self.conv1 = ResConvBlock(infilter)
        self.conv2 = ResConvBlock(infilter)
        self.conv3 = ResConvBlock(infilter)

        # Transition to the next stage's channel count
        self.conv = ResBottleNeck(infilter, outfilter)

    def forward(self, x):
        """Run ``x`` through conv0..conv3 and then the bottleneck, in that order."""
        for stage in (self.conv0, self.conv1, self.conv2, self.conv3, self.conv):
            x = stage(x)
        return x

In [None]:
class StartBlock(nn.Module):
    """Network stem: 7x7 stride-2 convolution on a 1-channel input, then BN and LeakyReLU.

    Args:
        filter: Number of output channels produced by the stem.
    """

    def __init__(self, filter):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=filter, kernel_size=7, stride=2)
        self.norm1 = nn.BatchNorm2d(num_features=filter)
        self.lrelu = nn.LeakyReLU(negative_slope=0.1)

    def forward(self, x):
        """Apply conv -> batch-norm -> LeakyReLU(0.1) to ``x``."""
        features = self.conv1(x)
        features = self.norm1(features)
        return self.lrelu(features)

In [None]:
class GMaxpool(nn.Module):
    """Global pooling over all dimensions after the first two.

    Despite the class name, this computes an *average* (``F.avg_pool2d``), not a
    maximum: the pooling window is set to the full trailing size of the input.
    """

    def forward(self, x):
        """Average ``x`` with a window equal to its size from dimension 2 onward."""
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

In [None]:
class ResNet(nn.Module):
    """Residual classifier for single-channel images.

    Architecture: ``StartBlock`` stem, six ``ResBlock`` stages that each double
    the channel count (``filter`` -> ``filter * 64``), global average pooling,
    and a two-layer dense head that ends in a softmax.

    Args:
        filter: Channel count produced by the stem; later widths are multiples of it.
        ndim: Width of the hidden dense layer.
        outclass: Number of output classes.

    Note:
        ``forward`` returns softmax probabilities, not logits. Losses that apply
        their own (log-)softmax, such as ``nn.CrossEntropyLoss``, should not be
        fed these outputs directly.
    """

    def __init__(self, filter=16, ndim=256, outclass=16):
        super(ResNet, self).__init__()
        self.res0 = StartBlock(filter)
        self.res1 = ResBlock(filter, filter*2)
        self.res2 = ResBlock(filter*2, filter*4)
        self.res3 = ResBlock(filter*4, filter*8)
        self.res4 = ResBlock(filter*8, filter*16)
        self.res5 = ResBlock(filter*16, filter*32)
        self.res6 = ResBlock(filter*32, filter*64)

        self.avgpool = GMaxpool()
        self.flat = nn.Flatten()

        # The dense head's input width must follow the last stage's channel count;
        # the previous hard-coded 1024 was only correct for filter=16.
        self.dense1 = nn.Linear(filter*64, ndim)
        self.dense2 = nn.Linear(ndim, outclass)
        # Element-wise dropout: the tensor here is a flattened (N, features) matrix,
        # which is not a valid input layout for channel-wise nn.Dropout2d.
        self.drop = nn.Dropout(0.2)
        self.lrelu = nn.LeakyReLU(0.2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape ``(N, outclass)`` for a ``(N, 1, H, W)`` batch."""
        x = self.res6(self.res5(self.res4(self.res3(self.res2(self.res1(self.res0(x)))))))
        x = self.flat(self.avgpool(x))
        return self.softmax(self.dense2(self.lrelu(self.drop(self.dense1(x)))))

# Training 

#### labels --> one-hot
`one_hot = torch.nn.functional.one_hot(target)`
#### one-hot --> labels
`labels_again = torch.argmax(one_hot, dim=1)`


In [None]:
# Build the classifier with one output per class
model = ResNet(outclass=NUM_CLASSES)

# Summarise with the same (channels, height, width) the preprocessing produces.
# torchsummary defaults to device="cuda"; the model lives on the CPU here, so the
# device is stated explicitly to keep the dummy input and the weights together.
summary(model, (1, IMAGE_HEIGHT, IMAGE_WIDTH), device="cpu")

In [None]:
# Loss function used by the training loop
criterion = nn.CrossEntropyLoss()

# Optimisation schedule
epochs = 1  # number of passes of the training loop
learning_rate = 1e-4  # learning rate handed to the Adam optimizer
decay = 1  # the training loop divides the learning rate by (epoch * decay + 1)

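`Categorical Cross Entropy` (`nn.CrossEntropyLoss`)

- Input: (N, C) unnormalized class scores (logits), where C = number of classes
- Target: (N), where each value satisfies 0 <= targets[i] <= C-1
- Output: scalar. If `reduce` is False (`reduction='none'` in current PyTorch), then (N) instead.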

In [None]:
# Train on the shuffled mini-batches produced by `data_loader`.
device = next(model.parameters()).device  # keep inputs on the same device as the weights
steps = len(data_loader)
base_lr = learning_rate  # never mutated, so re-running this cell starts from the same rate

# One optimizer for the whole run so Adam's moment estimates persist across epochs
optimizer = torch.optim.Adam(model.parameters(), lr=base_lr)

model.train()  # enable dropout and batch statistics in batch-norm
for epoch in range(epochs):
    lss = 0.0  # running sum of step losses as a plain float (no autograd graph retained)

    # Inverse-time decay computed from the base rate: lr = base_lr / (1 + decay * epoch).
    # Dividing the already-decayed rate again each epoch would compound the decay.
    epoch_lr = base_lr / (epoch * decay + 1)
    for param_group in optimizer.param_groups:
        param_group["lr"] = epoch_lr

    print(f"Epoch {epoch+1}/{epochs}")
    train = trange(steps, desc=f"\tTraining: 0/{steps} steps || Loss: NaNaN || Step Loss: NaNaN || Progress", unit="steps", ncols=1000)
    # zip pairs the progress-bar step index with the matching mini-batch
    for c, (xtr, ytr) in zip(train, data_loader):
        # Batches are stored channel-last (N, H, W, 1); the network expects (N, 1, H, W)
        xtr = xtr.to(device).permute(0, 3, 1, 2)
        # One-hot rows -> integer class indices
        targets = torch.argmax(ytr.to(device), dim=1)

        ypred = model(xtr)  # softmax probabilities, shape (N, classes)
        # The loss applies log-softmax internally. Since log_softmax(log p) == log p when p
        # sums to 1, passing log-probabilities gives the true cross-entropy instead of
        # applying softmax twice. The clamp guards against log(0).
        log_probs = torch.log(ypred.clamp_min(1e-12))
        loss = criterion(log_probs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        lss += step_loss
        train.set_description(f"\tTraining: {c+1}/{steps} steps || Loss: {lss/(c+1):.4f} || Step Loss: {step_loss:.4f} || Progress")
