### Key Changes for PyTorch:
- __Dataset Handling:__ Reads the `.mat` file with `h5py` and converts the arrays into PyTorch tensors (`scipy.io.loadmat` is imported but not used by the code below).
- __Model Definition:__ Used `torch.nn.Conv1d`, `torch.nn.MaxPool1d`, and `torch.nn.Linear` to replicate Keras layers.
- __Training & Validation:__ Used `torch.utils.data.Dataset` and `torch.utils.data.DataLoader` for data batching.
- __Training Process:__ Implemented manual training loop with binary cross-entropy loss and metrics calculation.


### Keras to PyTorch Conversion

#### ✅ Data Handling:
- Reads `.mat` file and loads it into PyTorch tensors.
- Balances positive & negative samples.

#### ✅ CNN Model Definition:
- Uses `Conv1d`, `ReLU`, `BatchNorm1d`, and `MaxPool1d`.
- Implements `Flatten` and `Linear` layer for binary classification.

#### ✅ Training:
- Uses `BCELoss` and `Adam` optimizer.
- Implements batch training with `DataLoader`.

#### ✅ Evaluation:
- Tracks loss and accuracy across epochs.
- Saves model and visualizes training performance.

#### ✅ Filter Visualization:
- Prints the shape of convolution filters.

In [None]:
import warnings
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import h5py
import time
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from scipy.io import loadmat

warnings.filterwarnings('ignore')

# ------------------- Step 1: Load Data -------------------

In [None]:
# Start of the wall-clock timer; the elapsed time is printed after training.
tStart = time.time()
numRun = 1  # For running multiple tests

# for ModRun in range(numRun): # This is to run multiple files. But for now just consider single file
# dataFileName = f'training_data_Rec_7to11_Din_random_1%_11012024_test{ModRun+1}.mat'
# filePath = D:\CNN_John\keras_CNN\TrainingData\Random\Random_training\
dataFileName = 'training_data_Rec_7to11_Din_random_10%_11012024_test8.mat'

# Open the file in a context manager so the HDF5 handle is released as soon as
# the arrays have been copied into memory (re-running the cell no longer leaks
# open handles). Everything needed from the file is read inside the block.
with h5py.File(dataFileName, 'r') as matlabDat_train:
    print(matlabDat_train.keys())

    # `[:]` materialises each dataset as a NumPy array before the file closes.
    # NOTE(review): the transpose presumably undoes MATLAB's column-major
    # storage order as seen through h5py -- confirm against the .mat layout.
    x_train = np.transpose(matlabDat_train['dat'][:])
    y_train = np.transpose(matlabDat_train['labels'][:])

    # Scalar metadata, each read from element [0][0] of its dataset.
    Nframes = matlabDat_train['Nframes'][0][0]
    winLen = matlabDat_train['winLen'][0][0]
    shift = matlabDat_train['shift'][0][0]

# ------------------- Step 2: Data Preprocessing -------------------

In [None]:
x_train = np.expand_dims(x_train, axis=2)  # Adding channel dimension
y_train = (y_train > 0.25).astype("int")  # Binarization
y_train = np.expand_dims(y_train, axis=2)

# Convert to PyTorch tensors
x_train = torch.tensor(x_train, dtype=torch.float32).permute(0, 2, 1)  # Shape: (batch, channels, seq_len)
y_train = torch.tensor(y_train, dtype=torch.float32)

# Reduce the number of negative-label samples so that at most
# class_sample_ratio (= 10) negatives remain per positive.
# The sum of a float tensor comes back as a Python float; cast it so every
# derived count (and the slice bound below) is an integer.
Nsamples_pos = int(torch.sum(y_train).item())
Nsamples_neg_in = y_train.numel() - Nsamples_pos
Nsamples_neg = min(int(10 * Nsamples_pos), Nsamples_neg_in)  # class_sample_ratio = 10

# First-axis (sample) indices of the negative labels.
# NOTE(review): assumes one label per sample, i.e. y_train is (N, 1, 1) here -- confirm.
NegInds = torch.where(y_train.squeeze(2) < 0.5)[0]  # Indices of negative samples
prune_size = len(NegInds) - Nsamples_neg

if prune_size > 0:
    # Randomly choose which negatives to discard.
    remove_inds = torch.randperm(len(NegInds))[:prune_size]
    NegInds = NegInds[remove_inds]

    # Drop the chosen rows. The previous code concatenated them back onto the
    # data, which duplicated negatives instead of removing them.
    keep_mask = torch.ones(x_train.shape[0], dtype=torch.bool)
    keep_mask[NegInds] = False
    x_train = x_train[keep_mask]
    y_train = y_train[keep_mask]

# Create DataLoader for batch processing
dataset = TensorDataset(x_train, y_train)
dataloader = DataLoader(dataset, batch_size=250, shuffle=True)

# ------------------- Step 3: Define CNN Model -------------------

In [None]:
# Default hyperparameters; they are bound as CNNModel's default arguments below.
num_layers=5
num_filters=16
kernel_size=50
dropout=0.2

class CNNModel(nn.Module):
    """1-D convolutional binary classifier.

    The network stacks ``num_layers`` blocks of
    ``Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d(2)``; block ``l`` produces
    ``num_filters * 2**l`` channels. The result is flattened, passed through
    dropout and a single-output linear layer, and squashed with a sigmoid.

    Args:
        input_size: Length of the last axis of the input. It determines the
            number of features reaching the linear layer.
        num_layers: Number of convolution blocks.
        num_filters: Output channels of the first convolution.
        kernel_size: Kernel width of every convolution.
        dropout: Dropout probability applied before the linear layer.

    Raises:
        ValueError: If ``input_size`` is too short to survive ``num_layers``
            pooling steps.
    """

    def __init__(self, input_size, num_layers=num_layers, num_filters=num_filters, kernel_size=kernel_size, dropout=dropout):
        super().__init__()

        # "same" padding keeps the length through each convolution, and each
        # MaxPool1d(2) halves it (rounding down), so the length after the stack
        # is input_size // 2**num_layers.
        pooled_len = input_size // (2 ** num_layers)
        if pooled_len < 1:
            raise ValueError(
                f"input_size={input_size} is too short for {num_layers} pooling layers"
            )

        layers = []
        in_channels = 1  # Input channels (single-channel signal)

        for l in range(num_layers):
            out_channels = num_filters * (2 ** l)
            layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, padding="same"))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(out_channels))
            layers.append(nn.MaxPool1d(2))  # Pool size of 2
            in_channels = out_channels

        self.conv_layers = nn.Sequential(*layers)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout)
        # Flatten yields channels * remaining length features. Sizing the layer
        # with the channel count alone only worked when pooled_len == 1.
        self.fc = nn.Linear(in_channels * pooled_len, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-sample probabilities of shape (batch, 1).

        Args:
            x: Tensor of shape (batch, 1, input_size).
        """
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fc(x)
        return self.sigmoid(x)

In [None]:
# Build the network, sized from the last axis of the training tensor
input_size = x_train.shape[2]
model = CNNModel(input_size=input_size)

# ------------------- Step 4: Train the Model -------------------
criterion = nn.BCELoss()  # the model already ends in a sigmoid, so plain BCE applies
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 15

# Prefer the GPU when one is visible; otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Per-epoch history.
# NOTE: val_accs receives the accuracy measured on the *training* batches below,
# and val_losses is never filled -- this cell has no validation split.
train_losses = []
val_losses = []
val_accs = []

for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        # Drop the singleton axes so the labels line up with the squeezed outputs
        targets = batch_y.squeeze()

        optimizer.zero_grad()
        outputs = model(batch_x).squeeze()

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Sum of per-batch mean losses; divided by the batch count after the loop
        running_loss += loss.item()

        # Threshold the probabilities at 0.5 to obtain hard predictions
        predicted = (outputs > 0.5).float()
        correct += (predicted == targets).sum().item()
        total += batch_y.size(0)

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = correct / total
    train_losses.append(epoch_loss)
    val_accs.append(epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

# tStart was taken before the data was loaded, so this figure covers loading,
# preprocessing and training together.
tStop = time.time()
print(f"Training time: {tStop - tStart:.1f} s")


In [None]:
# Show the module tree: each registered submodule with its constructor arguments.
print(model)

In [None]:
# For a more detailed, Keras-style summary, use the `torchinfo` package.
# It is not imported at the top of this notebook; install it first if it is missing:

# !pip install torchinfo

from torchinfo import summary

# Print model summary (input shape: batch_size=1, channels=1, input_size=your_input_length)
summary(model, input_size=(1, 1, input_size))

In [None]:
# ------------------- Step 5: Plot Training Metrics -------------------
# ModRun is only defined by the multi-run loop, which is commented out in the
# data-loading cell. Fall back to run 0 so the file name resolves to
# "Training_1.png" instead of raising NameError.
run_number = globals().get("ModRun", 0) + 1

plt.figure(figsize=(10, 5))

plt.subplot(2, 1, 1)
plt.plot(train_losses, label="Train Loss")
plt.legend()
plt.title("Training Loss")

# val_accs is filled by the training loop with accuracy measured on the
# training batches (there is no validation split), so label it accordingly.
plt.subplot(2, 1, 2)
plt.plot(val_accs, label="Train Accuracy")
plt.legend()
plt.title("Training Accuracy")

plt.tight_layout()
plt.savefig(f"Training_{run_number}.png")
plt.show()

# ------------------- Step 6: Save Model -------------------
# ModRun only exists when the (currently commented-out) multi-run loop is
# active; default to run 0 so the weights are written to "model_1.pth".
run_number = globals().get("ModRun", 0) + 1
torch.save(model.state_dict(), f"model_{run_number}.pth")

# ------------------- Step 7: Visualize Filters -------------------
# Select layers by type rather than by parameter name: every module inside
# `conv_layers` has "conv" in its parameter names, so a name-based filter also
# reports the BatchNorm1d weights as if they were convolution filters.
for name, module in model.named_modules():
    if isinstance(module, nn.Conv1d):
        print(f"Layer: {name}.weight, Filter Shape: {module.weight.shape}")


### Next Steps
- Run on GPU: If available, the script will automatically utilize CUDA.
- Hyperparameter Tuning: Modify kernel size, dropout, or layers as needed.
- Inference: Load the saved model using:

```
model.load_state_dict(torch.load('model_1.pth'))
model.eval()
```
Further Enhancements:
- Add validation/test dataset split.
- Use learning rate scheduling.
- Implement early stopping.