Before running, set `real_dir` and `fake_dir` in the DATA Loader cell to the folders that hold the STFT, MFCC, or Mel-spectrogram images you want to use (one representation at a time).

Import Stuff

In [None]:
!pip install torch torchvision torchaudio pytorch-tcn
# Run this cell first if the imports in the next cell fail.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np
from PIL import Image
import os
from pytorch_tcn import TCN
import time

**DATASET Definition**

In [None]:
class SpectrogramDataset(Dataset):
    """Binary image dataset built from one folder of real and one of fake images.

    Indices ``0 .. len(real_images) - 1`` map to files in ``real_dir`` (label 0);
    the remaining indices map to files in ``fake_dir`` (label 1).

    Args:
        real_dir: Directory whose files are labelled 0 (real).
        fake_dir: Directory whose files are labelled 1 (fake).
        transform: Optional callable applied to the grayscale PIL image
            before it is returned.
    """

    def __init__(self, real_dir, fake_dir, transform=None):
        self.real_dir = real_dir
        self.fake_dir = fake_dir
        self.real_images = self._list_files(real_dir)
        self.fake_images = self._list_files(fake_dir)
        self.transform = transform

    @staticmethod
    def _list_files(directory):
        """Return the non-directory entries of *directory* in sorted order."""
        # os.listdir() order is arbitrary; sorting keeps the index -> file
        # mapping stable across runs and machines.
        return sorted(
            entry
            for entry in os.listdir(directory)
            if not os.path.isdir(os.path.join(directory, entry))
        )

    def __len__(self):
        """Return the total number of real plus fake images."""
        return len(self.real_images) + len(self.fake_images)

    def __getitem__(self, idx):
        """Load the image at *idx* and return ``(image, label)``.

        Raises:
            ValueError: If the resolved path is a directory.
            IndexError: If *idx* is past the end of the dataset.
        """
        real_count = len(self.real_images)
        if idx < real_count:
            img_path = os.path.join(self.real_dir, self.real_images[idx])
            label = 0  # Real
        else:
            img_path = os.path.join(self.fake_dir, self.fake_images[idx - real_count])
            label = 1  # Fake

        # The listing already filters directories out, but the path could have
        # been replaced on disk since the dataset was built.
        if os.path.isdir(img_path):
            raise ValueError(f"Expected a file but found a directory: {img_path}")

        # The context manager closes the underlying file once the grayscale
        # copy has been produced.
        with Image.open(img_path) as source_image:
            image = source_image.convert('L')  # Convert to grayscale

        if self.transform:
            image = self.transform(image)

        return image, label

**GRU Model**

In [None]:
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    """GRU classifier that reads an image row by row as a sequence.

    Args:
        input_size: Number of features per time step (the width of the
            input image, i.e. the size of its last dimension).
        hidden_size: Number of features in the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Define GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)

        # forward() classifies from the last time step only, whose width is
        # always hidden_size, so no dummy forward pass is needed to size it.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of inputs.

        Args:
            x: Tensor whose first dimension is the batch and whose last
                dimension has ``input_size`` features, e.g.
                ``(batch, 1, height, width)`` or ``(batch, seq_len, features)``.
                All middle dimensions are flattened into the sequence axis.

        Returns:
            A tuple ``(logits, h_n)``: logits of shape
            ``(batch, num_classes)`` and the final GRU hidden state of shape
            ``(num_layers, batch, hidden_size)``.
        """
        batch_size = x.size(0)

        # Collapse everything between batch and feature axes into time steps;
        # for (batch, 1, 128, 128) this yields (batch, 128, 128) as before.
        x = x.reshape(batch_size, -1, x.size(-1))

        # With no initial state given, nn.GRU starts from zeros on the
        # input's device and dtype.
        out, h_n = self.gru(x)

        # We are only interested in the output of the last time step
        out = self.fc(out[:, -1, :])
        return out, h_n  # Returning hidden state as well


Logging functions

In [None]:
import matplotlib.pyplot as plt
import torch
import pandas as pd

def log_weights_biases(model, epoch):
    """Append a snapshot of the model's trainable parameters to the history.

    Args:
        model: Module whose ``named_parameters()`` are recorded.
        epoch: Epoch index stored alongside the snapshot.

    Appends ``(epoch, {parameter_name: numpy_array})`` to the module-level
    ``weights_biases_history`` list.
    """
    weights_biases = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            # On CPU, .numpy() shares memory with the live parameter, so
            # without the copy every stored snapshot would follow later
            # optimizer updates instead of freezing this epoch's values.
            weights_biases[name] = param.detach().cpu().numpy().copy()
    weights_biases_history.append((epoch, weights_biases))

def track_weight_changes(model, epoch):
    """Record the norm of each trainable parameter for the given epoch.

    Appends ``(epoch, {parameter_name: norm})`` to the module-level
    ``weight_norms`` list, where each norm is a Python float.
    """
    trainable = (
        (label, tensor)
        for label, tensor in model.named_parameters()
        if tensor.requires_grad
    )
    snapshot = {label: torch.norm(tensor.data).item() for label, tensor in trainable}
    weight_norms.append((epoch, snapshot))

Mount Drive to use images stored on Drive

In [None]:
# Mount Google Drive so the image folders under /content/drive are readable.
# `drive` comes from the Colab runtime; it was never imported in this notebook,
# so the mount call raised NameError.
from google.colab import drive

drive.mount('/content/drive')

**DATA Loader**

In [None]:
from torchvision import transforms
import os

# Paths to your data folders. Point both at the same representation
# (MFCC, STFT, or Mel-spectrogram); use one representation at a time.
real_dir = '/content/drive/MyDrive/SauravSir_CodeFiles/Output/real_images_stft'  # Real images (label 0)
# NOTE(review): this previously repeated the real-image folder, so every image
# was loaded twice, once labelled real and once labelled fake. The folder name
# below is assumed from the naming pattern of real_dir; confirm it matches Drive.
fake_dir = '/content/drive/MyDrive/SauravSir_CodeFiles/Output/fake_images_stft'  # Fake images (label 1)

# Image transformations (resizing and converting to tensor)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images to 128x128
    transforms.ToTensor()           # Convert images to PyTorch tensors
])

# Create the dataset
dataset = SpectrogramDataset(real_dir, fake_dir, transform=transform)

# Split into training (80%) and testing (remaining) sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader for batching; only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

LOG Prep

In [None]:
# Empty module-level histories: per-epoch parameter snapshots, per-epoch
# parameter norms, and captured GRU hidden states.
weights_biases_history, weight_norms, hidden_states_history = [], [], []

**TRAIN Model**

In [None]:
import torch.optim as optim
import torch.nn as nn
import torch
import numpy as np
import matplotlib.pyplot as plt
import time
import os

# Build the classifier together with its loss and optimizer.
model = GRUModel(input_size=128, hidden_size=64, num_layers=2, num_classes=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Run on CUDA when it is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Wall-clock timer for the whole training run.
train_start_time = time.time()

# Per-epoch metrics and the hidden state captured at the end of each epoch.
accuracy_list, loss_list, hidden_states_history = [], [], []
num_epochs = 5

# One pass over train_loader per epoch; the model returns (logits, hidden state).
for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward, loss, backward, parameter update.
        outputs, hidden_states = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # The predicted class is the index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean of the per-batch losses, and accuracy as a percentage.
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    accuracy_list.append(epoch_accuracy)
    loss_list.append(epoch_loss)

    # Snapshot parameters and their norms for this epoch.
    log_weights_biases(model, epoch)
    track_weight_changes(model, epoch)

    # Keep the hidden state of the final batch of this epoch.
    hidden_states_history.append(hidden_states.detach().cpu().numpy())

    # Single summary print per epoch.
    print(f"\033[92mEpoch {epoch + 1}/{num_epochs}\n Loss: {epoch_loss:.4f}\n Accuracy: {epoch_accuracy:.2f}%\033[0m")


train_end_time = time.time()
print(f"\033[1;91mTime taken for training: {train_end_time - train_start_time:.2f} seconds\033[0m")

# Accuracy and loss curves over the epochs, drawn on one figure.
plt.figure(figsize=(8, 6))
for series, marker_style, series_label in (
    (accuracy_list, 'o', 'Accuracy'),
    (loss_list, 'x', 'Loss'),
):
    plt.plot(range(1, num_epochs + 1), series, marker=marker_style, label=series_label)
plt.title('Training Progress')
plt.xlabel('Epoch')
plt.ylabel('Metric')
plt.legend()
plt.grid(True)

# Label each point: accuracy just above its marker, loss just below.
for i, (acc, loss) in enumerate(zip(accuracy_list, loss_list)):
    plt.annotate(f'{acc:.2f}', (i + 1, acc), textcoords="offset points", xytext=(0, 5), ha='center')
    plt.annotate(f'{loss:.2f}', (i + 1, loss), textcoords="offset points", xytext=(0, -10), ha='center')

plt.show()


**EVALUATE Model**

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import torch
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# Evaluate the trained model on test_loader and report accuracy, macro
# precision/recall/F1, average loss, and a confusion matrix. Relies on
# `model`, `device`, and `test_loader` from the earlier cells.

# The model emits one logit per class, so cross-entropy is the matching loss.
criterion = torch.nn.CrossEntropyLoss()

model.eval()
correct = 0
total = 0
all_labels = []
all_predictions = []
total_loss = 0.0

eval_start_time = time.time()  # Start timing evaluation

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move to device
        outputs, hidden_states = model(inputs)  # Unpack outputs and hidden states

        # Expect (batch, num_classes) logits with more than one class.
        if len(outputs.shape) == 2 and outputs.shape[1] > 1:
            _, predicted = torch.max(outputs, 1)  # Get predicted class indices
        else:
            # A skipped batch contributes nothing to the loss or the metrics.
            print("Error: Outputs tensor does not have the correct shape for classification.")
            continue

        # For loss computation
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Collect all labels and predictions for metric computation
        all_labels.extend(labels.cpu().numpy())  # Move to CPU for compatibility with sklearn
        all_predictions.extend(predicted.cpu().numpy())

        total += labels.size(0)
        correct += (predicted == labels).sum().item()


print()
eval_end_time = time.time()  # End timing evaluation
print(f"\033[1;92mTime taken for evaluation: {eval_end_time - eval_start_time:.2f} seconds\033[0m")
print()

# Convert to numpy arrays for sklearn
all_labels = np.array(all_labels)
all_predictions = np.array(all_predictions)

# Accuracy
accuracy = 100 * correct / total

# Precision, Recall, F1 Score (macro-averaged over the classes)
precision = precision_score(all_labels, all_predictions, average='macro')
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')

# Confusion Matrix. Passing labels=[0, 1] forces a 2x2 matrix even when a
# class never appears in the test labels or predictions, so it always lines
# up with the two 'Real'/'Fake' tick labels on the heatmap below.
conf_matrix = confusion_matrix(all_labels, all_predictions, labels=[0, 1])

# Average loss per batch
avg_loss = total_loss / len(test_loader)


# Data to be displayed in the table
data = [
    ['Accuracy', f'{accuracy:.2f}%'],
    ['Precision', f'{precision:.2f}'],
    ['Recall', f'{recall:.2f}'],
    ['F1 Score', f'{f1:.2f}'],
    ['Loss', f'{avg_loss:.4f}']
]

# Print the table
print(tabulate(data, headers=['Metric', 'Value'], tablefmt='fancy_grid'))
print()

# Plot Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Real', 'Fake'], yticklabels=['Real', 'Fake'])
plt.title('Confusion Matrix')
plt.ylabel('True Labels')
plt.xlabel('Predicted Labels')
plt.show()