## Complex PyTorch for Music Genre Classification

In [13]:
# Complex pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from complexPyTorch.complexLayers import *
from complexPyTorch.complexFunctions import *
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

# Plot
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Load Data
import numpy as np
import json
import os
import math
import librosa
import pathlib
from scipy.spatial.distance import cdist
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import random

# MFCCS
from scipy.io import wavfile
import scipy.fftpack as fft
from scipy.signal import get_window

In [3]:
def train(model, device, train_loader, test_loader, optimizer, epoch, metrics_dict, complexify=True, data_fn = None):
    model.train()
    total_loss = 0
    correct = 0
    total_samples = len(train_loader.dataset)
    start_time = time.time()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        if complexify: data = data.type(torch.complex64)
        if data_fn != None: data = data_fn(data)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        
        if batch_idx % 10 == 0:
            batch_accuracy = 100. * correct / ((batch_idx + 1) * len(data))
            print('Train Epoch: {:3} [{:6}/{:6} ({:3.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.2f}%'.format(
                epoch,
                batch_idx * len(data),
                total_samples,
                100. * batch_idx / len(train_loader),
                loss.item(),
                batch_accuracy)
            )
    
    end_time = time.time()
    epoch_times = metrics_dict['epoch_times']
    epoch_times.append(end_time - start_time)
    epoch_loss = total_loss / len(train_loader)
    epoch_accuracy = 100. * correct / total_samples
    train_losses = metrics_dict['train_losses']
    train_accuracies = metrics_dict['train_accuracies']
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)
    print('Epoch {} - Time: {:.2f}s - Train Loss: {:.6f} - Train Accuracy: {:.2f}%'.format(epoch, epoch_times[-1], epoch_loss, epoch_accuracy))
    
    # Evaluate on test data
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            if complexify:
                data = data.type(torch.complex64)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    test_losses = metrics_dict['test_losses']
    test_accuracies = metrics_dict['test_accuracies']
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)
    print('Test Loss: {:.6f} - Test Accuracy: {:.2f}%\n'.format(test_loss, test_accuracy))

### Data Preparation

In [4]:
DATASET_PATH = "Data/binary_data/train"
SAMPLE_RATE = 22050
TRACK_DURATION = 30 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
BATCH_SIZE = 32
NUM_EPOCHS = 10

In [5]:
genre_list = os.listdir(DATASET_PATH)
if '.DS_Store' in genre_list: genre_list.remove('.DS_Store')
genre_mappings = dict(zip(genre_list, range(len(genre_list))))
print(genre_mappings)

{'classical': 0, 'rock': 1}


### MFCCS

In [6]:
class MusicFeatureExtractor:
    def __init__(self, FFT_size=2048, HOP_SIZE=512, mel_filter_num=13, dct_filter_num=40):
        self.FFT_size = FFT_size
        self.HOP_SIZE = HOP_SIZE
        self.mel_filter_num = mel_filter_num
        self.dct_filter_num = dct_filter_num
        self.epsilon = 1e-10  # Added to log to avoid log10(0)

    def normalize_audio(self, audio):
        audio = audio / np.max(np.abs(audio))
        return audio

    def frame_audio(self, audio):
        frame_num = int((len(audio) - self.FFT_size) / self.HOP_SIZE) + 1
        frames = np.zeros((frame_num, self.FFT_size))
        for n in range(frame_num):
            frames[n] = audio[n * self.HOP_SIZE: n * self.HOP_SIZE + self.FFT_size]
        return frames

    def freq_to_mel(self, freq):
        return 2595.0 * np.log10(1.0 + freq / 700.0)

    def met_to_freq(self, mels):
        return 700.0 * (10.0 ** (mels / 2595.0) - 1.0)

    def get_filter_points(self, fmin, fmax, sample_rate):
        fmin_mel = self.freq_to_mel(fmin)
        fmax_mel = self.freq_to_mel(fmax)
        mels = np.linspace(fmin_mel, fmax_mel, num=self.mel_filter_num + 2)
        freqs = self.met_to_freq(mels)
        return np.floor((self.FFT_size + 1) / sample_rate * freqs).astype(int), freqs

    def get_filters(self, filter_points):
        filters = np.zeros((len(filter_points) - 2, int(self.FFT_size / 2 + 1)))
        for n in range(len(filter_points) - 2):
            filters[n, filter_points[n]: filter_points[n + 1]] = np.linspace(0, 1, filter_points[n + 1] - filter_points[n])
            filters[n, filter_points[n + 1]: filter_points[n + 2]] = np.linspace(1, 0, filter_points[n + 2] - filter_points[n + 1])
        return filters

    def dct(self):
        basis = np.empty((self.dct_filter_num, self.mel_filter_num))
        basis[0, :] = 1.0 / np.sqrt(self.mel_filter_num)
        samples = np.arange(1, 2 * self.mel_filter_num, 2) * np.pi / (2.0 * self.mel_filter_num)
        for i in range(1, self.dct_filter_num):
            basis[i, :] = np.cos(i * samples) * np.sqrt(2.0 / self.mel_filter_num)
        return basis

    def get_mfcc_features(self, audio, sample_rate):
        audio = self.normalize_audio(audio)
        audio_framed = self.frame_audio(audio)
        window = get_window("hann", self.FFT_size, fftbins=True)
        audio_win = audio_framed * window
        audio_winT = np.transpose(audio_win)
        audio_fft = np.empty((int(1 + self.FFT_size // 2), audio_winT.shape[1]), dtype=np.complex64, order='F')
        for n in range(audio_fft.shape[1]):
            audio_fft[:, n] = fft.fft(audio_winT[:, n], axis=0)[:audio_fft.shape[0]]
        audio_fft = np.transpose(audio_fft)
        audio_fft = np.square(np.abs(audio_fft))
        freq_min = 0
        freq_high = sample_rate / 2
        filter_points, mel_freqs = self.get_filter_points(freq_min, freq_high, sample_rate)
        filters = self.get_filters(filter_points)
        audio_filtered = np.dot(filters, np.transpose(audio_fft))
        audio_filtered = np.maximum(audio_filtered, self.epsilon)  # Replace zero values with epsilon
        audio_log = 10.0 * np.log10(audio_filtered)
        dct_filters = self.dct()
        cepstral_coefficents = np.dot(dct_filters, audio_log)
        return np.array([cepstral_coefficents])

class MusicFeatureExtractorComplex:
    def __init__(self, FFT_size=2048, HOP_SIZE=512, mel_filter_num=13, dct_filter_num=40):
        self.FFT_size = FFT_size
        self.HOP_SIZE = HOP_SIZE
        self.mel_filter_num = mel_filter_num
        self.dct_filter_num = dct_filter_num
        self.epsilon = 1e-10  # Added to log to avoid log10(0)

    def normalize_audio(self, audio):
        audio = audio / np.max(np.abs(audio))
        return audio

    def frame_audio(self, audio):
        frame_num = int((len(audio) - self.FFT_size) / self.HOP_SIZE) + 1
        frames = np.zeros((frame_num, self.FFT_size))
        for n in range(frame_num):
            frames[n] = audio[n * self.HOP_SIZE: n * self.HOP_SIZE + self.FFT_size]
        return frames

    def freq_to_mel(self, freq):
        return 2595.0 * np.log10(1.0 + freq / 700.0)

    def met_to_freq(self, mels):
        return 700.0 * (10.0 ** (mels / 2595.0) - 1.0)

    def get_filter_points(self, fmin, fmax, sample_rate):
        fmin_mel = self.freq_to_mel(fmin)
        fmax_mel = self.freq_to_mel(fmax)
        mels = np.linspace(fmin_mel, fmax_mel, num=self.mel_filter_num + 2)
        freqs = self.met_to_freq(mels)
        return np.floor((self.FFT_size + 1) / sample_rate * freqs).astype(int), freqs

    def get_filters(self, filter_points):
        filters = np.zeros((len(filter_points) - 2, int(self.FFT_size / 2 + 1)))
        for n in range(len(filter_points) - 2):
            filters[n, filter_points[n]: filter_points[n + 1]] = np.linspace(0, 1, filter_points[n + 1] - filter_points[n])
            filters[n, filter_points[n + 1]: filter_points[n + 2]] = np.linspace(1, 0, filter_points[n + 2] - filter_points[n + 1])
        return filters

    def dct(self):
        basis = np.empty((self.dct_filter_num, self.mel_filter_num))
        basis[0, :] = 1.0 / np.sqrt(self.mel_filter_num)
        samples = np.arange(1, 2 * self.mel_filter_num, 2) * np.pi / (2.0 * self.mel_filter_num)
        for i in range(1, self.dct_filter_num):
            basis[i, :] = np.cos(i * samples) * np.sqrt(2.0 / self.mel_filter_num)
        return basis

    def get_mfcc_features(self, audio, sample_rate):
        audio = self.normalize_audio(audio)
        audio_framed = self.frame_audio(audio)
        window = get_window("hann", self.FFT_size, fftbins=True)
        audio_win = audio_framed * window
        audio_winT = np.transpose(audio_win)
        audio_fft = np.empty((int(1 + self.FFT_size // 2), audio_winT.shape[1]), dtype=np.complex64, order='F')
        for n in range(audio_fft.shape[1]):
            audio_fft[:, n] = fft.fft(audio_winT[:, n], axis=0)[:audio_fft.shape[0]]
        audio_fft = np.transpose(audio_fft)
        freq_min = 0
        freq_high = sample_rate / 2
        filter_points, mel_freqs = self.get_filter_points(freq_min, freq_high, sample_rate)
        filters = self.get_filters(filter_points)
        audio_filtered = np.dot(filters, np.transpose(audio_fft))  
        audio_filtered[audio_filtered == 0] = self.epsilon # Replace zero values with epsilon
        audio_log = 10.0 * np.log10(audio_filtered)
        dct_filters = self.dct()
        cepstral_coefficents = np.dot(dct_filters, audio_log)
        return np.array([cepstral_coefficents])

In [7]:
class GenreDatasetMFCC(Dataset):

    def __init__(self, train_path, n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=40, training = True):
        cur_path = pathlib.Path(train_path)
        self.files = []
        for i in list(cur_path.rglob("*.wav")):
            for j in range(num_segments):
                self.files.append([j, i])
        self.samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.num_segments = num_segments
        self.mfcc_extractor = MusicFeatureExtractor(
            FFT_size=n_fft, HOP_SIZE=hop_length, mel_filter_num = mel_filter_num, dct_filter_num = dct_filter_num)
        self.dct_filter_num = dct_filter_num
        self.training = training

    def apply_augmentations(self, signal):
        # Apply augmentations to the audio signal
        if random.random() < 0.5:
            signal = librosa.effects.pitch_shift(signal, sr=SAMPLE_RATE, n_steps=random.uniform(-2, 2))
        if random.random() < 0.5:
            signal = librosa.effects.time_stretch(signal, rate=random.uniform(0.8, 1.2))
        return signal

    def adjust_shape(self, sequence, max_sequence_length = 126):
        current_length = sequence.shape[2]
        if current_length < max_sequence_length:
            padding = np.zeros((1, 13, max_sequence_length - current_length))
            padded_sequence = np.concatenate((sequence, padding), axis=2)
        else:
            padded_sequence = sequence[:, :, :max_sequence_length]
        return padded_sequence
        
    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        cur_file = self.files[idx]
        d = cur_file[0]
        file_path = cur_file[1]
        target = genre_mappings[str(file_path).split("/")[3]]
        signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
        start = self.samples_per_segment * d
        finish = start + self.samples_per_segment
        cur_signal = signal[start:finish]
        if self.training: cur_signal = self.apply_augmentations(cur_signal)
        cur_mfcc = self.mfcc_extractor.get_mfcc_features(cur_signal, sample_rate)  # Use the MusicFeatureExtractor to get MFCC features
        cur_mfcc = self.adjust_shape(cur_mfcc)
        return torch.tensor(cur_mfcc, dtype=torch.float32), target


class GenreDatasetPhaseMFCC(GenreDatasetMFCC):

    def __init__(self, train_path, n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=40, training = True):
        super().__init__(train_path, n_fft, hop_length, num_segments, mel_filter_num, dct_filter_num, training)
        self.mfcc_extractor = MusicFeatureExtractorComplex(
            FFT_size=n_fft, HOP_SIZE=hop_length, mel_filter_num = mel_filter_num, dct_filter_num = dct_filter_num)
        
    def __getitem__(self, idx):
        cur_file = self.files[idx]
        d = cur_file[0]
        file_path = cur_file[1]
        target = genre_mappings[str(file_path).split("/")[3]]
        signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
        start = self.samples_per_segment * d
        finish = start + self.samples_per_segment
        cur_signal = signal[start:finish]
        if self.training: cur_signal = self.apply_augmentations(cur_signal)
        cur_mfcc = self.mfcc_extractor.get_mfcc_features(cur_signal, sample_rate)  # Use the MusicFeatureExtractor to get MFCC features
        cur_mfcc = self.adjust_shape(cur_mfcc)
        return torch.tensor(cur_mfcc, dtype=torch.complex64), target

#### 1. No phase data

In [21]:
train_dataset = GenreDatasetPhaseMFCC("Data/binary_data/train/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13)
test_dataset = GenreDatasetPhaseMFCC("Data/binary_data/test/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

In [27]:
class ComplexNet(nn.Module):
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.conv1 = ComplexConv2d(1, 10, kernel_size=2, stride=1)
        self.bn = ComplexBatchNorm2d(10)
        self.conv2 = ComplexConv2d(10, 20, kernel_size=2, stride=1)
        self.gnn_layer = GCNConv(in_channels=40, out_channels=20)  # GNN layer

        self.fc1 = ComplexLinear(20, 128)
        self.fc2 = ComplexLinear(128, 2)  # Binary classification output
        
    def forward(self, x):  # Pass edge_index for GNN
        x = self.conv1(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        x = self.bn(x)
        print(x.shape)
        x = self.conv2(x)
        print(x.shape)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        print(x.shape)
        # Apply GNN layer to capture phase relationships
        edge_index = torch.tensor([
            [i, j] for i in range(126) for j in range(126) if i != j
        ], dtype=torch.long).t().contiguous()
        edge_index = edge_index.view(2, -1).t().contiguous()
        edge_index = edge_index.repeat(x.size(0), 1)
        x = self.gnn_layer(x, edge_index)

        # Flatten and pass through MLP
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.abs()
        x =  F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ComplexNet().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e1 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e1)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e1.items():
    print(f'{key}: {value}')

torch.Size([32, 10, 6, 62])
torch.Size([32, 20, 5, 61])
torch.Size([32, 20, 2, 30])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 504000 but got size 2 for tensor number 1 in the list.

In [None]:
class ComplexNet(nn.Module):
    
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.conv1 = ComplexConv2d(1, 10, 2, 1)
        self.bn  = ComplexBatchNorm2d(10)
        self.conv2 = ComplexConv2d(10, 20, 2, 1)
        self.fc1 = ComplexLinear(30*2*20, 500)
        self.fc2 = ComplexLinear(500, 3)
             
    def forward(self,x):
        x = self.conv1(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        x = self.bn(x)
        x = self.conv2(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        x = x.view(-1,30*2*20)
        x = self.fc1(x)
        x = complex_relu(x)
        x = self.fc2(x)
        x = x.abs()
        x =  F.log_softmax(x, dim=1)
        return x
        
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ComplexNet().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e2 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e2)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e2.items():
    print(f'{key}: {value}')

In [None]:
train_dataset = GenreDatasetPhaseMFCC("Data/binary_data/train/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13)
test_dataset = GenreDatasetPhaseMFCC("Data/binary_data/test/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

class ComplexNet(nn.Module):
    
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.conv1 = ComplexConv2d(1, 10, 2, 1)
        self.bn  = ComplexBatchNorm2d(10)
        self.conv2 = ComplexConv2d(10, 20, 2, 1)
        self.fc1 = ComplexLinear(30*2*20, 500)
        self.fc2 = ComplexLinear(500, 3)
             
    def forward(self,x):
        x = self.conv1(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        x = self.bn(x)
        x = self.conv2(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, 2, 2)
        x = x.view(-1,30*2*20)
        x = self.fc1(x)
        x = complex_relu(x)
        x = self.fc2(x)
        x = x.abs()
        x =  F.log_softmax(x, dim=1)
        return x
        
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ComplexNet().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e3 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e3)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e3.items():
    print(f'{key}: {value}')

## Plots

In [None]:
# Data for the four scenarios
data = {
    "Magnitude Only (Real Net)": metrics_dict_e1,
    "Magnitude Only (Complex Net)": metrics_dict_e2,
    "Magnitude and Phase (Complex Net)": metrics_dict_e3
}

# Data for plotting
epochs = range(1, 21)
colors = ['b', 'g', 'r', 'm', 'y']
scenarios = list(data.keys())

fig, axes = plt.subplots(2, 1, figsize=(10, 10))

for i, scenario in enumerate(scenarios):
    axes[0].plot(epochs, data[scenario]["train_accuracies"], label=scenario, color=colors[i])

axes[0].set_title("Train Accuracy")
axes[0].set_xlabel("Epochs")
axes[0].set_ylabel("Train Accuracy")
axes[0].legend()

for i, scenario in enumerate(scenarios):
    axes[1].plot(epochs, data[scenario]["test_accuracies"], label=scenario, color=colors[i])

axes[1].set_title("Test Accuracy")
axes[1].set_xlabel("Epochs")
axes[1].set_ylabel("Test Accuracy")
axes[1].legend()

plt.tight_layout()
plt.show()

fig, axes = plt.subplots(2, 1, figsize=(10, 10))

for i, scenario in enumerate(scenarios):
    axes[0].plot(epochs, data[scenario]["train_losses"], label=scenario, color=colors[i])

axes[0].set_title("Train Loss")
axes[0].set_xlabel("Epochs")
axes[0].set_ylabel("Train Loss")
axes[0].legend()

for i, scenario in enumerate(scenarios):
    axes[1].plot(epochs, data[scenario]["test_losses"], label=scenario, color=colors[i])

axes[1].set_title("Test Loss")
axes[1].set_xlabel("Epochs")
axes[1].set_ylabel("Test Loss")
axes[1].legend()

plt.tight_layout()
plt.show()

fig, axes = plt.subplots(1, 1, figsize=(10, 5))
for i, scenario in enumerate(scenarios):
    axes.plot(epochs, data[scenario]["epoch_times"], label=scenario, color=colors[i])
axes.set_title("Time")
axes.set_xlabel("Epochs")
axes.set_ylabel("Time (secs)")
axes.legend()


# New tests

In [25]:
train_dataset = GenreDatasetMFCC("Data/train/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13, training = True)
test_dataset = GenreDatasetMFCC("Data/test/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13, training = False)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

In [26]:
class MusicGenreCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(MusicGenreCNN, self).__init__()
        # Convolutional layer 1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn1 = nn.BatchNorm2d(32)
        # Convolutional layer 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        # Convolutional layer 3
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn3 = nn.BatchNorm2d(32)
        # Fully connected layers
        self.fc1 = nn.Linear(32*2*16, 128)  # Calculate the input size based on the output of the last convolutional layer
        self.dropout1 = nn.Dropout2d(0.5)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        # Convolutional layers
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.bn1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.bn2(x)
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = self.bn3(x)
        # Flatten the output from convolutional layers
        x = x.view(-1, 32*2*16)
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        # x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MusicGenreCNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e1 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e1,
          complexify = False)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e1.items():
    print(f'{key}: {value}')




Epoch 0 - Time: 245.61s - Train Loss: 1.956277 - Train Accuracy: 28.39%
Test Loss: 1.593823 - Test Accuracy: 39.88%

Epoch 1 - Time: 347.25s - Train Loss: 1.686377 - Train Accuracy: 39.64%
Test Loss: 1.874927 - Test Accuracy: 33.62%

Epoch 2 - Time: 408.28s - Train Loss: 1.613709 - Train Accuracy: 42.07%
Test Loss: 1.645144 - Test Accuracy: 39.69%

Epoch 3 - Time: 406.80s - Train Loss: 1.563499 - Train Accuracy: 44.14%
Test Loss: 1.599146 - Test Accuracy: 41.44%

Epoch 4 - Time: 417.97s - Train Loss: 1.509779 - Train Accuracy: 46.25%
Test Loss: 1.517148 - Test Accuracy: 45.69%

Epoch 5 - Time: 438.09s - Train Loss: 1.471889 - Train Accuracy: 47.81%
Test Loss: 1.539526 - Test Accuracy: 43.94%

Epoch 6 - Time: 430.21s - Train Loss: 1.425877 - Train Accuracy: 49.09%
Test Loss: 1.556612 - Test Accuracy: 44.31%

Epoch 7 - Time: 426.52s - Train Loss: 1.393590 - Train Accuracy: 51.03%
Test Loss: 1.367264 - Test Accuracy: 50.31%

Epoch 8 - Time: 415.41s - Train Loss: 1.359390 - Train Accuracy:

In [316]:
class MusicGenreCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(MusicGenreCNN, self).__init__()
        # Convolutional layer 1
        self.conv1 = ComplexConv2d(in_channels=1, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool1 = ComplexMaxPool2d(kernel_size=3, stride=2)
        self.bn1 = ComplexBatchNorm2d(32)
        # Convolutional layer 2
        self.conv2 = ComplexConv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool2 = ComplexMaxPool2d(kernel_size=2, stride=2)
        self.bn2 = ComplexBatchNorm2d(32)
        # Fully connected layers
        self.fc1 = ComplexLinear(32*3*32, 128)  # Calculate the input size based on the output of the last convolutional layer
        self.dropout1 = ComplexDropout2d(0.5)
        self.fc2 = ComplexLinear(128, 64)
        self.dropout2 = ComplexDropout2d(0.5)
        self.fc3 = ComplexLinear(64, num_classes)

    def forward(self, x):
        # Convolutional layers
        x = complex_relu(self.conv1(x))
        x = self.pool1(x)
        x = self.bn1(x)
        x = complex_relu(self.conv2(x))
        x = self.pool2(x)
        x = self.bn2(x)
        # Flatten the output from convolutional layers
        x = x.view(-1, 32*3*32)
        # Fully connected layers
        x = complex_relu(self.fc1(x))
        x = self.dropout1(x)
        x = complex_relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MusicGenreCNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e2 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e2)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e1.items():
    print(f'{key}: {value}')

Epoch 0 - Time: 32.63s - Train Loss: nan - Train Accuracy: 9.99%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 1 - Time: 33.74s - Train Loss: nan - Train Accuracy: 9.90%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 2 - Time: 33.57s - Train Loss: nan - Train Accuracy: 9.90%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 3 - Time: 36.11s - Train Loss: nan - Train Accuracy: 9.88%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 4 - Time: 34.97s - Train Loss: nan - Train Accuracy: 9.90%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 5 - Time: 33.74s - Train Loss: nan - Train Accuracy: 9.89%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 6 - Time: 33.84s - Train Loss: nan - Train Accuracy: 9.90%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 7 - Time: 33.32s - Train Loss: nan - Train Accuracy: 9.90%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 8 - Time: 33.37s - Train Loss: nan - Train Accuracy: 9.89%
Test Loss: nan - Test Accuracy: 10.57%

Epoch 9 - Time: 33.54s - Train Loss: nan - Train Accura

KeyboardInterrupt: 

In [313]:
class MusicGenreCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(MusicGenreCNN, self).__init__()
        # Convolutional layer 1
        self.conv1 = ComplexConv2d(in_channels=1, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool1 = ComplexMaxPool2d(kernel_size=3, stride=2)
        self.bn1 = ComplexBatchNorm2d(32)
        # Convolutional layer 2
        self.conv2 = ComplexConv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool2 = ComplexMaxPool2d(kernel_size=3, stride=2)
        self.bn2 = ComplexBatchNorm2d(32)
        # Convolutional layer 3
        self.conv3 = ComplexConv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool3 = ComplexMaxPool2d(kernel_size=2, stride=2)
        self.bn3 = ComplexBatchNorm2d(32)
        # Fully connected layers
        self.fc1 = ComplexLinear(32*2*16, 128)  # Calculate the input size based on the output of the last convolutional layer
        self.dropout1 = ComplexDropout2d(0.5)
        self.fc2 = ComplexLinear(128, 64)
        self.dropout2 = ComplexDropout2d(0.5)
        self.fc3 = ComplexLinear(64, num_classes)

    def forward(self, x):
        # Convolutional layers
        x = complex_relu(self.conv1(x))
        x = self.pool1(x)
        x = self.bn1(x)
        x = complex_relu(self.conv2(x))
        x = self.pool2(x)
        x = self.bn2(x)
        x = complex_relu(self.conv3(x))
        x = self.pool3(x)
        x = self.bn3(x)
        # Flatten the output from convolutional layers
        x = x.view(-1, 32*2*16)
        # Fully connected layers
        x = complex_relu(self.fc1(x))
        x = self.dropout1(x)
        x = complex_relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MusicGenreCNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e2 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e2)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e1.items():
    print(f'{key}: {value}')

Epoch 0 - Time: 41.36s - Train Loss: 2.475049 - Train Accuracy: 10.93%
Test Loss: 2.305991 - Test Accuracy: 8.44%

Epoch 1 - Time: 41.57s - Train Loss: 2.308495 - Train Accuracy: 9.81%
Test Loss: 2.306710 - Test Accuracy: 10.19%

Epoch 2 - Time: 41.58s - Train Loss: 2.306683 - Train Accuracy: 9.56%
Test Loss: 2.305398 - Test Accuracy: 9.82%

Epoch 3 - Time: 41.79s - Train Loss: 2.305664 - Train Accuracy: 9.43%
Test Loss: 2.303775 - Test Accuracy: 8.44%

Epoch 4 - Time: 42.26s - Train Loss: 2.305558 - Train Accuracy: 9.26%
Test Loss: 2.304778 - Test Accuracy: 10.38%

Epoch 5 - Time: 42.93s - Train Loss: 2.305010 - Train Accuracy: 9.64%
Test Loss: 2.303073 - Test Accuracy: 10.32%

Epoch 6 - Time: 42.79s - Train Loss: 2.305583 - Train Accuracy: 9.63%
Test Loss: 2.303879 - Test Accuracy: 10.57%

Epoch 7 - Time: 43.04s - Train Loss: 2.304335 - Train Accuracy: 9.84%
Test Loss: 2.307010 - Test Accuracy: 9.51%

Epoch 8 - Time: 43.02s - Train Loss: 2.305450 - Train Accuracy: 9.47%
Test Loss: 2.

KeyboardInterrupt: 

#### 2. Phase data

In [320]:
dataset = GenreDatasetPhaseMFCC("Data/genres_original/", n_fft=2048, hop_length=512, num_segments=10, mel_filter_num=13, dct_filter_num=13)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.16, random_state=42)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

In [321]:
class MusicGenreCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(MusicGenreCNN, self).__init__()
        # Convolutional layer 1
        self.conv1 = ComplexConv2d(in_channels=1, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool1 = ComplexMaxPool2d(kernel_size=3, stride=2)
        self.bn1 = ComplexBatchNorm2d(32)
        # Convolutional layer 2
        self.conv2 = ComplexConv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool2 = ComplexMaxPool2d(kernel_size=3, stride=2)
        self.bn2 = ComplexBatchNorm2d(32)
        # Convolutional layer 3
        self.conv3 = ComplexConv2d(in_channels=32, out_channels=32, kernel_size=(2, 2), stride=1, padding=1)
        self.pool3 = ComplexMaxPool2d(kernel_size=2, stride=2)
        self.bn3 = ComplexBatchNorm2d(32)
        # Fully connected layers
        self.fc1 = ComplexLinear(32*2*16, 128)  # Calculate the input size based on the output of the last convolutional layer
        self.dropout1 = ComplexDropout2d(0.5)
        self.fc2 = ComplexLinear(128, 64)
        self.dropout2 = ComplexDropout2d(0.5)
        self.fc3 = ComplexLinear(64, num_classes)

    def forward(self, x):
        # Convolutional layers
        x = complex_relu(self.conv1(x))
        x = self.pool1(x)
        x = self.bn1(x)
        x = complex_relu(self.conv2(x))
        x = self.pool2(x)
        x = self.bn2(x)
        x = complex_relu(self.conv3(x))
        x = self.pool3(x)
        x = self.bn3(x)
        # Flatten the output from convolutional layers
        x = x.view(-1, 32*2*16)
        # Fully connected layers
        x = complex_relu(self.fc1(x))
        x = self.dropout1(x)
        x = complex_relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MusicGenreCNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

metrics_dict_e2 = {
    'epoch_times': [],
    'train_losses': [],
    'train_accuracies': [],
    'test_losses': [],
    'test_accuracies': []
}

for epoch in range(NUM_EPOCHS):
    train(model, 
          device, 
          train_loader, 
          test_loader, 
          optimizer, 
          epoch, 
          metrics_dict_e2)

print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e2.items():
    print(f'{key}: {value}')

Epoch 0 - Time: 41.16s - Train Loss: 2.421256 - Train Accuracy: 10.56%
Test Loss: 2.305844 - Test Accuracy: 9.51%

Epoch 1 - Time: 41.78s - Train Loss: 2.306584 - Train Accuracy: 9.70%
Test Loss: 2.302605 - Test Accuracy: 10.82%

Epoch 2 - Time: 42.32s - Train Loss: 2.305501 - Train Accuracy: 9.43%
Test Loss: 2.303694 - Test Accuracy: 10.38%

Epoch 3 - Time: 42.90s - Train Loss: 2.306063 - Train Accuracy: 10.02%
Test Loss: 2.305065 - Test Accuracy: 8.44%

Epoch 4 - Time: 42.89s - Train Loss: 2.309301 - Train Accuracy: 10.06%
Test Loss: 2.302534 - Test Accuracy: 10.82%

Epoch 5 - Time: 43.12s - Train Loss: 2.303421 - Train Accuracy: 10.34%
Test Loss: 2.299455 - Test Accuracy: 10.51%

Epoch 6 - Time: 43.55s - Train Loss: 2.305809 - Train Accuracy: 9.64%
Test Loss: 2.304744 - Test Accuracy: 9.51%

Epoch 7 - Time: 43.75s - Train Loss: 2.305246 - Train Accuracy: 10.71%
Test Loss: 2.305571 - Test Accuracy: 8.44%

Epoch 8 - Time: 43.48s - Train Loss: 2.302967 - Train Accuracy: 10.70%
Test Los

In [322]:
print("-"*100)
print("-"*100)
print("FINAL RESULTS:")
print("-"*100)
for key, value in metrics_dict_e2.items():
    print(f'{key}: {value}')

----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
FINAL RESULTS:
----------------------------------------------------------------------------------------------------
epoch_times: [41.16187524795532, 41.7832088470459, 42.31985092163086, 42.900376081466675, 42.894107818603516, 43.12066102027893, 43.55217790603638, 43.7485990524292, 43.4820830821991, 43.424493074417114, 43.10460901260376, 42.867106676101685, 42.81493282318115, 42.962246894836426, 43.437156677246094, 43.67679286003113, 43.23204469680786, 43.20026421546936, 43.1420738697052, 43.18421506881714, 43.88410019874573, 42.85453677177429, 43.61320924758911, 43.36027717590332, 43.5458459854126, 42.96121597290039, 43.42939496040344, 43.28667902946472, 43.05354690551758, 43.22593092918396, 42.77023720741272, 42.87620186805725, 43.50260806083679, 43.79348421096802, 43.71834707260132, 43

### Specs:

In [None]:
class GenreDataset(Dataset):

    def __init__(self, train_path, n_fft=2048, hop_length=512, num_segments=10):
        cur_path = pathlib.Path(train_path)
        self.files = []
        for i in list(cur_path.rglob("*.wav")):
            for j in range(num_segments):
                self.files.append([j, i])
        self.samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.num_segments = num_segments

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        cur_file = self.files[idx]
        d = cur_file[0]
        file_path = cur_file[1]
        target = genre_mappings[str(file_path).split("/")[2]]
        signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
        start = self.samples_per_segment * d
        finish = start + self.samples_per_segment
        cur_signal = signal[start:finish]
        cur_spec = librosa.stft(cur_signal, n_fft = self.n_fft, hop_length = self.hop_length)
        return torch.tensor(np.array([np.abs(cur_spec)]), dtype = torch.complex64), target

class GenreDatasetPhase(GenreDataset):

    def __init__(self, train_path, n_fft=2048, hop_length=512, num_segments=10):
        super().__init__(train_path, n_fft, hop_length, num_segments)

    def __getitem__(self, idx):
        cur_file = self.files[idx]
        d = cur_file[0]
        file_path = cur_file[1]
        target = genre_mappings[str(file_path).split("/")[2]]
        signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
        start = self.samples_per_segment * d
        finish = start + self.samples_per_segment
        cur_signal = signal[start:finish]
        cur_spec = librosa.stft(cur_signal, n_fft = self.n_fft, hop_length = self.hop_length)
        return torch.tensor(np.array([cur_spec]), dtype = torch.complex64), target

train_set = GenreDataset("Data/genres_original/")
train_loader = torch.utils.data.DataLoader(dataset=train_set, shuffle=True, batch_size = BATCH_SIZE, drop_last=True)

In [83]:
class MusicGenreCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(MusicGenreCNN, self).__init__()
        # Convolutional layers
        self.conv1 = ComplexConv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.bn1 = ComplexBatchNorm2d(16)
        self.conv2 = ComplexConv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Max pooling layer
        self.pool = ComplexMaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers
        self.fc1 = ComplexLinear(65*512*32, 256)  # Calculate the input size based on the output of the last convolutional layer
        self.fc2 = ComplexLinear(256, 10)

    def forward(self, x):
        # Convolutional layers
        x = complex_relu(self.conv1(x))
        print(x.shape)
        x = self.bn1(x)
        print(x.shape)
        x = complex_relu(self.conv2(x))
        print(x.shape)
        x = self.pool(x)
        print(x.shape)
        # Flatten the output from convolutional layers
        x = x.view(-1, 65*512*32)
        print(x.shape)
        # Fully connected layers
        x = complex_relu(self.fc1(x))
        x = self.fc2(x)
        x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MusicGenreCNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [84]:
epoch_loss = {}
for epoch in range(10):
    cur_loss = train(model, device, train_loader, optimizer, epoch)
    epoch_loss[epoch] = cur_loss

torch.Size([32, 1, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 32, 1025, 130])
torch.Size([32, 32, 512, 65])
torch.Size([32, 1064960])
torch.Size([32, 10])
torch.Size([32, 1, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 32, 1025, 130])
torch.Size([32, 32, 512, 65])
torch.Size([32, 1064960])
torch.Size([32, 10])
torch.Size([32, 1, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 32, 1025, 130])
torch.Size([32, 32, 512, 65])
torch.Size([32, 1064960])
torch.Size([32, 10])
torch.Size([32, 1, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 32, 1025, 130])
torch.Size([32, 32, 512, 65])
torch.Size([32, 1064960])
torch.Size([32, 10])
torch.Size([32, 1, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 16, 1025, 130])
torch.Size([32, 32, 1025, 130])
torch.Size([32, 32, 512, 65])
torch.Size([32, 1064960])
t

RuntimeError: stack expects each tensor to be equal size, but got [1, 1025, 130] at entry 0 and [1, 1025, 129] at entry 19

In [63]:
class RealNet(nn.Module):
    def __init__(self):
        super(RealNet, self).__init__()
        self.conv1 = ComplexConv2d(1, 32, (3, 3))
        self.bn1 = ComplexBatchNorm2d(32)
        self.conv2 = ComplexConv2d(32, 64, (3, 3))
        self.bn2 = ComplexBatchNorm2d(64)
        self.conv3 = ComplexConv2d(64, 32, (2, 2))
        self.bn3 = ComplexBatchNorm2d(32)
        self.fc1 = ComplexLinear(32*127*15, 64)
        self.dropout = ComplexDropout2d(0.3)
        self.fc2 = ComplexLinear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, (3, 3), stride=(2, 2), padding=(1, 1))
        x = self.bn1(x)
        x = self.conv2(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, (3, 3), stride=(2, 2), padding=(1, 1))
        x = self.bn2(x)
        x = self.conv3(x)
        x = complex_relu(x)
        x = complex_max_pool2d(x, (3, 3), stride=(2, 2), padding=(1, 1))
        x = self.bn3(x)
        x = x.view(32, 32*127*15)
        x = self.fc1(x)
        x = complex_relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = x.abs()
        x = F.log_softmax(x, dim=1)
        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RealNet().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [64]:
epoch_loss = {}
for epoch in range(10):
    cur_loss = train(model, device, train_loader, optimizer, epoch)
    epoch_loss[epoch] = cur_loss

torch.Size([32, 1, 1025, 130])


[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 