In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt
import numpy as np
import time
from torch.optim.lr_scheduler import StepLR

from transformers import AutoFeatureExtractor, ASTForAudioClassification, AutoConfig
from datasets import load_dataset
import soundfile
import librosa
from transformers import ASTConfig, ASTModel
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")

In [3]:
def load_splits(loc, num_files):
    hold = []
    for o in range(0, num_files):
        hold.append(torch.load(f"{loc}{o}.pt"))
    return torch.vstack(hold)

In [4]:
x_data = load_splits("spectrograms_data/x_data", 1)
print(f"x_data: {x_data.shape}")

y_data = load_splits("spectrograms_data/y_data", 1)
print(f"y_data: {y_data.shape}")

spectrograms = load_splits("spectrograms_data/spectrograms", 13)
print(f"spectrograms: {spectrograms.shape}")

  hold.append(torch.load(f"{loc}{o}.pt"))


x_data: torch.Size([1000, 1000])
y_data: torch.Size([3000, 1000])
spectrograms: torch.Size([1495, 1017, 167])


In [5]:
class PairedDataset(Dataset):
    def __init__(self, x_data, y_data):
        assert len(x_data) == len(y_data), "Datasets must be of the same length"
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        x = self.x_data[idx]
        y = self.y_data[idx]
        return x, y

In [6]:
sample_size = spectrograms.shape[0]
dataset = PairedDataset(spectrograms, y_data[0:sample_size])
training_data = DataLoader(dataset, batch_size=50)
x_data = x_data[0:sample_size][0]

In [6]:
def linear_function(params, x, device):
    x = x.to(device)
    if params.size(1) > 1:
        a = params[:, 0].type(torch.float).unsqueeze(1).to(device)
        b = params[:, 1].type(torch.float).unsqueeze(1).to(device)
        return a*x + b
    else:
        return torch.zeros_like(params)

def quadratic_function(params, x, device):
    params = params.to(device)
    x = x.to(device)
    y = torch.zeros((params.size(0),x.shape[0])).to(device)
    if params.size(1) > 2:
        for n in range(len(params[0])):
            y += (params[:, n].type(torch.float).unsqueeze(1).to(device))*x**(3-n)
        return y
    else:
        return torch.zeros_like(params)

def cubic_function(params, x, device):
    params = params.to(device)
    x = x.to(device)
    y = torch.zeros((params.size(0),x.shape[0])).to(device)
    if params.size(1) > 3:
        for n in range(len(params[0])):
            y += (params[:, n].type(torch.float).unsqueeze(1).to(device))*x**(3-n)
        return y
    else:
        return torch.zeros_like(params)
    
def sin_function(params, x, device):
    x = x.to(device)
    if params.size(1) == 3:
        amplitude = params[:, 0].type(torch.float).unsqueeze(1).to(device)
        frequency = params[:, 1].type(torch.float).unsqueeze(1).to(device)
        phase = params[:, 2].type(torch.float).unsqueeze(1).to(device)
        return amplitude * torch.sin(2 * torch.pi * frequency * x + phase)
    else:
        return torch.zeros_like(x)

In [4]:
def new_loss(output, target, x):
    derivative_true = torch.autograd.grad(outputs=target.requires_grad_(True), inputs=x, grad_outputs=torch.ones_like(target), allow_unused=True)[0]
    if derivative_true is None:
        derivative_true = torch.zeros_like(output)
    derivative_pred = (torch.roll(output, shifts=-1, dims=0) - torch.roll(output, shifts=1, dims=0)) / (2 * 1e-8)
    mse_function = torch.mean(torch.abs(target - output)**3)
    mse_derivative = torch.mean(torch.abs(derivative_pred - derivative_true)**3)
    return mse_function + mse_derivative

In [7]:
class CustomModel(ASTForAudioClassification):
    def __init__(self, config, functions, x_data, device_name):
        super().__init__(config)
        self.functions = functions
        self.x_data = x_data
        self.params = sum(self.functions[1])
        self.device_name = device_name
        
        del self.classifier.dense

        self.flatten_layer = nn.Flatten()

        self.hidden_embedding = nn.Sequential(
            nn.Linear(932352, 128),
            nn.SELU(),
            nn.Linear(128, 64),
            nn.SELU(),
            nn.Linear(64, self.params),
        )

    def forward(self, input_values, targets):
        print(f"inputs: {input_values.shape}")
        inputs = self.audio_spectrogram_transformer(input_values)
        input = self.classifier.layernorm(inputs[0])
        print(f"input: {input.shape}")
        input = self.flatten_layer(input)
        print(f"flattened: {input.shape}")
        embedding = self.hidden_embedding(input)
        print(f"embedding: {embedding.shape}")
        embedding = embedding.view(-1, 12)
        print(f"viewed: {embedding.shape}")
        
        loss_func = nn.MSELoss()
        start_index = 0
        losses = []
        outputs = []
        
        for f in range(len(self.functions[0])):
            #print(f"params: {embedding[:, start_index:start_index+self.functions[1][f]].shape}")
            #print(f"x data: {self.x_data.shape}")
            output = self.functions[0][f](
                embedding[:, start_index:start_index+self.functions[1][f]], 
                self.x_data, 
                device=self.device_name
            ).to(self.device_name)
            #print(f"output: {output.shape}")
            #print(f"targets: {targets.shape}")
            outputs.append(output)
            loss = loss_func(output, targets)
            losses.append(loss)
            start_index += self.functions[1][f]        
        best_index = torch.argmin(torch.tensor(losses))
        best_func = self.functions[0][best_index]
        best_loss, best_out = losses[best_index], outputs[best_index]

        return best_out, best_loss, best_func, outputs, losses

In [8]:
config = ASTConfig()
functions = [[linear_function, quadratic_function, cubic_function, sin_function], [2,3,4,3]]

In [10]:
customModel = CustomModel(config, functions=functions, x_data=x_data, device_name=device).to(device)

In [12]:
loss_func = nn.MSELoss()
optimizer = optim.Adam(customModel.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

In [13]:
epochs = 5
for epoch in range(epochs):
    start_time = time.time()
    train_loss = 0.0
    total_num = 0
    customModel.train()
    
    for train_batch,targets in training_data:
        train_batch = train_batch.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        best_out,_,_,_,_  = customModel(train_batch, targets)
        loss = loss_func(best_out,targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * best_out.shape[0]
        total_num += best_out.shape[0]
    scheduler.step()
    train_loss /= total_num
    print(f"epoch : {epoch}/{epochs}, loss = {train_loss:.8f}")
    print(f"--- {time.time() - start_time} seconds ---")


inputs: torch.Size([50, 1017, 167])


RuntimeError: The size of tensor a (1618) must match the size of tensor b (1214) at non-singleton dimension 1

In [43]:
y_data[sample_size:sample_size+100].shape

torch.Size([100, 1017, 126])

In [44]:
customModel.eval
t_y = y_data[sample_size:sample_size+100]
t_v = torch.swapaxes(y_val[:, sample_size:sample_size+100], 0, 1)
print(t_y.shape)
print(t_v.shape)
best_out, best_loss, best_func, outputs, losses = customModel(t_y, t_v)
print(best_out.shape)
'''rand = np.random.randint(0, 10)
plt.plot(best_out[rand].detach().cpu().numpy(), "r", label='Predicted')
plt.plot(t_v[rand].detach().cpu().numpy(), "b", label='True')
plt.legend();'''

torch.Size([100, 1017, 126])
torch.Size([100, 1000])


OutOfMemoryError: CUDA out of memory. Tried to allocate 356.00 MiB. GPU 4 has a total capacity of 47.53 GiB of which 128.00 MiB is free. Including non-PyTorch memory, this process has 47.39 GiB memory in use. Of the allocated memory 46.78 GiB is allocated by PyTorch, and 298.50 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [11]:
dummyData = torch.zeros((50, 1017, 126)).to(device)
print(dummyData.shape)
dummyTargets = torch.zeros((1017, 1000)).to(device)
print(dummyTargets.shape)
customModel(dummyData, dummyTargets).shape

torch.Size([50, 1017, 126])
torch.Size([1017, 1000])
inputs: torch.Size([50, 1017, 126])
input: torch.Size([50, 1214, 768])
flattened: torch.Size([50, 932352])
embedding: torch.Size([50, 12])
viewed: torch.Size([50, 12])


RuntimeError: The size of tensor a (50) must match the size of tensor b (1017) at non-singleton dimension 0

In [None]:
'''r = np.random.randint(sample_size)
sample = data[r, :, 0]
print(sample.shape)
# STFT parameters
n_fft = 256  # Number of FFT components
win_length = 256  # Window length
hop_length = 128  # Number of samples between frames

# Convert sample to complex tensor with the required dimensions
sample = sample.unsqueeze(0)  # Add batch dimension

# Apply STFT
spectrogram = torch.stft(sample, n_fft=n_fft, win_length=win_length, hop_length=hop_length, return_complex=True)
print(spectrogram.shape)
# Compute magnitude spectrogram
magnitude_spectrogram = torch.abs(spectrogram)
print(magnitude_spectrogram.shape)

# Convert to numpy for plotting
spectrogram_np = magnitude_spectrogram.squeeze().cpu().numpy()
print(spectrogram_np.size)
# Plot the spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(20 * np.log10(spectrogram_np + 1e-8), aspect='auto', origin='lower', cmap='inferno')
plt.colorbar(label='Magnitude (dB)')
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.title('Spectrogram of Polynomial Function Output')
plt.show()

plt.plot(data[r, :, 0].detach().cpu().numpy(), "-");
'''