In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the paths used by
# the later cells (all under /content/drive/MyDrive/...) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torchaudio
import torch
import os
import numpy as np
import torch.nn.functional as F
import IPython.display as ipd

In [None]:
def hann_window(N):
    """Return a symmetric Hann window of length ``N``.

    Evaluates ``0.5 * (1 - cos(2*pi*n / (N - 1)))`` for ``n = 0 .. N-1``.

    Args:
        N: Number of samples in the window.

    Returns:
        A 1-D NumPy float array of length ``N``. ``N == 1`` yields ``[1.0]``
        and ``N < 1`` yields an empty array.
    """
    if N == 1:
        # The formula divides by N - 1, which would produce NaN here; use the
        # same single-sample convention as np.hanning(1).
        return np.ones(1)
    n = np.arange(N)
    return 0.5 * (1 - np.cos(2 * np.pi * n / (N - 1)))

def hamm_window(N):
    """Return a symmetric Hamming window of length ``N``.

    Evaluates ``0.54 - 0.46 * cos(2*pi*n / (N - 1))`` for ``n = 0 .. N-1``.

    Args:
        N: Number of samples in the window.

    Returns:
        A 1-D NumPy float array of length ``N``. ``N == 1`` yields ``[1.0]``
        and ``N < 1`` yields an empty array.
    """
    if N == 1:
        # The formula divides by N - 1, which would produce NaN here; use the
        # same single-sample convention as np.hamming(1).
        return np.ones(1)
    n = np.arange(N)
    return 0.54 - 0.46 * np.cos(2 * np.pi * n / (N - 1))

def rectangular_window(N):
    """Return a rectangular (all-ones) window of length ``N``.

    Args:
        N: Number of samples in the window.

    Returns:
        A NumPy float64 array of length ``N`` in which every sample is 1.0.
    """
    window = np.full(N, 1.0, dtype=np.float64)
    return window

In [None]:
def compute_stft(file_path, n_fft=1024, win_length=1024, device='cpu', window_type = None):
    """Load an audio file and return its magnitude spectrogram resized to 224x224.

    The waveform is read with ``torchaudio.load``, averaged down to a single
    channel when it has more than one, cut into frames of ``win_length``
    samples with a hop of ``win_length // 2``, multiplied by the analysis
    window, and transformed with ``torch.fft.rfft(..., n=n_fft)``. The
    magnitudes are then bilinearly resampled to a fixed 224x224 grid.

    Args:
        file_path: Path of the audio file to load.
        n_fft: FFT size passed to ``torch.fft.rfft``; the un-resized
            spectrogram has ``n_fft // 2 + 1`` frequency rows.
        win_length: Number of samples per analysis frame.
        device: Torch device on which the computation runs.
        window_type: Callable taking ``win_length`` and returning the window
            samples (e.g. a NumPy array). ``None`` selects a rectangular
            (all-ones) window.

    Returns:
        A tuple ``(spectrogram, sample_rate)`` where ``spectrogram`` is a
        tensor of shape ``(1, 224, 224)`` and ``sample_rate`` is the value
        reported by ``torchaudio.load``.

    Raises:
        ValueError: If ``n_fft`` or ``win_length`` is smaller than 1.
    """
    if win_length < 1 or n_fft < 1:
        raise ValueError("n_fft and win_length must be positive integers")

    # 50% overlap between consecutive frames; never let the hop collapse to 0.
    hop_length = max(1, win_length // 2)
    waveform, sample_rate = torchaudio.load(file_path)

    # Down-mix multi-channel audio by averaging over the channel axis.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    waveform = waveform.to(device)

    # A clip shorter than one window would produce zero (or a negative number
    # of) frames and make the resize below fail; zero-pad it to one full frame.
    if waveform.shape[1] < win_length:
        waveform = F.pad(waveform, (0, win_length - waveform.shape[1]))

    if window_type is None:
        # No window requested: equivalent to a rectangular window.
        window = torch.ones(win_length, dtype=torch.float32, device=device)
    else:
        window = torch.tensor(window_type(win_length), dtype=torch.float32, device=device)

    # (1, num_frames, win_length): every complete frame, taken as a strided
    # view, with num_frames = (num_samples - win_length) // hop_length + 1.
    frames = waveform.unfold(1, win_length, hop_length)
    spectrum = torch.fft.rfft(frames * window, n=n_fft, dim=-1)

    # (1, num_frames, bins) -> (1, 1, bins, num_frames) as expected by interpolate.
    spectrogram = spectrum.abs().transpose(1, 2).unsqueeze(0)
    spectrogram = F.interpolate(spectrogram, size=(224, 224), mode='bilinear', align_corners=False)

    return spectrogram.squeeze(0), sample_rate

In [None]:
def process_audio_files(audio_path, save_path, window_type):
    """Compute and save a spectrogram tensor for every ``.wav`` file one level below ``audio_path``.

    Each immediate sub-directory of ``audio_path`` is scanned (in sorted
    order) for files whose name ends in ``.wav`` (case-insensitive). Every
    such file is passed to ``compute_stft`` and the resulting tensor is written
    with ``torch.save`` to ``save_path/<original stem>.pt``. All sub-directories
    share the same flat ``save_path``, so files with the same stem in different
    sub-directories overwrite each other.

    Args:
        audio_path: Directory whose sub-directories contain the audio files.
        save_path: Output directory; created if it does not exist.
        window_type: Window callable forwarded to ``compute_stft``.

    Returns:
        None. Prints ``"<folder> Done"`` after each sub-directory.
    """
    os.makedirs(save_path, exist_ok=True)
    folder_list = sorted(
        folder for folder in os.listdir(audio_path)
        if os.path.isdir(os.path.join(audio_path, folder))
    )
    for folder in folder_list:
        subfolder_path = os.path.join(audio_path, folder)
        for file in sorted(os.listdir(subfolder_path)):
            file_path = os.path.join(subfolder_path, file)
            if not (os.path.isfile(file_path) and file.lower().endswith('.wav')):
                continue
            # compute_stft returns (spectrogram, sample_rate); only the
            # spectrogram tensor is persisted.
            file_tensor, _ = compute_stft(file_path, window_type=window_type)
            tensor_filename = f"{os.path.splitext(file)[0]}.pt"
            tensor_save_path = os.path.join(save_path, tensor_filename)
            torch.save(file_tensor, tensor_save_path)
        print(f'{folder}', 'Done')

In [None]:
# Saving Hann tensors: compute a Hann-windowed spectrogram for every .wav file
# in the sub-folders of audio_path and store it as a .pt file in save_path.
# NOTE: this output folder is spelled with a double underscore ('Hann__Tensor'),
# unlike the 'Hamm_Tensor' and 'Rectangular_Tensor' folders used further down.
audio_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/audio'
save_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/Hann__Tensor'
process_audio_files(audio_path, save_path, hann_window)

fold6 Done
fold5 Done
fold3 Done
fold4 Done
fold8 Done
fold9 Done
fold7 Done
fold1 Done
fold2 Done
fold10 Done


In [None]:
# Saving Hamming tensors: same audio files as the Hann run, processed with
# hamm_window and written to the separate 'Hamm_Tensor' folder.
audio_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/audio'
save_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/Hamm_Tensor'
process_audio_files(audio_path, save_path, hamm_window)

fold6 Done
fold5 Done
fold3 Done
fold4 Done
fold8 Done
fold9 Done
fold7 Done
fold1 Done
fold2 Done
fold10 Done


In [None]:
# Saving rectangular-window tensors: same audio files as the Hann run, processed
# with rectangular_window and written to the separate 'Rectangular_Tensor' folder.
audio_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/audio'
save_path = '/content/drive/MyDrive/SEM_4/SPEECH/UrbanSound8K/Rectangular_Tensor'
process_audio_files(audio_path, save_path, rectangular_window)

fold6 Done
fold5 Done
fold3 Done
fold4 Done
fold8 Done
fold9 Done
fold7 Done
fold1 Done
fold2 Done
fold10 Done
