In [1]:
import os
import pandas as pd
import torchaudio
import librosa
import numpy as np
import soundfile as sf
from pydub import AudioSegment

In [2]:
def convert_webm_to_wav(webm_path, wav_path):
    """Decode a WebM file, reduce it to a single channel and export it as WAV.

    Args:
        webm_path: Path of the source file, decoded with ``format='webm'``.
        wav_path: Destination path for the exported WAV file.
    """
    # set_channels(1) yields a mono segment, which is what gets exported.
    mono_audio = AudioSegment.from_file(webm_path, format='webm').set_channels(1)
    mono_audio.export(wav_path, format='wav')

In [3]:
# Load the train/test metadata CSVs. `train` is later filtered on its `covid`
# column, and its `uuid` values are used to locate spectrogram files.
train = pd.read_csv('/kaggle/input/udp-dl-trabajo-3-covid-19/train.csv')
test = pd.read_csv('/kaggle/input/udp-dl-trabajo-3-covid-19/test.csv')
#samples = train.drop(columns=["covid"])

In [4]:
# Module-level placeholders. process_audio_files() defines its own local
# variables with these same names, so calling it does not update these.
max_duration = 0
audio_durations = {}

In [5]:
def process_audio_files(csv_file,
                        audio_dir='/kaggle/input/udp-dl-trabajo-3-covid-19/audio',
                        output_dir='/kaggle/working'):
    """Locate the audio for every UUID in a metadata CSV and measure its duration.

    For each value of the CSV's ``uuid`` column the function looks for
    ``<audio_dir>/<uuid>.webm`` and, if found, converts it to a mono WAV at
    ``<output_dir>/<uuid>.wav``. If only ``<audio_dir>/<uuid>.wav`` exists, that
    file is used as-is. UUIDs with neither file are reported and skipped.

    Args:
        csv_file: Path to a CSV file that has a ``uuid`` column.
        audio_dir: Directory holding the source ``.webm`` / ``.wav`` files.
            Defaults to the Kaggle competition audio folder.
        output_dir: Directory that receives the converted WAV files.
            Defaults to the Kaggle working directory.

    Returns:
        A tuple ``(audio_durations, max_duration)`` where ``audio_durations``
        maps each found UUID to ``(wav_path, duration_in_seconds)`` and
        ``max_duration`` is the largest duration seen (``0`` if none were found).
    """
    data = pd.read_csv(csv_file)
    max_duration = 0
    audio_durations = {}

    # Iterate over the uuid column directly: iterrows() builds a Series per row
    # and may upcast values to a common dtype, which is unnecessary here.
    for index, uuid in data['uuid'].items():
        print(f"INDEX {index} - Processing UUID: {uuid}")
        webm_path = os.path.join(audio_dir, f'{uuid}.webm')
        source_wav_path = os.path.join(audio_dir, f'{uuid}.wav')
        wav_path = os.path.join(output_dir, f'{uuid}.wav')
        print(webm_path, wav_path)

        if os.path.exists(webm_path):
            # Create the destination lazily so nothing is touched on disk when
            # there is no audio to convert.
            os.makedirs(output_dir, exist_ok=True)
            convert_webm_to_wav(webm_path, wav_path)
        elif os.path.exists(source_wav_path):
            # NOTE(review): in this branch wav_path points inside audio_dir.
            # Later steps of this notebook overwrite and delete wav_path, which
            # will fail if audio_dir is read-only - confirm this is intended.
            wav_path = source_wav_path
        else:
            print(f"Audio file for UUID {uuid} not found.")
            continue

        duration = librosa.get_duration(path=wav_path)
        audio_durations[uuid] = (wav_path, duration)
        max_duration = max(max_duration, duration)
        print(f"Duration of {uuid}: {duration:.2f} seconds")

    return audio_durations, max_duration

In [6]:
# Train and test audio cannot be processed in the same run because the kernel runs out of memory; that is why the train call is commented out (the last run processed the test set).
#train_audio_durations, train_max_duration = process_audio_files('/kaggle/input/udp-dl-trabajo-3-covid-19/train.csv')
#test_audio_durations, test_max_duration = process_audio_files('/kaggle/input/udp-dl-trabajo-3-covid-19/test.csv')

In [7]:
# Find the longest audio duration across train and test so that the shorter clips can later be padded to it.
#overall_max_duration = max(19.74, test_max_duration)
#overall_max_duration = max(train_max_duration, 0)
#overall_max_duration = max(train_max_duration, test_max_duration)
#print(f"Overall maximum duration: {overall_max_duration:.2f} seconds")

In [8]:
def pad_audio_files(audio_durations, max_duration):
    """Zero-pad WAV files in place so they all last ``max_duration`` seconds.

    Args:
        audio_durations: Mapping ``uuid -> (wav_path, duration_in_seconds)``.
        max_duration: Target duration in seconds.

    Files whose recorded duration is already at least ``max_duration`` are left
    untouched. Shorter files are loaded, padded with trailing zeros on the time
    axis and written back to the same ``wav_path``.
    """
    for uuid, (wav_path, duration) in audio_durations.items():
        if duration >= max_duration:
            print(f"No padding needed for {uuid}")
            continue

        waveform, sample_rate = torchaudio.load(wav_path)
        # Derive the padding from the real number of frames rather than from
        # the float duration difference: truncating
        # (max_duration - duration) * sample_rate can leave a file one sample
        # short of the target, giving padded files of unequal length.
        target_frames = int(round(max_duration * sample_rate))
        padding = target_frames - waveform.shape[-1]
        if padding <= 0:
            # Already at the target length in samples; avoid a pointless rewrite.
            print(f"No padding needed for {uuid}")
            continue

        # Pad only the last (time) axis; the channel axis is left as is.
        padded_waveform = np.pad(waveform.numpy(), ((0, 0), (0, padding)), mode='constant')
        # soundfile expects (frames, channels), hence the transpose.
        sf.write(wav_path, padded_waveform.T, sample_rate)
        print(f"Padded {uuid} to {max_duration:.2f} seconds")

In [9]:
#pad_audio_files(train_audio_durations, overall_max_duration)
#pad_audio_files(test_audio_durations, overall_max_duration)

#print("All audio files have been padded to the maximum duration.")

In [10]:
!pip install torchaudio matplotlib



In [11]:
import torchaudio
import torch
import matplotlib.pyplot as plt

In [12]:
def audio_to_mel_spectrogram(audio_path, n_mels=64):
    """Load an audio file and return its mel-spectrogram converted with AmplitudeToDB.

    Args:
        audio_path: Path of the audio file, read with ``torchaudio.load``.
        n_mels: Number of mel filter banks. Defaults to 64, the value that was
            previously hard-coded.

    Returns:
        The output of ``AmplitudeToDB`` applied to the ``MelSpectrogram`` of the
        loaded waveform. Both transforms use their defaults apart from
        ``sample_rate`` (taken from the file) and ``n_mels``.
    """
    waveform, sample_rate = torchaudio.load(audio_path)

    # Build the mel-spectrogram transform using the file's own sample rate.
    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate,
        n_mels=n_mels
    )
    # Convert to a logarithmic (dB) scale.
    to_db = torchaudio.transforms.AmplitudeToDB()

    return to_db(to_mel(waveform))

In [13]:
def process_and_save_mel_spectrograms(audio_durations, output_dir):
    """Compute and save a mel-spectrogram per audio file, then delete the audio.

    Args:
        audio_durations: Mapping ``uuid -> (wav_path, duration)``; only
            ``wav_path`` is used here.
        output_dir: Directory that receives one ``<uuid>.pt`` file per entry.
            It is created if it does not exist.

    Failures are reported with ``print`` and do not stop the loop. A source
    audio file is only deleted after its spectrogram has been saved.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    for uuid, (wav_path, _) in audio_durations.items():
        try:
            mel_spectrogram = audio_to_mel_spectrogram(wav_path)
            output_path = os.path.join(output_dir, f"{uuid}.pt")
            torch.save(mel_spectrogram, output_path)
            print(f"Saved mel-spectrogram for {uuid} at {output_path}")
        except Exception as e:
            # Best-effort batch job: report and move on to the next file.
            print(f"Error processing {uuid}: {e}")
            continue

        # Delete the audio file after processing it. This is handled separately
        # so that a failed cleanup (e.g. a read-only source directory) is not
        # reported as a processing error when the spectrogram was saved.
        try:
            os.remove(wav_path)
            print(f"Deleted audio file for {uuid} at {wav_path}")
        except OSError as e:
            print(f"Could not delete audio file for {uuid} at {wav_path}: {e}")

In [14]:
#output_dir_train = '/kaggle/working/mel_spectrograms/train'
#output_dir_test = '/kaggle/working/mel_spectrograms/test'

In [15]:
#process_and_save_mel_spectrograms(train_audio_durations, output_dir_train)
#process_and_save_mel_spectrograms(test_audio_durations, output_dir_test)
# NOTE: this message is printed unconditionally; with the two calls above
# commented out, this cell does not write any spectrogram.
print("All mel-spectrograms have been saved.")

All mel-spectrograms have been saved.


In [16]:
import random
def load_random_mel_spectrograms(spectrogram_dir, num_samples=3):
    """Load a random selection of saved ``.pt`` spectrograms from a directory.

    Args:
        spectrogram_dir: Directory to search (non-recursively) for ``.pt`` files.
        num_samples: Maximum number of files to load. If the directory holds
            fewer ``.pt`` files, all of them are returned instead of raising.

    Returns:
        A tuple ``(mel_spectrograms, random_files)``: the objects returned by
        ``torch.load`` and the matching file names, in the same order.
    """
    # os.listdir order is arbitrary; sorting makes the selection reproducible
    # when the `random` module has been seeded.
    all_files = sorted(f for f in os.listdir(spectrogram_dir) if f.endswith('.pt'))

    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(num_samples, len(all_files))
    random_files = random.sample(all_files, sample_size)

    mel_spectrograms = [torch.load(os.path.join(spectrogram_dir, f)) for f in random_files]

    return mel_spectrograms, random_files

In [17]:
#spectrogram_dir = '/kaggle/working/mel_spectrograms/augmented_train'

In [18]:
#mel_spectrograms, random_files = load_random_mel_spectrograms(spectrogram_dir, num_samples=3)

In [19]:
def plot_mel_spectrograms(mel_spectrograms, filenames):
    """Draw one figure per spectrogram, titled with the matching file name.

    Args:
        mel_spectrograms: Iterable of tensors; each is squeezed and converted
            to a NumPy array before being shown as an image.
        filenames: Iterable of names paired positionally with the spectrograms.
    """
    for spectrogram, name in zip(mel_spectrograms, filenames):
        plt.figure(figsize=(10, 4))
        image = spectrogram.squeeze().numpy()
        # origin='lower' puts the first row at the bottom of the plot.
        plt.imshow(image, cmap='viridis', origin='lower', aspect='auto')
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Mel-Spectrogram (dB) - {name}')
        plt.xlabel('Time')
        plt.ylabel('Frequency')
        plt.show()

#plot_mel_spectrograms(mel_spectrograms, random_files)

In [20]:
import zipfile
import os

def zip_directory(directory_path, output_zip):
    """Recursively archive a directory into a compressed zip file.

    Args:
        directory_path: Directory whose files are added to the archive.
        output_zip: Path of the zip file to create (overwritten if it exists).

    Archive member names are relative to the parent of ``directory_path``, so
    every entry is prefixed with the directory's own name. Only files are
    added; empty directories are not stored.
    """
    # Loop-invariant values: the base for member names and the archive's own
    # absolute path (used to avoid adding the archive to itself).
    parent_dir = os.path.join(directory_path, '..')
    archive_abspath = os.path.abspath(output_zip)

    # ZipFile defaults to ZIP_STORED (no compression); request DEFLATE instead.
    with zipfile.ZipFile(output_zip, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_abspath:
                    # output_zip lives inside directory_path: skip it.
                    continue
                zipf.write(file_path, os.path.relpath(file_path, parent_dir))

#output_dir_train = '/kaggle/working/mel_spectrograms/train'
#output_dir_test = '/kaggle/working/mel_spectrograms/test'
#output_zip_train = '/kaggle/working/mel_spectrograms_train.zip'
#output_zip_test = '/kaggle/working/mel_spectrograms_test.zip'

#zip_directory(output_dir_train, output_zip_train)
#zip_directory(output_dir_test, output_zip_test)

#print(f"Train mel-spectrograms zipped to: {output_zip_train}")
#print(f"Test mel-spectrograms zipped to: {output_zip_test}")


## Augmentation
Solo se aumenta la clase COVID (1), porque hay unas 500 muestras frente a más de 6000 de no COVID. Eso es malo porque el entrenamiento está muy sesgado hacia no COVID y, aunque se aumenten los no COVID, quedará también sesgado a ese tipo de tos de COVID.

In [21]:
# Keep only the training rows labelled COVID (covid == 1).
covid_data = train[train['covid'] == 1]

In [22]:
# Directory searched for the training spectrograms, one "<uuid>.pt" file each.
spectrogram_dir_train = '/kaggle/input/mel-train/train'

In [23]:
# Paths of the COVID-labelled spectrograms that actually exist on disk.
# Each candidate path is built once and then filtered by existence.
covid_files = [
    candidate
    for candidate in (
        os.path.join(spectrogram_dir_train, f"{uuid}.pt")
        for uuid in covid_data['uuid']
    )
    if os.path.exists(candidate)
]

In [24]:
# Report how many COVID-labelled rows have a spectrogram file on disk.
print(f"Found {len(covid_files)} COVID-19 mel-spectrograms for augmentation.")

Found 487 COVID-19 mel-spectrograms for augmentation.


In [25]:
class MelSpectrogramAugmentation:
    """Randomly perturb a spectrogram tensor with additive noise and scaling.

    Each call makes two independent decisions, in this order:

    1. If ``random.random() > 0.5``, add standard-normal noise scaled by
       ``max_noise``.
    2. If ``random.random() > 0.5``, multiply every value by a factor drawn
       uniformly from ``[1 - max_gain, 1 + max_gain]``.

    When neither condition holds, the input is returned unchanged (the same
    object), so some "augmented" outputs are identical to their source.
    Both operations act directly on the tensor values as given.
    """

    def __init__(self, max_noise=0.1, max_gain=0.1):
        # max_noise scales the additive noise; max_gain bounds the factor range.
        self.max_noise, self.max_gain = max_noise, max_gain

    def _with_noise(self, spectrogram):
        """Return the tensor plus standard-normal noise scaled by max_noise."""
        noise = torch.randn_like(spectrogram) * self.max_noise
        return spectrogram + noise

    def _with_gain(self, spectrogram):
        """Return the tensor multiplied by a uniformly drawn factor."""
        lowest, highest = 1.0 - self.max_gain, 1.0 + self.max_gain
        factor = random.uniform(lowest, highest)
        return spectrogram * factor

    def __call__(self, mel_spectrogram):
        result = mel_spectrogram
        # The order of the random draws is significant for seeded runs.
        if random.random() > 0.5:
            result = self._with_noise(result)
        if random.random() > 0.5:
            result = self._with_gain(result)
        return result

In [26]:
# Augmentation callable with the default settings (max_noise=0.1, max_gain=0.1).
augment_transform = MelSpectrogramAugmentation()

In [27]:
# Output directory for the augmented spectrograms; created if missing.
augmented_dir_train = '/kaggle/working/mel_spectrograms/augmented_train'
os.makedirs(augmented_dir_train, exist_ok=True)

In [28]:
# Write 7 augmented copies of every COVID spectrogram found on disk.
for file_path in covid_files:
    mel_spectrogram = torch.load(file_path)
    # The source UUID (file name without its extension) is the same for every
    # copy, so compute it once per file rather than once per augmentation.
    source_uuid = os.path.splitext(os.path.basename(file_path))[0]
    for i in range(7):  # 7 augmented copies per source spectrogram
        augmented_spectrogram = augment_transform(mel_spectrogram)
        augmented_uuid = f"{source_uuid}_aug_{i}.pt"
        augmented_path = os.path.join(augmented_dir_train, augmented_uuid)
        torch.save(augmented_spectrogram, augmented_path)
        print(f"Saved augmented mel-spectrogram at {augmented_path}")

Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_0.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_1.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_2.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_3.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_4.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_5.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/3db14b07-2720-4fa9-870a-725fda24a0bd_aug_6.pt
Saved augmented mel-spectrogram at /kaggle/working/mel_spectrograms/augmented_train/d5e9a1

In [29]:
# Archive the augmented spectrograms. output_dir_train_aug is the same path
# that was assigned to augmented_dir_train earlier in this notebook.
output_dir_train_aug = '/kaggle/working/mel_spectrograms/augmented_train'
output_zip_train_aug = '/kaggle/working/mel_spectrograms_augmented_train.zip'
zip_directory(output_dir_train_aug, output_zip_train_aug)